BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents

Started by PG Bug reporting form almost 4 years ago | 2 messages | bugs
Jump to latest
#1 PG Bug reporting form
noreply@postgresql.org

The following bug has been logged on the website:

Bug reference: 17557
Logged by: Alex Malek
Email address: magicagent@gmail.com
PostgreSQL version: 14.4
Operating system: Red Hat
Description:

ts_headline, when given a document over a certain size/number of words, will
cause "ERROR: invalid memory alloc request size XXXXXX"

# select ts_headline('b ' || repeat('1 ',16777215), $$'b'$$::tsquery,
'MaxWords=4, MinWords=3') ;
ERROR: invalid memory alloc request size 1610612736

Not just related to document size but also to number of "words" in a
document:

One less "word" works:

select ts_headline('b ' || repeat('1 ',16777214), $$'b'$$::tsquery,
'MaxWords=4, MinWords=3') ;
ts_headline
----------------
<b>b</b> 1 1 1
(1 row)

Mem not an issue for larger "words" up to a point:

# select ts_headline('b ' || repeat('123456789012345 ',16777214),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ts_headline
----------------------------------------------------------
<b>b</b> 123456789012345 123456789012345 123456789012345
(1 row)

# select ts_headline('b ' || repeat('1234567890123456 ',16777214),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ERROR: invalid memory alloc request size 1140850564

Memory issue appears to be triggered based on total number of words and word
length

# select ts_headline('b ' || repeat('1234567890123456 ',15790000),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ts_headline
-------------------------------------------------------------
<b>b</b> 1234567890123456 1234567890123456 1234567890123456
(1 row)

# select ts_headline('b ' || repeat('1234567890123456 ',15795000),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ERROR: invalid memory alloc request size 1074060012

I get the same results even when increasing psql GUCs including work_mem,
shared_buffers and effective_cache_size
Also on machines w/ significantly more RAM, with and w/o HugePages enabled.

#2 Japin Li
japinli@hotmail.com
In reply to: PG Bug reporting form (#1)
Re: BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents

On Fri, 22 Jul 2022 at 23:39, PG Bug reporting form <noreply@postgresql.org> wrote:

The following bug has been logged on the website:

Bug reference: 17557
Logged by: Alex Malek
Email address: magicagent@gmail.com
PostgreSQL version: 14.4
Operating system: Red Hat
Description:

ts_headline, when given a document over a certain size/number of words, will
cause "ERROR: invalid memory alloc request size XXXXXX"

# select ts_headline('b ' || repeat('1 ',16777215), $$'b'$$::tsquery,
'MaxWords=4, MinWords=3') ;
ERROR: invalid memory alloc request size 1610612736

Not just related to document size but also to number of "words" in a
document:

One less "word" works:

select ts_headline('b ' || repeat('1 ',16777214), $$'b'$$::tsquery,
'MaxWords=4, MinWords=3') ;
ts_headline
----------------
<b>b</b> 1 1 1
(1 row)

Mem not an issue for larger "words" up to a point:

# select ts_headline('b ' || repeat('123456789012345 ',16777214),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ts_headline
----------------------------------------------------------
<b>b</b> 123456789012345 123456789012345 123456789012345
(1 row)

# select ts_headline('b ' || repeat('1234567890123456 ',16777214),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ERROR: invalid memory alloc request size 1140850564

Memory issue appears to be triggered based on total number of words and word
length

# select ts_headline('b ' || repeat('1234567890123456 ',15790000),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ts_headline
-------------------------------------------------------------
<b>b</b> 1234567890123456 1234567890123456 1234567890123456
(1 row)

# select ts_headline('b ' || repeat('1234567890123456 ',15795000),
$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
ERROR: invalid memory alloc request size 1074060012

I get the same results even when increasing psql GUCs including work_mem,
shared_buffers and effective_cache_size
Also on machines w/ significantly more RAM, with and w/o HugePages enabled.

It seems to be a limitation of the repalloc() function [1]. Here is the backtrace.

#0 repalloc (pointer=0x7f3868312050, size=1610612736) at /mnt/workspace/postgresql/build/../src/backend/utils/mmgr/mcxt.c:1190
#1 0x000055f11dcee750 in hlfinditem (prs=0x7ffc5c5ac580, query=0x55f12016f408, pos=16777216, buf=0x55f160e51bf0 "1", buflen=1) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:462
#2 0x000055f11dcee9b4 in addHLParsedLex (prs=0x7ffc5c5ac580, query=0x55f12016f408, lexs=0x55f12023e9a0, norms=0x55f12023e928) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:505
#3 0x000055f11dceec2c in hlparsetext (cfgId=12360, prs=0x7ffc5c5ac580, query=0x55f12016f408,
buf=0x7f38ceb19054 "b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "..., buflen=33554432) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:582
#4 0x000055f11dcf1c2b in ts_headline_byid_opt (fcinfo=0x7ffc5c5ac610) at /mnt/workspace/postgresql/build/../src/backend/tsearch/wparser.c:319
#5 0x000055f11de9f57b in DirectFunctionCall4Coll (func=0x55f11dcf1a0b <ts_headline_byid_opt>, collation=0, arg1=12360, arg2=139881962639440, arg3=94494113854472, arg4=94494113855272)
at /mnt/workspace/postgresql/build/../src/backend/utils/fmgr/fmgr.c:861
#6 0x000055f11dcf1e02 in ts_headline_opt (fcinfo=0x55f12023cb48) at /mnt/workspace/postgresql/build/../src/backend/tsearch/wparser.c:366
#7 0x000055f11da28543 in ExecInterpExpr (state=0x55f12023ca60, econtext=0x55f12023c760, isnull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/backend/executor/execExprInterp.c:763
#8 0x000055f11da2a6be in ExecInterpExprStillValid (state=0x55f12023ca60, econtext=0x55f12023c760, isNull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/backend/executor/execExprInterp.c:1858
#9 0x000055f11da86710 in ExecEvalExprSwitchContext (state=0x55f12023ca60, econtext=0x55f12023c760, isNull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:343
#10 0x000055f11da86788 in ExecProject (projInfo=0x55f12023ca58) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:377
#11 0x000055f11da869b9 in ExecResult (pstate=0x55f12023c648) at /mnt/workspace/postgresql/build/../src/backend/executor/nodeResult.c:136
#12 0x000055f11da41e17 in ExecProcNodeFirst (node=0x55f12023c648) at /mnt/workspace/postgresql/build/../src/backend/executor/execProcnode.c:463
#13 0x000055f11da356a1 in ExecProcNode (node=0x55f12023c648) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:259
#14 0x000055f11da38318 in ExecutePlan (estate=0x55f12023c410, planstate=0x55f12023c648, use_parallel_mode=false, operation=CMD_SELECT, sendTuples=true, numberTuples=0, direction=ForwardScanDirection,
dest=0x55f120237198, execute_once=true) at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:1636
#15 0x000055f11da35d6b in standard_ExecutorRun (queryDesc=0x55f12018e800, direction=ForwardScanDirection, count=0, execute_once=true)
at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:363
#16 0x000055f11da35b81 in ExecutorRun (queryDesc=0x55f12018e800, direction=ForwardScanDirection, count=0, execute_once=true) at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:307
#17 0x000055f11dcda8d3 in PortalRunSelect (portal=0x55f1201da030, forward=true, count=0, dest=0x55f120237198) at /mnt/workspace/postgresql/build/../src/backend/tcop/pquery.c:924
#18 0x000055f11dcda50a in PortalRun (portal=0x55f1201da030, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x55f120237198, altdest=0x55f120237198, qc=0x7ffc5c5acd70)
at /mnt/workspace/postgresql/build/../src/backend/tcop/pquery.c:768
#19 0x000055f11dcd392c in exec_simple_query (query_string=0x55f12016d970 "select ts_headline('b ' || repeat('1 ',16777215), $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;")
at /mnt/workspace/postgresql/build/../src/backend/tcop/postgres.c:1243
#20 0x000055f11dcd83ef in PostgresMain (dbname=0x55f120197ea8 "postgres", username=0x55f120197e88 "japin") at /mnt/workspace/postgresql/build/../src/backend/tcop/postgres.c:4505
#21 0x000055f11dc000c2 in BackendRun (port=0x55f1201907c0) at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:4490
#22 0x000055f11dbff9b0 in BackendStartup (port=0x55f1201907c0) at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:4218
#23 0x000055f11dbfbc28 in ServerLoop () at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:1808
#24 0x000055f11dbfb3d9 in PostmasterMain (argc=3, argv=0x55f120166150) at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:1480
#25 0x000055f11dabf5f5 in main (argc=3, argv=0x55f120166150) at /mnt/workspace/postgresql/build/../src/backend/main/main.c:197

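repalloc() can allocate at most 1 GB - 1 bytes (MaxAllocSize). The failing
request in frame #0 above (size=1610612736, i.e. 1.5 GB) exceeds that limit.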

#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */

#define AllocSizeIsValid(size) ((Size) (size) <= MaxAllocSize)

[1]: /messages/by-id/ME3P282MB16676ED32167189CB0462173B6D69@ME3P282MB1667.AUSP282.PROD.OUTLOOK.COM

--
Regards,
Japin Li.
ChengDu WenWu Information Technology Co.,Ltd.