BUG #16024: segfault ip 0000560103865c60 error 4 in postgres

Started by PG Bug reporting formover 6 years ago6 messagesbugs
Jump to latest
#1PG Bug reporting form
noreply@postgresql.org

The following bug has been logged on the website:

Bug reference: 16024
Logged by: Stephan Knauss
Email address: sknauss@gmx.de
PostgreSQL version: 9.6.15
Operating system: Ubuntu 18.04
Description:

Using ubuntu packages 9.6.15 from postgresql.org. Recently a unchanged
routine crashes a postgres backend process most of the time (or always?)
with a segmentation fault.

Call stack below. Please let me know what further details can help you. The
core is 3.6G, I can provide it on request.

Program terminated with signal SIGSEGV, Segmentation fault.
#0 deconstruct_array (array=array@entry=0x564008a6bde8,
elmtype=elmtype@entry=25, elmlen=elmlen@entry=-1, elmbyval=elmbyval@entry=0
'\000',
elmalign=elmalign@entry=105 'i', elemsp=elemsp@entry=0x7ffc5a33d570,
nullsp=0x7ffc5a33d578, nelemsp=0x7ffc5a33d56c)
at ./build/../src/backend/utils/adt/arrayfuncs.c:3530
3530 ./build/../src/backend/utils/adt/arrayfuncs.c: No such file or
directory.
(gdb) bt
#0 deconstruct_array (array=array@entry=0x564008a6bde8,
elmtype=elmtype@entry=25, elmlen=elmlen@entry=-1, elmbyval=elmbyval@entry=0
'\000', elmalign=elmalign@entry=105 'i', elemsp=elemsp@entry=0x7ffc5a33d570,
nullsp=0x7ffc5a33d578, nelemsp=0x7ffc5a33d56c)
at ./build/../src/backend/utils/adt/arrayfuncs.c:3530
#1 0x0000564007ad7e48 in array_contain_compare
(array1=array1@entry=0x564008a1c718, array2=array2@entry=0x564008a6bde8,
collation=100, matchall=matchall@entry=0 '\000', fn_extra=<optimized out>)
at ./build/../src/backend/utils/adt/arrayfuncs.c:4111
#2 0x0000564007ada7dd in arrayoverlap (fcinfo=0x564008a5d6e8) at
./build/../src/backend/utils/adt/arrayfuncs.c:4201
#3 0x000056400799d3bc in ExecMakeFunctionResultNoSets
(fcache=0x564008a5d678, econtext=0x564008a5d1e8, isNull=0x7ffc5a33da37 "",
isDone=<optimized out>) at ./build/../src/backend/executor/execQual.c:2041
#4 0x00005640079a30ef in ExecQual (qual=qual@entry=0x564008a5df78,
econtext=econtext@entry=0x564008a5d1e8, resultForNull=resultForNull@entry=0
'\000') at ./build/../src/backend/executor/execQual.c:5316
#5 0x00005640079a3655 in ExecScan (node=node@entry=0x564008a5d0d8,
accessMtd=accessMtd@entry=0x5640079b9b00 <SeqNext>,
recheckMtd=recheckMtd@entry=0x5640079b9af0 <SeqRecheck>) at
./build/../src/backend/executor/execScan.c:208
#6 0x00005640079b9bb8 in ExecSeqScan (node=node@entry=0x564008a5d0d8) at
./build/../src/backend/executor/nodeSeqscan.c:127
#7 0x000056400799c340 in ExecProcNode (node=node@entry=0x564008a5d0d8) at
./build/../src/backend/executor/execProcnode.c:419
#8 0x00005640079b5f59 in ExecModifyTable (node=node@entry=0x564008a5cf98)
at ./build/../src/backend/executor/nodeModifyTable.c:1392
#9 0x000056400799c380 in ExecProcNode (node=node@entry=0x564008a5cf98) at
./build/../src/backend/executor/execProcnode.c:396
#10 0x00005640079984f6 in ExecutePlan (dest=0x564008a39bc0,
direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_INSERT, use_parallel_mode=<optimized out>,
planstate=0x564008a5cf98, estate=0x564008a5ccf8)
at ./build/../src/backend/executor/execMain.c:1567
#11 standard_ExecutorRun (queryDesc=0x564008a29ec8, direction=<optimized
out>, count=0) at ./build/../src/backend/executor/execMain.c:339
#12 0x0000564007ab6eb8 in ProcessQuery (plan=<optimized out>,
sourceText=0x5640089ed678 "DROP TABLE if EXISTS loose;CREATE UNLOGGED
TABLE loose (id bigint, node bigint, lat float, lon float, x integer, y
integer);INSERT INTO loose SELECT id, nodes[1] AS node FROM planet_osm_ways
WHERE ARR"..., params=0x0, dest=0x564008a39bc0,
completionTag=0x7ffc5a33e220 "") at
./build/../src/backend/tcop/pquery.c:187
#13 0x0000564007ab70ed in PortalRunMulti
(portal=portal@entry=0x5640089536e8, isTopLevel=isTopLevel@entry=0 '\000',
setHoldSnapshot=setHoldSnapshot@entry=0 '\000',
dest=dest@entry=0x564008a39bc0, altdest=altdest@entry=0x564008a39bc0,
completionTag=completionTag@entry=0x7ffc5a33e220 "")
at ./build/../src/backend/tcop/pquery.c:1303
#14 0x0000564007ab7cd1 in PortalRun (portal=portal@entry=0x5640089536e8,
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=0 '\000',
dest=dest@entry=0x564008a39bc0, altdest=altdest@entry=0x564008a39bc0,
completionTag=completionTag@entry=0x7ffc5a33e220 "")
at ./build/../src/backend/tcop/pquery.c:815
#15 0x0000564007ab5455 in exec_simple_query (query_string=0x5640089ed678
"DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node
bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT
id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...)
at ./build/../src/backend/tcop/postgres.c:1109
#16 PostgresMain (argc=<optimized out>, argv=argv@entry=0x564008989868,
dbname=<optimized out>, username=<optimized out>) at
./build/../src/backend/tcop/postgres.c:4101
#17 0x0000564007831664 in BackendRun (port=0x564008981600) at
./build/../src/backend/postmaster/postmaster.c:4339
#18 BackendStartup (port=0x564008981600) at
./build/../src/backend/postmaster/postmaster.c:4013
#19 ServerLoop () at ./build/../src/backend/postmaster/postmaster.c:1722
#20 0x0000564007a51e29 in PostmasterMain (argc=5, argv=0x564008934090) at
./build/../src/backend/postmaster/postmaster.c:1330
#21 0x0000564007832ed2 in main (argc=5, argv=0x564008934090) at
./build/../src/backend/main/main.c:228

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: PG Bug reporting form (#1)
Re: BUG #16024: segfault ip 0000560103865c60 error 4 in postgres

PG Bug reporting form <noreply@postgresql.org> writes:

Using ubuntu packages 9.6.15 from postgresql.org. Recently a unchanged
routine crashes a postgres backend process most of the time (or always?)
with a segmentation fault.
Call stack below. Please let me know what further details can help you. The
core is 3.6G, I can provide it on request.

A self-contained test case would be much handier. The stack trace
provides a suggestive hint about what you're doing:

#15 0x0000564007ab5455 in exec_simple_query (query_string=0x5640089ed678
"DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node
bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT
id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...)
at ./build/../src/backend/tcop/postgres.c:1109

but that's not enough to reproduce the problem.

Note that in psql, the way to submit a multi-query string like that
is to backslash-escape all but the last semicolon:

DROP TABLE if EXISTS loose\;CREATE UNLOGGED ... y integer)\;INSERT INTO ...

That might be necessary to reproduce the problem, or not.

regards, tom lane

#3Stephan Knauss
sknauss@gmx.de
In reply to: Tom Lane (#2)
Re: BUG #16024: segfault ip 0000560103865c60 error 4 in postgres

Hello Tom,

thank you for the quick reply.

On 26.09.2019 00:45, Tom Lane wrote:

PG Bug reporting form <noreply@postgresql.org> writes:
A self-contained test case would be much handier. The stack trace
provides a suggestive hint about what you're doing:

I fully agree on that. Unfortunately I have no idea what actually
breaks. The query was running unchanged for quite a while now. It
creates statistics for OpenStreetMap data a few times a day.

It could either be that the input data has changed in a way now
triggering a bug in postgres or that one of the recent updates broke it.
I do update from the official postgresql repository.

So I can't come up with a small repro case. I could dump the full DB.

#15 0x0000564007ab5455 in exec_simple_query (query_string=0x5640089ed678
"DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node
bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT
id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...)
at ./build/../src/backend/tcop/postgres.c:1109

but that's not enough to reproduce the problem.

The full query is:

$sql = "DROP TABLE if EXISTS loose;";
$sql .= "CREATE UNLOGGED TABLE loose (id bigint, node bigint, lat float,
lon float, x integer, y integer);";
$sql .= "INSERT INTO loose SELECT id, nodes[1] AS node FROM
planet_osm_ways WHERE
ARRAY['motorway','trunk','primary','secondary','tertiary'] && tags AND
NOT ARRAY['construction','proposed'] && tags AND nodes[1] <>
nodes[array_upper(nodes, 1)];";
$sql .= "INSERT INTO loose SELECT id, nodes[array_upper(nodes, 1)] AS
node FROM planet_osm_ways WHERE
ARRAY['motorway','trunk','primary','secondary','tertiary'] && tags AND
NOT ARRAY['construction','proposed'] && tags AND nodes[1] <>
nodes[array_upper(nodes, 1)];";
$sql .= "DELETE FROM loose AS l WHERE (SELECT count(1) FROM
planet_osm_ways AS w WHERE w.nodes && ARRAY[l.node] ) > 1;";
$sql .= "DELETE FROM loose AS l WHERE 0=(SELECT COUNT(1) FROM
planet_osm_nodes AS n WHERE n.id=l.node);";
$sql .= "DELETE FROM loose AS l USING planet_osm_point AS p WHERE
p.osm_id =l.node AND p.tags->'noexit' = 'yes';";
$sql .= "UPDATE loose AS l SET lat=(n.lat/10000000::float),
lon=(n.lon/10000000::float), x=FLOOR(
((n.lon/10000000::float)+180)/360*(1<<19) )::integer, y=FLOOR(
(1.0-ln(tan(radians((n.lat/10000000::float)))+1.0/cos(radians((n.lat/10000000::float))))/pi())/2.0*(1<<19)
)::integer FROM planet_osm_nodes AS n WHERE n.id=l.node;";
$sql .= "DELETE FROM loose AS l using planet_osm_nodes AS n WHERE
n.id=l.node AND NOT ST_Within(ST_SetSRID(ST_POINT(n.lon/10000000::float,
n.lat/10000000::float), 4326), st_transform((select
st_transform(st_union(way),4326) as w FROM planet_osm_polygon where
osm_id='-2067731' or osm_id='-49903' or osm_id='-50371' or
osm_id='-49915' or osm_id='-49898' or osm_id='-2108121' or
osm_id='-536780' or osm_id='-2103120'), 4326));";

Note that in psql, the way to submit a multi-query string like that
is to backslash-escape all but the last semicolon:

DROP TABLE if EXISTS loose\;CREATE UNLOGGED ... y integer)\;INSERT INTO ...

That might be necessary to reproduce the problem, or not.

I am not sure what you try to say. I am quite confident that I can
separate individual commands in the query with a semicolon. It also
worked quite well in the past and seems to be backed by the documentation:
https://www.postgresql.org/docs/9.6/sql-syntax-lexical.html

What do you mean by backslash-escaping? Or are you referring to the psql
command line utility? This is not used in my case. I submit the query
from PHP. But it sounds unrelated to me. Based on the call-stack is
looks crashing way more inside some processing routines.

Anything else I can support you tracking down this crash?

Stephan

#4Stephan Knauss
sknauss@gmx.de
In reply to: Tom Lane (#2)
Re: BUG #16024: segfault ip 0000560103865c60 error 4 in postgres

<html><head></head><body><div style="font-family: Verdana;font-size: 12.0px;"><div>
<div>Hello,</div>

<div>&nbsp;</div>

<div>I try to further condense the use-case to make it easier to figure out the root cause.</div>

<div>&nbsp;</div>

<div>The call involved here was recently modified. Could it be a regression?<br/>
https://github.com/postgres/postgres/commit/2938aa2a5b1cebb41f9e54c1ea289c286139c21e&lt;/div&gt;

<div>&nbsp;</div>

<div>@Tom Lane: As you reviewe that change, could you please have another look on the details?</div>

<div><br/>
Please find further hints below in the hope it helps you to get to the root cause.</div>

<div>&nbsp;</div>

<div>While the issue seems to appear frequently when I run the task from a cron job, it crashes much more sporadic when manually calling.</div>

<div>Executing the full query using pgAdmin crashed postgres. Sub-Sequent runs seem to work fine.</div>

<div>&nbsp;</div>

<div>Today I tried to execute smaller parts of the query.</div>

<div>&nbsp;</div>

<div>The following query caused a segfault:</div>

<div>SELECT count(1) FROM planet_osm_ways WHERE ARRAY[&#39;motorway&#39;,&#39;trunk&#39;,&#39;primary&#39;,&#39;secondary&#39;,&#39;tertiary&#39;] &amp;&amp; tags;</div>

<div>postgres[32041]: segfault at 557f561cddbc ip 0000557f5450bc60 sp 00007ffe79c8a6c0 error 4 in postgres[557f541bc000+64d000]</div>

<div>&nbsp;</div>

<div>The instruction pointer is at least similar to the initial crash.</div>

<div>&nbsp;</div>

<div>This is the table queried:</div>

<div>&nbsp;</div>

<div>Table &quot;public.planet_osm_ways&quot; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
&nbsp;Column &#124; &nbsp; Type &nbsp; &#124; Modifiers &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
--------+----------+----------- &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br/>
&nbsp;id &nbsp; &nbsp; &#124; bigint &nbsp; &#124; not null &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br/>
&nbsp;nodes &nbsp;&#124; bigint[] &#124; not null &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br/>
&nbsp;tags &nbsp; &#124; text[] &nbsp; &#124; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
Indexes: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
&nbsp; &nbsp; &quot;planet_osm_ways_pkey&quot; PRIMARY KEY, btree (id) &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
&nbsp; &nbsp; &quot;planet_osm_ways_nodes&quot; gin (nodes) WITH (fastupdate=off) &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;</div>

<div><br/>
I checked, the tags all have a cardinality greater of at least 2.</div>

<div>&nbsp;</div>

<div>The table planet_osm_ways is updated every few minutes by parallel tasks. Could it be some glitch in the table update?</div>

<div>&nbsp;</div>

<div>Executing the same query again works later on:</div>

<div>&nbsp;</div>

<div>gis=&gt; SELECT count(1) FROM planet_osm_ways WHERE ARRAY[&#39;motorway&#39;,&#39;trunk&#39;,&#39;primary&#39;,&#39;secondary&#39;,&#39;tertiary&#39;] &amp;&amp; tags; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
&nbsp;count &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
-------- &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;<br/>
&nbsp;765231 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<br/>
(1 row) &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;</div>

<div>&nbsp;</div>

<div>also waiting a bit longer I was not able to reproduce the issue. But usually it comes back when waiting long enough.</div>

<div>&nbsp;</div>

<div>The initial call-stack showed the crash here:</div>

<div>&nbsp;</div>

<div>segfault ip 0000560103865c60 error 4 in postgres<br/>
#0 &nbsp;deconstruct_array (array=array(at)entry=0x564008a6bde8,<br/>
elmtype=elmtype(at)entry=25, elmlen=elmlen(at)entry=-1, elmbyval=elmbyval(at)entry=0<br/>
&#39;&#92;000&#39;, elmalign=elmalign(at)entry=105 &#39;i&#39;, elemsp=elemsp(at)entry=0x7ffc5a33d570,<br/>
nullsp=0x7ffc5a33d578, nelemsp=0x7ffc5a33d56c)<br/>
&nbsp; &nbsp; at ./build/../src/backend/utils/adt/arrayfuncs.c:3530</div>

<div>&nbsp;</div>

<div>Which is later on only a read operation on the array address plus an offset:<br/>
p = att_addlength_pointer(p, elmlen, p);<br/>
expands to:<br/>
p = (cur_offset) + VARSIZE_ANY(attptr)&nbsp;</div>

<div>with VARSIZE_ANY doing a read like this:<br/>
((((varattrib_1b *) (PTR))-&gt;va_header) == 0x01)</div>

<div>&nbsp;</div>

<div>Crashing here would mean either the array pointer is off, or the pointer is too far towards the end of the array and va_header points after the end of the array.<br/>
Does the address sound reasonable? I am not that familiar with the virtual address space layout involved here. It is quite close to the instruction pointer address.</div>

<div>array=array(at)entry=0x564008a6bde8</div>

<div>&nbsp;</div>

<div>As I am doing an array overlap operation and the call stack passes through this function it hints that the crashing query could be the one above.</div>

<div>
<div>#2 &nbsp;0x0000564007ada7dd in arrayoverlap (fcinfo=0x564008a5d6e8)&nbsp;</div>
</div>

<div>If I have a bit more time and it would bring further details I could try getting a core for such crashes as well. Currently my assumption is that it is the same root cause.</div>

<div>&nbsp;</div>

<div>Please let me know what other details might help in getting an idea of what breaks here.</div>

<div>&nbsp;</div>

<div>Thanks,</div>

<div><br/>
Stephan<br/>
&nbsp;</div>
</div></div></body></html>

In reply to: Stephan Knauss (#4)
Re: BUG #16024: segfault ip 0000560103865c60 error 4 in postgres

On Fri, Sep 27, 2019 at 10:21 AM Stephan Knauss <sknauss@gmx.de> wrote:

segfault ip 0000560103865c60 error 4 in postgres
#0 deconstruct_array (array=array(at)entry=0x564008a6bde8,
elmtype=elmtype(at)entry=25, elmlen=elmlen(at)entry=-1, elmbyval=elmbyval(at)entry=0
'\000', elmalign=elmalign(at)entry=105 'i', elemsp=elemsp(at)entry=0x7ffc5a33d570,
nullsp=0x7ffc5a33d578, nelemsp=0x7ffc5a33d56c)
at ./build/../src/backend/utils/adt/arrayfuncs.c:3530

Can you show a full stack trace, please? Showing "bt full" output from
GDB is ideal.

--
Peter Geoghegan

#6Stephan Knauss
sknauss@gmx.de
In reply to: Peter Geoghegan (#5)
Re: BUG #16024: segfault ip 0000560103865c60 error 4 in postgres

 

Can you show a full stack trace, please? Showing "bt full" output from
GDB is ideal.

 
Sure. Here you go.

As reference, I assume the part of the query failing is this one:
ARRAY['motorway','trunk','primary','secondary','tertiary'] && tags

tags is text[]. It should have an even cardinality >=2.

(gdb) bt full
#0 deconstruct_array (array=array@entry=0x564008a6bde8, elmtype=elmtype@entry=25, elmlen=elmlen@entry=-1, elmbyval=elmbyval@entry=0 '\000', elmalign=elmalign@entry=105 'i', elemsp=elemsp@entry=0x7ffc5a33d570, nullsp=0x7ffc5a33d578,
nelemsp=0x7ffc5a33d56c) at ./build/../src/backend/utils/adt/arrayfuncs.c:3530
elems = <optimized out>
nulls = 0x564008a6be58 ""
nelems = <optimized out>
p = 0x564008a8be0c <error: Cannot access memory at address 0x564008a8be0c>
bitmap = 0x0
bitmask = 1
i = 1
__func__ = "deconstruct_array"
#1 0x0000564007ad7e48 in array_contain_compare (array1=array1@entry=0x564008a1c718, array2=array2@entry=0x564008a6bde8, collation=100, matchall=matchall@entry=0 '\000', fn_extra=<optimized out>)
at ./build/../src/backend/utils/adt/arrayfuncs.c:4111
result = 0 '\000'
element_type = 25
typentry = 0x564008a67c08
nelems1 = <optimized out>
values2 = 0x564008a6be38
nulls2 = 0x564008a6be58 ""
nelems2 = 2
typlen = -1
typbyval = 0 '\000'
typalign = 105 'i'
i = <optimized out>
j = <optimized out>
it1 = <optimized out>
locfcinfo = {flinfo = 0x564008a67c40, context = 0x0, resultinfo = 0x0, fncollation = 100, isnull = 0 '\000', nargs = 2, arg = {94833022715748, 94833023041036, 0, 94832877895682, 94833022066616, 94833022072328, 140721821832704,
94833011943744, 94833022072328, 94833022072328, 140721821832896, 1, 140721821832752, 94833006466294, 94833011943744, 94833011943744, 1, 94833022072328, 140721821832880, 94833006475060, 94833022872008, 94833022871809, 101531, 1,
863782347751425, 72057594037927936, 94832877895681, 1901405520149961216, 140721821832880, 1, 1, 140374854075720, 94833023081832, 94833023041352, 140721821832928, 94833006461707, 863782347751425, 72057594037927936, 1,
1901405520149961216, 140721821832976, 94833004136278, 140721821833056, 94833006368963, 140374854075720, 94833007703462, 0, 94832877895680, 140721821833104, 94833006368963, 8192, 9656843508925360256, 0, 94832877895680,
94833009026328, 94832877899733, 94833009027128, 1901405520149961216, 140721821833152, 140370932574592, 16777216, 0, 140721821833152, 1901405520149961216, 140370932574592, 94833022581928, 140371762374784, 303952, 140721821833376,
94833006375993, 1305463899554112, 140721821833296, 140721821833260, 140721821833431, 112, 140721821833264, 94833022987064, 18446744070427260766, 140370906723840, 1305463899553800, 320, 9386627527654289668, 70373039146623, 1862361,
303952, 20, 70373039146623, 1862361, 303920, 1901405520149961216, 101530, 140374854028144, 0, 303952, 140371762374784, 98565, 140374854028144, 94833022093368, 140721821833472, 94833006500083},
argnull = "\000\000\063Z\374\177\000\000\000Z\201\032?&c\001\070Υ\b@V\000\000\000Z\201\032?&c\032\370\232\246\b@V\000\000\060\000\000\000\000\000\000\000\225|\b٪\177\000\000\030ǡ\b@V\000\000\300\000\000\000\000\000\000\000x֥\b@V\000\000\060\331\063Z\374\177\000\000^2\276\a@V\000\000-\000\000"}
__func__ = "array_contain_compare"
#2 0x0000564007ada7dd in arrayoverlap (fcinfo=0x564008a5d6e8) at ./build/../src/backend/utils/adt/arrayfuncs.c:4201
array1 = 0x564008a1c718
array2 = 0x564008a6bde8
collation = <optimized out>
result = <optimized out>
#3 0x000056400799d3bc in ExecMakeFunctionResultNoSets (fcache=0x564008a5d678, econtext=0x564008a5d1e8, isNull=0x7ffc5a33da37 "", isDone=<optimized out>) at ./build/../src/backend/executor/execQual.c:2041
arg = <optimized out>
result = <optimized out>
fcinfo = 0x564008a5d6e8
fcusage = {fs = 0x0, save_f_total_time = {tv_sec = 16, tv_usec = 99952}, save_total = {tv_sec = 0, tv_usec = 140721821833735}, f_start = {tv_sec = 140371762374784, tv_usec = 303952}}
i = <optimized out>
#4 0x00005640079a30ef in ExecQual (qual=qual@entry=0x564008a5df78, econtext=econtext@entry=0x564008a5d1e8, resultForNull=resultForNull@entry=0 '\000') at ./build/../src/backend/executor/execQual.c:5316
clause = <optimized out>
expr_value = <optimized out>
isNull = 0 '\000'
result = 1 '\001'
oldContext = <optimized out>
l = 0x564008a5df58
#5 0x00005640079a3655 in ExecScan (node=node@entry=0x564008a5d0d8, accessMtd=accessMtd@entry=0x5640079b9b00 <SeqNext>, recheckMtd=recheckMtd@entry=0x5640079b9af0 <SeqRecheck>) at ./build/../src/backend/executor/execScan.c:208
slot = <optimized out>
econtext = 0x564008a5d1e8
qual = 0x564008a5df78
projInfo = 0x564008a696f8
isDone = ExprSingleResult
resultSlot = <optimized out>
#6 0x00005640079b9bb8 in ExecSeqScan (node=node@entry=0x564008a5d0d8) at ./build/../src/backend/executor/nodeSeqscan.c:127
No locals.
#7 0x000056400799c340 in ExecProcNode (node=node@entry=0x564008a5d0d8) at ./build/../src/backend/executor/execProcnode.c:419
result = <optimized out>
__func__ = "ExecProcNode"
#8 0x00005640079b5f59 in ExecModifyTable (node=node@entry=0x564008a5cf98) at ./build/../src/backend/executor/nodeModifyTable.c:1392
estate = 0x564008a5ccf8
operation = <optimized out>
saved_resultRelInfo = <optimized out>
resultRelInfo = 0x564008a5ce88
subplanstate = 0x564008a5d0d8
junkfilter = 0x0
slot = <optimized out>
planSlot = <optimized out>
tupleid = <optimized out>
tuple_ctid = {ip_blkid = {bi_hi = 32764, bi_lo = 0}, ip_posid = 31309}
oldtupdata = {t_len = 3, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 53416}, t_tableOid = 22080, t_data = 0x564008a33378}
oldtuple = <optimized out>
__func__ = "ExecModifyTable"
#9 0x000056400799c380 in ExecProcNode (node=node@entry=0x564008a5cf98) at ./build/../src/backend/executor/execProcnode.c:396
result = <optimized out>
__func__ = "ExecProcNode"
#10 0x00005640079984f6 in ExecutePlan (dest=0x564008a39bc0, direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>, operation=CMD_INSERT, use_parallel_mode=<optimized out>, planstate=0x564008a5cf98, estate=0x564008a5ccf8)
at ./build/../src/backend/executor/execMain.c:1567
slot = <optimized out>
current_tuple_count = 0
slot = <optimized out>
current_tuple_count = <optimized out>
#11 standard_ExecutorRun (queryDesc=0x564008a29ec8, direction=<optimized out>, count=0) at ./build/../src/backend/executor/execMain.c:339
estate = 0x564008a5ccf8
operation = CMD_INSERT
dest = 0x564008a39bc0
sendTuples = <optimized out>
oldcontext = <optimized out>
#12 0x0000564007ab6eb8 in ProcessQuery (plan=<optimized out>,
sourceText=0x5640089ed678 "DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...,
params=0x0, dest=0x564008a39bc0, completionTag=0x7ffc5a33e220 "") at ./build/../src/backend/tcop/pquery.c:187
queryDesc = 0x564008a29ec8
__func__ = "ProcessQuery"
#13 0x0000564007ab70ed in PortalRunMulti (portal=portal@entry=0x5640089536e8, isTopLevel=isTopLevel@entry=0 '\000', setHoldSnapshot=setHoldSnapshot@entry=0 '\000', dest=dest@entry=0x564008a39bc0, altdest=altdest@entry=0x564008a39bc0,
completionTag=completionTag@entry=0x7ffc5a33e220 "") at ./build/../src/backend/tcop/pquery.c:1303
pstmt = 0x564008a35260
stmt = 0x564008a35260
active_snapshot_set = 1 '\001'
stmtlist_item = 0x564008a3ae28
#14 0x0000564007ab7cd1 in PortalRun (portal=portal@entry=0x5640089536e8, count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=0 '\000', dest=dest@entry=0x564008a39bc0, altdest=altdest@entry=0x564008a39bc0,
completionTag=completionTag@entry=0x7ffc5a33e220 "") at ./build/../src/backend/tcop/pquery.c:815
save_exception_stack = 0x7ffc5a33e0d0
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {94833022679960, -3071035816762185612, 94833021892328, 94833022835648, 2, 94833022680040, -3071035816655230860, -8783771880478250892}, __mask_was_saved = 0, __saved_mask = {__val = {2817148525,
94833022425664, 94832877895681, 94833022422312, 94833009247749, 140721821834928, 88, 94833021892328, 94833009140074, 2, 94833022680040, 140721821834960, 94833007801154, 2, 94833021892328, 140721821834992}}}}
result = <optimized out>
nprocessed = <optimized out>
saveTopTransactionResourceOwner = 0x5640089a1438
saveTopTransactionContext = 0x564008934ce8
saveActivePortal = 0x0
saveResourceOwner = 0x5640089a1438
savePortalContext = 0x0
saveMemoryContext = 0x564008934ce8
__func__ = "PortalRun"
#15 0x0000564007ab5455 in exec_simple_query (
query_string=0x5640089ed678 "DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...)
at ./build/../src/backend/tcop/postgres.c:1109
parsetree = 0x564008a13b98
portal = 0x5640089536e8
snapshot_set = <optimized out>
commandTag = <optimized out>
completionTag = "\000REATE TABLE\000\000\000\000P\342\063Z\374\177\000\000^2\276\a@V\000\000\220\342\063Z\374\177\000\000$\343\063Z\374\177\000\000p\342\063Z\374\177\000\000\266\223\234\a@V\000"
querytree_list = <optimized out>
plantree_list = 0x564008a3b408
receiver = 0x564008a39bc0
format = 0
dest = DestRemote
oldcontext = <optimized out>
parsetree_list = 0x5640089ee958
save_log_statement_stats = 0 '\000'
was_logged = 0 '\000'
msec_str = "\340\342\063Z\374\177\000\000F\001\000\000\000\000\000\000\200\350\063Z\374\177\000\000P\344\063Z\374\177\000"
parsetree_item = 0x564008a13be8
isTopLevel = <optimized out>
dest = <optimized out>
oldcontext = <optimized out>
parsetree_list = <optimized out>
parsetree_item = <optimized out>
save_log_statement_stats = <optimized out>
was_logged = <optimized out>
isTopLevel = <optimized out>
msec_str = <optimized out>
parsetree = <optimized out>
snapshot_set = <optimized out>
commandTag = <optimized out>
completionTag = <optimized out>
querytree_list = <optimized out>
plantree_list = <optimized out>
portal = <optimized out>
receiver = <optimized out>
format = <optimized out>
stmt = <optimized out>
fportal = <optimized out>
#16 PostgresMain (argc=<optimized out>, argv=argv@entry=0x564008989868, dbname=<optimized out>, username=<optimized out>) at ./build/../src/backend/tcop/postgres.c:4101
query_string = 0x5640089ed678 "DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...
firstchar = <optimized out>
input_message = {
data = 0x5640089ed678 "DROP TABLE if EXISTS loose;CREATE UNLOGGED TABLE loose (id bigint, node bigint, lat float, lon float, x integer, y integer);INSERT INTO loose SELECT id, nodes[1] AS node FROM planet_osm_ways WHERE ARR"...,
len = 1649, maxlen = 2048, cursor = 1649}
local_sigjmp_buf = {{__jmpbuf = {140721821835376, -3071035815556323212, 1, 94833022113848, 94833022113896, 0, -3071035816743311244, -8783771878490150796}, __mask_was_saved = 1, __saved_mask = {__val = {0, 140721821835604,
140374822780832, 0, 94833012125920, 8192, 4, 94833021897870, 94833021763952, 27, 94833021897870, 140721821835664, 94833007799533, 4, 1901405520149961216, 140721821835712}}}}
send_ready_for_query = 0 '\000'
disable_idle_in_transaction_timeout = 0 '\000'
__func__ = "PostgresMain"
#17 0x0000564007831664 in BackendRun (port=0x564008981600) at ./build/../src/backend/postmaster/postmaster.c:4339
ac = 1
secs = 622763581
usecs = 141864
i = 1
av = 0x564008989868
maxac = <optimized out>
av = <optimized out>
maxac = <optimized out>
ac = <optimized out>
secs = <optimized out>
usecs = <optimized out>
i = <optimized out>
#18 BackendStartup (port=0x564008981600) at ./build/../src/backend/postmaster/postmaster.c:4013
bn = <optimized out>
pid = <optimized out>
bn = <optimized out>
pid = <optimized out>
save_errno = <optimized out>
#19 ServerLoop () at ./build/../src/backend/postmaster/postmaster.c:1722
port = <optimized out>
i = <optimized out>
rmask = {fds_bits = {32, 0 <repeats 15 times>}}
selres = <optimized out>
now = <optimized out>
readmask = {fds_bits = {96, 0 <repeats 15 times>}}
nSockets = <optimized out>
last_lockfile_recheck_time = 1569448353
last_touch_time = 1569448284
__func__ = "ServerLoop"
#20 0x0000564007a51e29 in PostmasterMain (argc=5, argv=0x564008934090) at ./build/../src/backend/postmaster/postmaster.c:1330
opt = <optimized out>
status = <optimized out>
userDoption = <optimized out>
listen_addr_saved = <optimized out>
i = <optimized out>
output_config_variable = <optimized out>
__func__ = "PostmasterMain"
#21 0x0000564007832ed2 in main (argc=5, argv=0x564008934090) at ./build/../src/backend/main/main.c:228
No locals.