Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

Started by Phil Florent · almost 8 years ago · 48 messages · hackers
Jump to latest
#1Phil Florent
philflorent@hotmail.com

Hi,

I obtained an XX000 error testing my DSS application with PostgreSQL 11 beta 1.

Here is a simplified version of my test, no data in the tables :

-- 11
select version();
version

-----------------------------------------------------------------------------------------------------------------------

PostgreSQL 11beta1 (Debian 11~beta1-2.pgdg+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 7.3.0-19) 7.3.0, 64-bit
(1 ligne)

-- connected superuser -- postgres
create user a password 'a';
create schema a authorization a;
create user b password 'b';
create schema b authorization b;
create user c password 'c';
create schema c authorization c;
create user z password 'z';
create schema z authorization z;

-- connected a
create table t1(k1 timestamp, c1 int);
create view v as select k1, c1 from t1;
grant usage on schema a to z;
grant select on all tables in schema a to z;

-- connected b
create table t2(k1 timestamp, c1 int) partition by range(k1);
create table t2_2016 partition of t2 for values from ('2016-01-01') to ('2017-01-01');
create table t2_2017 partition of t2 for values from ('2017-01-01') to ('2018-01-01');
create table t2_2018 partition of t2 for values from ('2018-01-01') to ('2019-01-01');
create view v as select k1, c1 from t2;
grant select on all tables in schema b to z;
grant usage on schema b to z;

-- connected c
create table t3(k1 timestamp, c1 int) partition by range(k1);
create table t3_2016 partition of t3 for values from ('2016-01-01') to ('2017-01-01');
create table t3_2017 partition of t3 for values from ('2017-01-01') to ('2018-01-01');
create table t3_2018 partition of t3 for values from ('2018-01-01') to ('2019-01-01');
create view v as select k1, c1 from t3;
grant select on all tables in schema c to z;
grant usage on schema c to z;

-- connected z
create view v as
select k1, c1 from
(select * from a.v
UNION ALL
select * from b.v
UNION ALL
select * from c.v) vabc ;

explain analyze select * from v where v.k1 > date '2017-01-01';
ERREUR: XX000: did not find all requested child rels in append_rel_list
EMPLACEMENT : find_appinfos_by_relids, prepunion.c : 2643

set enable_partition_pruning=off;
SET

explain analyze select * from v where v.k1 > date '2017-01-01';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Append (cost=0.00..272.30 rows=4760 width=12) (actual time=0.217..0.217 rows=0 loops=1)
-> Seq Scan on t1 (cost=0.00..35.50 rows=680 width=12) (actual time=0.020..0.020 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t2_2016 (cost=0.00..35.50 rows=680 width=12) (actual time=0.035..0.035 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t2_2017 (cost=0.00..35.50 rows=680 width=12) (actual time=0.016..0.016 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t2_2018 (cost=0.00..35.50 rows=680 width=12) (actual time=0.015..0.015 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t3_2016 (cost=0.00..35.50 rows=680 width=12) (actual time=0.040..0.040 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t3_2017 (cost=0.00..35.50 rows=680 width=12) (actual time=0.016..0.016 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t3_2018 (cost=0.00..35.50 rows=680 width=12) (actual time=0.016..0.016 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
Planning Time: 0.639 ms
Execution Time: 0.400 ms

set enable_partition_pruning=on;
SET

explain analyze select * from v where v.k1 > date '2017-01-01';
ERREUR: XX000: did not find all requested child rels in append_rel_list
EMPLACEMENT : find_appinfos_by_relids, prepunion.c : 2643

-- 10
select version();
version
--------------------------------------------------------------------------------------------------------------------------------------------

PostgreSQL 10.4 (Ubuntu 10.4-2.pgdg16.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609, 64-bit
(1 ligne)

-- connected superuser -- postgres
create user a password 'a';
create schema a authorization a;

create user b password 'b';
create schema b authorization b;

create user c password 'c';
create schema c authorization c;

create user z password 'z';
create schema z authorization z;

-- connected a
create table t1(k1 timestamp, c1 int);
create view v as select k1, c1 from t1;
grant usage on schema a to z;
grant select on all tables in schema a to z;

-- connected b
create table t2(k1 timestamp, c1 int) partition by range(k1);
create table t2_2016 partition of t2 for values from ('2016-01-01') to ('2017-01-01');
create table t2_2017 partition of t2 for values from ('2017-01-01') to ('2018-01-01');
create table t2_2018 partition of t2 for values from ('2018-01-01') to ('2019-01-01');
create view v as select k1, c1 from t2;
grant select on all tables in schema b to z;
grant usage on schema b to z;

-- connected c
create table t3(k1 timestamp, c1 int) partition by range(k1);
create table t3_2016 partition of t3 for values from ('2016-01-01') to ('2017-01-01');
create table t3_2017 partition of t3 for values from ('2017-01-01') to ('2018-01-01');
create table t3_2018 partition of t3 for values from ('2018-01-01') to ('2019-01-01');
create view v as select k1, c1 from t3;
grant select on all tables in schema c to z;
grant usage on schema c to z;

-- connected z
create view v as
select k1, c1 from
(select * from a.v
UNION ALL
select * from b.v
UNION ALL
select * from c.v) vabc ;

explain analyze select * from v where v.k1 > date '2017-01-01';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Append (cost=0.00..177.50 rows=3400 width=12) (actual time=0.206..0.206 rows=0 loops=1)
-> Seq Scan on t1 (cost=0.00..35.50 rows=680 width=12) (actual time=0.044..0.044 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t2_2017 (cost=0.00..35.50 rows=680 width=12) (actual time=0.020..0.020 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t2_2018 (cost=0.00..35.50 rows=680 width=12) (actual time=0.020..0.020 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t3_2017 (cost=0.00..35.50 rows=680 width=12) (actual time=0.036..0.036 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
-> Seq Scan on t3_2018 (cost=0.00..35.50 rows=680 width=12) (actual time=0.020..0.020 rows=0 loops=1)
Filter: (k1 > '2017-01-01'::date)
Planning time: 0.780 ms
Execution time: 0.427 ms

Best regards

Phil

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Phil Florent (#1)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

Phil Florent <philflorent@hotmail.com> writes:

explain analyze select * from v where v.k1 > date '2017-01-01';
ERREUR: XX000: did not find all requested child rels in append_rel_list
EMPLACEMENT : find_appinfos_by_relids, prepunion.c : 2643

Reproduced here, thanks for the report! This is very timely since
we were just in process of rewriting that code anyway ...

regards, tom lane

#3David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#2)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 9 June 2018 at 04:57, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Phil Florent <philflorent@hotmail.com> writes:

explain analyze select * from v where v.k1 > date '2017-01-01';
ERREUR: XX000: did not find all requested child rels in append_rel_list
EMPLACEMENT : find_appinfos_by_relids, prepunion.c : 2643

Reproduced here, thanks for the report! This is very timely since
we were just in process of rewriting that code anyway ...

Yeah. Thanks for the report Phil.

It looks like this was 499be013de6, which was one of mine.

A more simple case to reproduce is:

drop table listp;
create table listp (a int, b int) partition by list(a);
create table listp1 partition of listp for values in (1);
select * from (select * from listp union all select * from listp) t where a = 1;

I'll look in more detail after sleeping.

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#4David Rowley
dgrowleyml@gmail.com
In reply to: David Rowley (#3)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 9 June 2018 at 06:50, David Rowley <david.rowley@2ndquadrant.com> wrote:

It looks like this was 499be013de6, which was one of mine.

A more simple case to reproduce is:

drop table listp;
create table listp (a int, b int) partition by list(a);
create table listp1 partition of listp for values in (1);
select * from (select * from listp union all select * from listp) t where a = 1;

I'll look in more detail after sleeping.

So it looks like I've assumed that the Append path's partitioned_rels
will only ever be set for partitioned tables, but it can, in fact, be
set for UNION ALL parents too when the union children are partitioned
tables.

As a discussion topic, I've attached a patch which does resolve the
error, but it also disables run-time pruning in this case.

There might be some way we can treat UNION ALL parents differently
when building the PartitionPruneInfos. I've just not thought of what
this would be just yet. If I can't think of that, I wonder if this is
a rare enough case not to bother with run-time partition pruning.

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

run-time_prune_only_for_partitioned_tables.patch (application/octet-stream) — Download (+2 −1)
#5Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#4)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

David Rowley <david.rowley@2ndquadrant.com> writes:

So it looks like I've assumed that the Append path's partitioned_rels
will only ever be set for partitioned tables, but it can, in fact, be
set for UNION ALL parents too when the union children are partitioned
tables.

As a discussion topic, I've attached a patch which does resolve the
error, but it also disables run-time pruning in this case.

There might be some way we can treat UNION ALL parents differently
when building the PartitionPruneInfos. I've just not thought of what
this would be just yet. If I can't think of that, I wonder if this is
a rare enough case not to bother with run-time partition pruning.

So, IIUC, the issue is that for partitioning cases Append expects *all*
its children to be partitions of the *same* partitioned table? That
is, you could also break it with

select * from partitioned_table_a
union all
select * from partitioned_table_b

?

If so, maybe the best solution is to not allow a partitioning appendrel
to be flattened into an appendrel generated in other ways (particularly,
via UNION ALL). I also wonder whether it was a bad idea to treat these
as the same kind of path/plan in the first place.

regards, tom lane

#6David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#5)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 10 June 2018 at 04:48, Tom Lane <tgl@sss.pgh.pa.us> wrote:

So, IIUC, the issue is that for partitioning cases Append expects *all*
its children to be partitions of the *same* partitioned table? That
is, you could also break it with

select * from partitioned_table_a
union all
select * from partitioned_table_b

?

Not quite. I think what I sent above is the most simple way to break
it. Your case won't because there are no quals to prune with, so
run-time pruning is never attempted.

If so, maybe the best solution is to not allow a partitioning appendrel
to be flattened into an appendrel generated in other ways (particularly,
via UNION ALL). I also wonder whether it was a bad idea to treat these
as the same kind of path/plan in the first place.

That might be the best idea. I'll look into that now. The only
drawback I see is that we'll end up pulling tuples through more Append
nodes in cases like you mentioned above.

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#7Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#6)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

David Rowley <david.rowley@2ndquadrant.com> writes:

On 10 June 2018 at 04:48, Tom Lane <tgl@sss.pgh.pa.us> wrote:

So, IIUC, the issue is that for partitioning cases Append expects *all*
its children to be partitions of the *same* partitioned table? That
is, you could also break it with

select * from partitioned_table_a
union all
select * from partitioned_table_b

?

Not quite. I think what I sent above is the most simple way to break
it. Your case won't because there are no quals to prune with, so
run-time pruning is never attempted.

Well, I hadn't bothered to put in the extra code needed to have a qual
to prune with, but my point remains that it doesn't seem like the current
Append code is prepared to cope with an Append that contains partitions
of more than one top-level partitioned table.

I just had a thought that might lead to a nice solution to that, or
might be totally crazy. What if we inverted the sense of the bitmaps
that track partition pruning state, so that instead of a bitmap of
valid partitions that need to be scanned, we had a bitmap of pruned
partitions that we know we don't need to scan? (The indexes of this
bitmap would be subplan indexes not partition indexes.) With this
representation, it doesn't matter if some of the Append's children
are not supposed to participate in pruning; they just don't ever get
added to the bitmap of what to skip. It's also fairly clear, I think,
how to handle independent pruning rules for different top-level tables
that are being unioned together: just OR the what-to-skip bitmaps.
But there may be some reason why this isn't workable.

regards, tom lane

#8David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#7)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 11 June 2018 at 12:19, Tom Lane <tgl@sss.pgh.pa.us> wrote:

David Rowley <david.rowley@2ndquadrant.com> writes:

On 10 June 2018 at 04:48, Tom Lane <tgl@sss.pgh.pa.us> wrote:

So, IIUC, the issue is that for partitioning cases Append expects *all*
its children to be partitions of the *same* partitioned table? That
is, you could also break it with

select * from partitioned_table_a
union all
select * from partitioned_table_b

?

Not quite. I think what I sent above is the most simple way to break
it. Your case won't because there are no quals to prune with, so
run-time pruning is never attempted.

Well, I hadn't bothered to put in the extra code needed to have a qual
to prune with, but my point remains that it doesn't seem like the current
Append code is prepared to cope with an Append that contains partitions
of more than one top-level partitioned table.

Besides the partition pruning code, was there other aspects of Append
that you saw to be incompatible with mixed hierarchies?

I just had a thought that might lead to a nice solution to that, or
might be totally crazy. What if we inverted the sense of the bitmaps
that track partition pruning state, so that instead of a bitmap of
valid partitions that need to be scanned, we had a bitmap of pruned
partitions that we know we don't need to scan? (The indexes of this
bitmap would be subplan indexes not partition indexes.) With this
representation, it doesn't matter if some of the Append's children
are not supposed to participate in pruning; they just don't ever get
added to the bitmap of what to skip. It's also fairly clear, I think,
how to handle independent pruning rules for different top-level tables
that are being unioned together: just OR the what-to-skip bitmaps.
But there may be some reason why this isn't workable.

I think it would be less efficient. A common case and one that I very
much would like to make as fast as possible is when all but a single
partition is pruned. Doing the opposite sounds like more effort would
need to be expended to get the subplans that we do need to scan.

I don't really see the way it works now as a huge problem to overcome
in pruning. We'd just a list of subplans that don't belong to the
hierarchy and tag them on to the matches found in
ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans. The
bigger issue to overcome is the mixed flattened list of partition RT
indexes in partitioned_rels. Perhaps having a list of Lists for
partitioned_rels could be used to resolve that. The question is more,
how to solve for PG11. Do we need that?

I think we'll very soon be wanting to have ordered partition scans
where something like:

create table listp(a int) partition by list(a);
create index on listp(a);
create table listp1 partition of listp for values in (1);
create table listp2 partition of listp for values in (2);

and

select * from listp order by a;

would be possible with an Append and Index Scan, rather than having a
MergeAppend or Sort. In which case we'll not want mixed partition
hierarchies in the Append subplans. Although, perhaps that would mean
we just wouldn't pullup AppendPaths which have PathKeys.

I have written and attached the patch to stop flattening of
partitioned tables into UNION ALL parent's paths, meaning we can now
get nested Append and MergeAppend paths.

I've added Robert too as I know he was the committer of partitioning
and parallel Append. Maybe he has a view on what should be done about
this? Is not flattening the paths a problem?

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

dont_flatten_append_paths_for_partitions.patch (application/octet-stream) — Download (+68 −29)
#9Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: David Rowley (#8)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 2018/06/11 16:49, David Rowley wrote:

On 11 June 2018 at 12:19, Tom Lane <tgl@sss.pgh.pa.us> wrote:

David Rowley <david.rowley@2ndquadrant.com> writes:

On 10 June 2018 at 04:48, Tom Lane <tgl@sss.pgh.pa.us> wrote:

So, IIUC, the issue is that for partitioning cases Append expects *all*
its children to be partitions of the *same* partitioned table? That
is, you could also break it with

select * from partitioned_table_a
union all
select * from partitioned_table_b

?

Not quite.

That would be correct I think. An Append may contain multiple partitioned
tables that all appear under an UNION ALL parent, as in the OP's case and
the example above. In this case, the partitioned_rels list of Append
consist of non-leaf tables from *all* of the partitioned tables. Before
run-time pruning arrived, the only purpose of partitioned_rels list was to
make sure that the executor goes through it and locks all of those
non-leaf tables (ExecLockNonLeafAppendTables). Run-time pruning expanded
its usage by depending on it to generate run-time pruning info.

I just had a thought that might lead to a nice solution to that, or
might be totally crazy. What if we inverted the sense of the bitmaps
that track partition pruning state, so that instead of a bitmap of
valid partitions that need to be scanned, we had a bitmap of pruned
partitions that we know we don't need to scan? (The indexes of this
bitmap would be subplan indexes not partition indexes.) With this
representation, it doesn't matter if some of the Append's children
are not supposed to participate in pruning; they just don't ever get
added to the bitmap of what to skip. It's also fairly clear, I think,
how to handle independent pruning rules for different top-level tables
that are being unioned together: just OR the what-to-skip bitmaps.
But there may be some reason why this isn't workable.

I think it would be less efficient. A common case and one that I very
much would like to make as fast as possible is when all but a single
partition is pruned. Doing the opposite sounds like more effort would
need to be expended to get the subplans that we do need to scan.

I don't really see the way it works now as a huge problem to overcome
in pruning. We'd just need a list of subplans that don't belong to the
hierarchy and tag them on to the matches found in
ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans. The
bigger issue to overcome is the mixed flattened list of partition RT
indexes in partitioned_rels. Perhaps having a list of Lists for
partitioned_rels could be used to resolve that. The question is more,
how to solve for PG11. Do we need that?

I think we'll very soon be wanting to have ordered partition scans
where something like:

create table listp(a int) partition by list(a);
create index on listp(a);
create table listp1 partition of listp for values in (1);
create table listp2 partition of listp for values in (2);

and

select * from listp order by a;

would be possible with an Append and Index Scan, rather than having a
MergeAppend or Sort. In which case we'll not want mixed partition
hierarchies in the Append subplans. Although, perhaps that would mean
we just wouldn't pullup AppendPaths which have PathKeys.

I have written and attached the patch to stop flattening of
partitioned tables into UNION ALL parent's paths, meaning we can now
get nested Append and MergeAppend paths.

I've added Robert too as I know he was the committer of partitioning
and parallel Append. Maybe he has a view on what should be done about
this? Is not flattening the paths a problem?

Not speaking for Robert here, just saying from what I know.

I don't think your patch breaks anything, even if does change the shape of
the plan. So, for:

select * from partitioned_table_a
union all
select * from partitioned_table_b

The only thing that changes with the patch is that
ExecLockNonLeafAppendTables is called *twice* for the two nested Appends
corresponding to partitioned_table_a and partitioned_table_b, resp.,
instead of just once for the top level Append corresponding to the UNION
ALL parent. In fact, when called for the top level Append,
ExecLockNonLeafAppendTables is now a no-op.

Thanks,
Amit

#10David Rowley
dgrowleyml@gmail.com
In reply to: Amit Langote (#9)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 15 June 2018 at 20:37, Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> wrote:

select * from partitioned_table_a
union all
select * from partitioned_table_b

The only thing that changes with the patch is that
ExecLockNonLeafAppendTables is called *twice* for the two nested Appends
corresponding to partitioned_table_a and partitioned_table_b, resp.,
instead of just once for the top level Append corresponding to the UNION
ALL parent. In fact, when called for the top level Append,
ExecLockNonLeafAppendTables is now a no-op.

If the top level Append is the UNION ALL one, then there won't be any
partitioned_rels. If that's what you mean by no-op then, yeah. There
are no duplicate locks already obtained in the parent with the child
Append node.

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#11Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: David Rowley (#10)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 2018/06/15 20:41, David Rowley wrote:

On 15 June 2018 at 20:37, Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> wrote:

select * from partitioned_table_a
union all
select * from partitioned_table_b

The only thing that changes with the patch is that
ExecLockNonLeafAppendTables is called *twice* for the two nested Appends
corresponding to partitioned_table_a and partitioned_table_b, resp.,
instead of just once for the top level Append corresponding to the UNION
ALL parent. In fact, when called for the top level Append,
ExecLockNonLeafAppendTables is now a no-op.

If the top level Append is the UNION ALL one, then there won't be any
partitioned_rels. If that's what you mean by no-op then, yeah. There
are no duplicate locks already obtained in the parent with the child
Append node.

Yeah, that's what I meant to say. I looked for whether the locks end up
being taken twice, once in the UNION ALL parent's ExecInitAppend and then
again in the individual child Appends' ExecInitAppend, but that they
don't, so the patch is right.

Thanks,
Amit

#12David Rowley
dgrowleyml@gmail.com
In reply to: Amit Langote (#11)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 18 June 2018 at 14:36, Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> wrote:

On 2018/06/15 20:41, David Rowley wrote:

If the top level Append is the UNION ALL one, then there won't be any
partitioned_rels. If that's what you mean by no-op then, yeah. There
are no duplicate locks already obtained in the parent with the child
Append node.

Yeah, that's what I meant to say. I looked for whether the locks end up
being taken twice, once in the UNION ALL parent's ExecInitAppend and then
again in the individual child Appends' ExecInitAppend, but that they
don't, so the patch is right.

Thanks for looking.

Robert, do you have any objections to the proposed patch?

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#13Robert Haas
robertmhaas@gmail.com
In reply to: David Rowley (#12)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On Sun, Jun 17, 2018 at 10:59 PM, David Rowley
<david.rowley@2ndquadrant.com> wrote:

Thanks for looking.

Robert, do you have any objections to the proposed patch?

I don't have time to study this right now, but I think the main
possible objection is around performance. If not flattening the
Append is the best way to make queries run fast, then we should do it
that way. If making pruning capable of coping with mixed hierarchies
is going to be faster, then we should do that. If I were to speculate
in the absence of data, my guess would be that failing to flatten the
hierarchy is going to lead to a significant per-tuple cost, while the
cost of making run-time pruning smarter is likely to be incurred once
per rescan (i.e. a lot less). But that might be wrong, and it might
be impractical to get this working perfectly in v11 given the time we
have. But I would suggest that you performance test a query that ends
up feeding lots of tuples through two Append nodes rather than one and
see how much it hurts.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#14David Rowley
dgrowleyml@gmail.com
In reply to: Robert Haas (#13)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 20 June 2018 at 02:28, Robert Haas <robertmhaas@gmail.com> wrote:

On Sun, Jun 17, 2018 at 10:59 PM, David Rowley

Robert, do you have any objections to the proposed patch?

I don't have time to study this right now, but I think the main
possible objection is around performance. If not flattening the
Append is the best way to make queries run fast, then we should do it
that way. If making pruning capable of coping with mixed hierarchies
is going to be faster, then we should do that. If I were to speculate
in the absence of data, my guess would be that failing to flatten the
hierarchy is going to lead to a significant per-tuple cost, while the
cost of making run-time pruning smarter is likely to be incurred once
per rescan (i.e. a lot less). But that might be wrong, and it might
be impractical to get this working perfectly in v11 given the time we
have. But I would suggest that you performance test a query that ends
up feeding lots of tuples through two Append nodes rather than one and
see how much it hurts.

I've performed two tests. One to see what the overhead of the
additional append is, and one to see what the saving from pruning away
unneeded partitions is. I tried to make the 2nd test use a realistic
number of partitions. Partition pruning will become more useful with
higher numbers of partitions.

Test 1: Test overhead of pulling tuples through an additional append

create table p (a int) partition by list (a);
create table p1 partition of p for values in(1);
insert into p select 1 from generate_series(1,1000000);
vacuum p1;
set max_parallel_workers_per_gather=0;

select count(*) from (select * from p union all select * from p) p;

Unpatched:
tps = 8.530355 (excluding connections establishing)

Patched:
tps = 7.853939 (excluding connections establishing)

Patched version takes 108.61% of the unpatched time.

Test 2: Tests time saved from run-time partition pruning and not
scanning the index on 23 of the partitions.

create table rp (d date) partition by range (d);
select 'CREATE TABLE rp' || x::text || ' PARTITION OF rp FOR VALUES
FROM (''' || '2017-01-01'::date + (x::text || ' month')::interval ||
''') TO (''' || '2017-01-01'::date + ((x+1)::text || '
month')::interval || ''');'
from generate_Series(0,23) x;
\gexec
insert into rp select d::date from
generate_series('2017-01-01','2018-12-31', interval '10 sec') d;
create index on rp (d);

select count(*) from (select * from rp union all select * from rp) rp
where d = current_date;

Unpatched: (set enable_partition_pruning = 0; to make it work)
tps = 260.969953 (excluding connections establishing)

Patched:
tps = 301.319038 (excluding connections establishing)

Patched version takes 86.61% of the unpatched time.

So, I don't think that really concludes much. I'd say the overhead
shown in test 1 is going to be a bit more predictable as it will
depend on how many tuples are being pulled through the additional
Append, but the savings shown in test 2 will vary. Having run-time
pruning not magically fail to work when the partitioned table is part
of a UNION ALL certainly seems less surprising.

If I drop the index from the "d" column in test 2, the performance gap
increases significantly and is roughly proportional to the number of
partitions.

Unpatched:
tps = 0.523691 (excluding connections establishing)

Patched:
tps = 13.453964 (excluding connections establishing)

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#15David Rowley
dgrowleyml@gmail.com
In reply to: David Rowley (#14)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 20 June 2018 at 13:20, David Rowley <david.rowley@2ndquadrant.com> wrote:

select count(*) from (select * from p union all select * from p) p;

Unpatched:
tps = 8.530355 (excluding connections establishing)

Patched:
tps = 7.853939 (excluding connections establishing)

I've been thinking about this and I'm not so happy about my earlier
proposed fix about not flattening the Appends for UNION ALL parents
with partitioned relation children. Apart from the additional
overhead of pulling all tuples through an additional Append node, I
also don't like it because we one day might decide to fix it and make
it flatten these again. It would be much nicer not to fool around
with the plan shapes like that from one release to the next.

So, today I decided to write some code to just make the UNION ALL
parents work with partitioned tables while maintaining the Append
hierarchy flattening.

I've only just completed reading back through all the code and I think
it's correct. I ended up changing add_paths_to_append_rel() so that
instead of performing concatenation on partitioned_rels from two UNION
ALL children, it creates a List of lists. I believe this list can
only end up with a 2-level hierarchy of partitioned rels. I tested
this and it seems to work as I expect, although I think I need to
study the code a bit more to ensure it can't happen. I need to check
if there's some cases where nested UNION ALLs cannot be flattened to
have a single UNION ALL parent. Supporting this did cause me to have
to check the List->type in a few places. I only saw one other place in
the code where this is done, so I figured that meant it was okay.

In the executor side, I didn't add any pre-calculation code for each
partition hierarchy. I did this previously to save having to re-prune
on individual partitions, but I figured that was at a good enough
level not to have to add it for each partition hierarchy. I imagine
most partition hierarchies will just contain a single partitioned
table anyway, so an additional level of caching might not buy very
much, but I might just be trying to convince myself of that because
it's late here now.

Anyway... Patch attached. This is method 3 of the 3 methods I thought
to fix this, so if this is not suitable then I'm out of ideas.

It would be great to get some feedback on this as I'd really like to
be done with it before July.

--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

v1-0001-Fix-run-time-partition-pruning-for-UNION-ALL-pare.patch (application/octet-stream) — Download (+706 −263)
#16Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: David Rowley (#15)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 2018-Jun-27, David Rowley wrote:

I've only just completed reading back through all the code and I think
it's correct. I ended up changing add_paths_to_append_rel() so that
instead of performing concatenation on partitioned_rels from two UNION
ALL children, it creates a List of lists. I believe this list can
only end up with a 2-level hierarchy of partitioned rels. I tested
this and it seems to work as I expect, although I think I need to
study the code a bit more to ensure it can't happen. I need to check
if there's some cases where nested UNION ALLs cannot be flattened to
have a single UNION ALL parent. Supporting this did cause me to have
to check the List->type in a few places. I only saw one other place in
the code where this is done, so I figured that meant it was okay.

Checking a node's ->type member is not idiomatic -- use IsA() for that.
(Strangely, we have IsIntegerList() but only for use within list.c.) But
do we rely on the ordering of these lists anywhere? I'm wondering why
it's not more sensible to use a bitmapset there (I guess for your
list-of-lists business you'd have a list of bms's).

I didn't look at your patch much further.

Since Tom has been revamping this code lately, I think it's a good
idea to wait for his input.

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#17Tom Lane
tgl@sss.pgh.pa.us
In reply to: Alvaro Herrera (#16)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

Alvaro Herrera <alvherre@2ndquadrant.com> writes:

Since Tom has been revamping this code lately, I think it's a good
idea to wait for his input.

I'm on vacation and won't have time to look at this until week after
next. If you don't mind putting the topic on hold that long, I'll
be happy to take responsibility for it.

regards, tom lane

#18Andres Freund
andres@anarazel.de
In reply to: Tom Lane (#17)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

Hi Tom,

On 2018-06-29 18:17:08 -0400, Tom Lane wrote:

Alvaro Herrera <alvherre@2ndquadrant.com> writes:

Since Tom has been revamping this code lately, I think it's a good
idea to wait for his input.

I'm on vacation and won't have time to look at this until week after
next. If you don't mind putting the topic on hold that long, I'll
be happy to take responsibility for it.

Is that still the plan? Do you foresee any issues here? This has been
somewhat of a longstanding open item...

Greetings,

Andres Freund

#19Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andres Freund (#18)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

Andres Freund <andres@anarazel.de> writes:

On 2018-06-29 18:17:08 -0400, Tom Lane wrote:

I'm on vacation and won't have time to look at this until week after
next. If you don't mind putting the topic on hold that long, I'll
be happy to take responsibility for it.

Is that still the plan? Do you foresee any issues here? This has been
somewhat of a longstanding open item...

It's on my to-do list, but I'm still catching up vacation backlog.
Is this item blocking anyone?

regards, tom lane

#20Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Tom Lane (#19)
Re: Internal error XX000 with enable_partition_pruning=on, pg 11 beta1 on Debian

On 2018-Jul-12, Tom Lane wrote:

Andres Freund <andres@anarazel.de> writes:

On 2018-06-29 18:17:08 -0400, Tom Lane wrote:

I'm on vacation and won't have time to look at this until week after
next. If you don't mind putting the topic on hold that long, I'll
be happy to take responsibility for it.

Is that still the plan? Do you foresee any issues here? This has been
somewhat of a longstanding open item...

It's on my to-do list, but I'm still catching up vacation backlog.
Is this item blocking anyone?

I don't think so. The patch might conflict with other fixes, but I
suppose that's a fact of life.

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#21Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#15)
#22David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#21)
#23David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#21)
#24David Rowley
dgrowleyml@gmail.com
In reply to: David Rowley (#23)
#25Phil Florent
philflorent@hotmail.com
In reply to: David Rowley (#23)
#26David Rowley
dgrowleyml@gmail.com
In reply to: Phil Florent (#25)
#27Phil Florent
philflorent@hotmail.com
In reply to: David Rowley (#26)
#28David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#21)
#29Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: David Rowley (#24)
#30David Rowley
dgrowleyml@gmail.com
In reply to: David Rowley (#28)
#31David Rowley
dgrowleyml@gmail.com
In reply to: Alvaro Herrera (#29)
#32Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: David Rowley (#30)
#33David Rowley
dgrowleyml@gmail.com
In reply to: Amit Langote (#32)
#34Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: David Rowley (#33)
#35David Rowley
dgrowleyml@gmail.com
In reply to: David Rowley (#30)
#36Tom Lane
tgl@sss.pgh.pa.us
In reply to: Amit Langote (#34)
#37Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#35)
#38David Rowley
dgrowleyml@gmail.com
In reply to: Tom Lane (#37)
#39Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Tom Lane (#37)
#40Rushabh Lathia
rushabh.lathia@gmail.com
In reply to: Alvaro Herrera (#39)
#41Tom Lane
tgl@sss.pgh.pa.us
In reply to: Rushabh Lathia (#40)
#42Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: Tom Lane (#41)
#43David Rowley
dgrowleyml@gmail.com
In reply to: Amit Langote (#42)
#44Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#43)
#45Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: Tom Lane (#44)
#46Tom Lane
tgl@sss.pgh.pa.us
In reply to: Amit Langote (#45)
#47Amit Langote
Langote_Amit_f8@lab.ntt.co.jp
In reply to: Tom Lane (#46)
#48Phil Florent
philflorent@hotmail.com
In reply to: Amit Langote (#47)