From 44f01f8d311d3e1e328a5c2fba58682650e6da3f Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Sun, 2 Dec 2018 12:25:56 +1300 Subject: [PATCH v2] Allow lock acquisitions for partitions to be delayed Normally during query execution, AcquireExecutorLocks will obtain locks on all RangeTblEntry objects. When many RangeTblEntrys are table partitions then a large number of locks may be required. This can slow down execution of such plans. Now that we have run-time partition pruning we may end up not scanning some of these partitions and if so we have no need to obtain a lock on them. Here we modify things so that locks are only obtained on partitions the first time they are accessed in the executor, instead of at the start of execution. --- src/backend/catalog/dependency.c | 1 + src/backend/commands/createas.c | 1 + src/backend/executor/execUtils.c | 20 +++++++++++--------- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/planner.c | 2 ++ src/backend/optimizer/util/inherit.c | 11 +++++++++++ src/backend/parser/parse_relation.c | 2 ++ src/backend/replication/logical/worker.c | 1 + src/backend/rewrite/rewriteHandler.c | 1 + src/backend/utils/adt/ri_triggers.c | 2 ++ src/backend/utils/adt/ruleutils.c | 3 +++ src/backend/utils/cache/plancache.c | 15 +++++++++++---- src/include/nodes/parsenodes.h | 2 ++ 16 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index dc679ed8b9..9c31c50159 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1415,6 +1415,7 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender, rte.rtekind = RTE_RELATION; rte.relid = relId; rte.relkind = RELKIND_RELATION; /* no need for exactness here */ + rte.delaylock = false; rte.rellockmode = AccessShareLock; context.rtables = list_make1(list_make1(&rte)); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 7185432763..ab89c587e7 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -515,6 +515,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) rte->rtekind = RTE_RELATION; rte->relid = intoRelationAddr.objectId; rte->relkind = relkind; + rte->delaylock = false; rte->rellockmode = RowExclusiveLock; rte->requiredPerms = ACL_INSERT; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 24ab43d5e5..db4d5c3dc7 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -770,14 +770,15 @@ ExecGetRangeTableRelation(EState *estate, Index rti) Assert(rte->rtekind == RTE_RELATION); - if (!IsParallelWorker()) + if (!rte->delaylock && !IsParallelWorker()) { /* - * In a normal query, we should already have the appropriate lock, - * but verify that through an Assert. Since there's already an - * Assert inside heap_open that insists on holding some lock, it - * seems sufficient to check this only when rellockmode is higher - * than the minimum. + * In a normal query, unless the planner set the delaylock flag, + * we should already have the appropriate lock, but verify that + * through an Assert. Since there's already an Assert inside + * heap_open that insists on holding some lock, it seems + * sufficient to check this only when rellockmode is higher than + * the minimum. */ rel = heap_open(rte->relid, NoLock); Assert(rte->rellockmode == AccessShareLock || @@ -786,9 +787,10 @@ ExecGetRangeTableRelation(EState *estate, Index rti) else { /* - * If we are a parallel worker, we need to obtain our own local - * lock on the relation. This ensures sane behavior in case the - * parent process exits before we do. + * If we are a parallel worker or delaylock is set, we need to + * obtain a lock on the relation. For parallel workers, this + * ensures sane behavior in case the parent process exits before + * we do. */ rel = heap_open(rte->relid, rte->rellockmode); } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 006a3d1772..9f3d54d512 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2353,6 +2353,7 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_SCALAR_FIELD(rtekind); COPY_SCALAR_FIELD(relid); COPY_SCALAR_FIELD(relkind); + COPY_SCALAR_FIELD(delaylock); COPY_SCALAR_FIELD(rellockmode); COPY_NODE_FIELD(tablesample); COPY_NODE_FIELD(subquery); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 133df1b364..84b692080e 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2629,6 +2629,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_SCALAR_FIELD(rtekind); COMPARE_SCALAR_FIELD(relid); COMPARE_SCALAR_FIELD(relkind); + COMPARE_SCALAR_FIELD(delaylock); COMPARE_SCALAR_FIELD(rellockmode); COMPARE_NODE_FIELD(tablesample); COMPARE_NODE_FIELD(subquery); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 0fde876c77..2180c59547 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3020,6 +3020,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) case RTE_RELATION: WRITE_OID_FIELD(relid); WRITE_CHAR_FIELD(relkind); + WRITE_BOOL_FIELD(delaylock); WRITE_INT_FIELD(rellockmode); WRITE_NODE_FIELD(tablesample); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ec6f2569ab..ec4aafdf0b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1362,6 +1362,7 @@ _readRangeTblEntry(void) case RTE_RELATION: READ_OID_FIELD(relid); READ_CHAR_FIELD(relkind); + READ_BOOL_FIELD(delaylock); READ_INT_FIELD(rellockmode); READ_NODE_FIELD(tablesample); break; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b849ae03b8..ba39d965f8 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6055,6 +6055,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->lateral = false; rte->inh = false; @@ -6178,6 +6179,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->lateral = false; rte->inh = true; diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 350e6afe27..3daf2511f2 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -362,6 +362,17 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, *childrte_p = childrte; childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; + + /* + * For leaf partitions, we've no need to obtain the lock on the relation + * during query execution until the partition is first required. This can + * drastically reduce the number of partitions we must lock when many + * partitions are run-time pruned. + */ + childrte->delaylock = (childOID != parentOID && + parentrte->relkind == RELKIND_PARTITIONED_TABLE && + childrte->relkind != RELKIND_PARTITIONED_TABLE); + /* A partitioned child will need to be expanded further. */ if (childOID != parentOID && childrte->relkind == RELKIND_PARTITIONED_TABLE) diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index dfbc1cc499..eb277928b4 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1224,6 +1224,7 @@ addRangeTableEntry(ParseState *pstate, rel = parserOpenTable(pstate, relation, lockmode); rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = lockmode; /* @@ -1302,6 +1303,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->alias = alias; rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = lockmode; /* diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index f5d622193c..99279b9a49 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -199,6 +199,7 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel) rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel->localrel); rte->relkind = rel->localrel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = AccessShareLock; ExecInitRangeTable(estate, list_make1(rte)); diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index c7a5e630b7..9af78c237f 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1595,6 +1595,7 @@ ApplyRetrieveRule(Query *parsetree, /* Clear fields that should not be set in a subquery RTE */ rte->relid = InvalidOid; rte->relkind = 0; + rte->delaylock = false; rte->rellockmode = 0; rte->tablesample = NULL; rte->inh = false; /* must not be set for a subquery */ diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 590df56a0a..c6fd4098ef 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -1730,6 +1730,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) pkrte->rtekind = RTE_RELATION; pkrte->relid = RelationGetRelid(pk_rel); pkrte->relkind = pk_rel->rd_rel->relkind; + pkrte->delaylock = false; pkrte->rellockmode = AccessShareLock; pkrte->requiredPerms = ACL_SELECT; @@ -1737,6 +1738,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) fkrte->rtekind = RTE_RELATION; fkrte->relid = RelationGetRelid(fk_rel); fkrte->relkind = fk_rel->rd_rel->relkind; + fkrte->delaylock = false; fkrte->rellockmode = AccessShareLock; fkrte->requiredPerms = ACL_SELECT; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 77811f6818..a013f3c19c 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1002,6 +1002,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) oldrte->rtekind = RTE_RELATION; oldrte->relid = trigrec->tgrelid; oldrte->relkind = relkind; + oldrte->delaylock = false; oldrte->rellockmode = AccessShareLock; oldrte->alias = makeAlias("old", NIL); oldrte->eref = oldrte->alias; @@ -1013,6 +1014,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) newrte->rtekind = RTE_RELATION; newrte->relid = trigrec->tgrelid; newrte->relkind = relkind; + newrte->delaylock = false; newrte->rellockmode = AccessShareLock; newrte->alias = makeAlias("new", NIL); newrte->eref = newrte->alias; @@ -3209,6 +3211,7 @@ deparse_context_for(const char *aliasname, Oid relid) rte->rtekind = RTE_RELATION; rte->relid = relid; rte->relkind = RELKIND_RELATION; /* no need for exactness here */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->alias = makeAlias(aliasname, NIL); rte->eref = rte->alias; diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 3f46b5dfb2..0cfa8baca5 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1593,10 +1593,17 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) continue; /* - * Acquire the appropriate type of lock on each relation OID. Note - * that we don't actually try to open the rel, and hence will not - * fail if it's been dropped entirely --- we'll just transiently - * acquire a non-conflicting lock. + * delaylock relations will be locked only when they are going + * to be accessed for the first time. + */ + if (rte->delaylock) + continue; + + /* + * Otherwise, acquire the appropriate type of lock on the + * relation's OID. Note that we don't actually try to open the + * rel, and hence will not fail if it's been dropped entirely --- + * we'll just transiently acquire a non-conflicting lock. */ if (acquire) LockRelationOid(rte->relid, rte->rellockmode); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 27782fed6c..e6e57c644c 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -986,6 +986,8 @@ typedef struct RangeTblEntry */ Oid relid; /* OID of the relation */ char relkind; /* relation kind (see pg_class.relkind) */ + bool delaylock; /* delay locking until executor needs to + * access this relation */ int rellockmode; /* lock level that query requires on the rel */ struct TableSampleClause *tablesample; /* sampling info, or NULL */ -- 2.16.2.windows.1