diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 36e3d44aad..60a205942c 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -518,6 +518,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) rte->relid = intoRelationAddr.objectId; rte->relkind = relkind; rte->rellockmode = RowExclusiveLock; + rte->idxlockmode = RowExclusiveLock; rte->requiredPerms = ACL_INSERT; for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++) diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index bd837d3cd8..0d9b247bb4 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -211,7 +211,7 @@ BitmapIndexScanState * ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) { BitmapIndexScanState *indexstate; - bool relistarget; + LOCKMODE lockmode; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -260,16 +260,9 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return indexstate; - /* - * Open the index relation. - * - * If the parent table is one of the target relations of the query, then - * InitPlan already opened and write-locked the index, so we can avoid - * taking another lock here. Otherwise we need a normal reader's lock. - */ - relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); - indexstate->biss_RelationDesc = index_open(node->indexid, - relistarget ? NoLock : AccessShareLock); + /* Open the index relation. 
*/ + lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->idxlockmode; + indexstate->biss_RelationDesc = index_open(node->indexid, lockmode); /* * Initialize index-specific scan state diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 2d954b722a..0c74509825 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -493,7 +493,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) { IndexOnlyScanState *indexstate; Relation currentRelation; - bool relistarget; + LOCKMODE lockmode; TupleDesc tupDesc; /* @@ -556,16 +556,9 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return indexstate; - /* - * Open the index relation. - * - * If the parent table is one of the target relations of the query, then - * InitPlan already opened and write-locked the index, so we can avoid - * taking another lock here. Otherwise we need a normal reader's lock. - */ - relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); - indexstate->ioss_RelationDesc = index_open(node->indexid, - relistarget ? NoLock : AccessShareLock); + /* Open the index relation. 
*/ + lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->idxlockmode; + indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode); /* * Initialize index-specific scan state diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 8f39cc2b6b..0dca63ccd5 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -901,7 +901,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) { IndexScanState *indexstate; Relation currentRelation; - bool relistarget; + LOCKMODE lockmode; /* * create state structure @@ -964,16 +964,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return indexstate; - /* - * Open the index relation. - * - * If the parent table is one of the target relations of the query, then - * InitPlan already opened and write-locked the index, so we can avoid - * taking another lock here. Otherwise we need a normal reader's lock. - */ - relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); - indexstate->iss_RelationDesc = index_open(node->indexid, - relistarget ? NoLock : AccessShareLock); + /* Open the index relation. 
*/ + lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->idxlockmode; + indexstate->iss_RelationDesc = index_open(node->indexid, lockmode); /* * Initialize index-specific scan state diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index a8a735c247..96e1ee716c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2356,6 +2356,7 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_SCALAR_FIELD(relid); COPY_SCALAR_FIELD(relkind); COPY_SCALAR_FIELD(rellockmode); + COPY_SCALAR_FIELD(idxlockmode); COPY_NODE_FIELD(tablesample); COPY_NODE_FIELD(subquery); COPY_SCALAR_FIELD(security_barrier); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3cab90e9f8..ae806fb38b 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2633,6 +2633,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_SCALAR_FIELD(relid); COMPARE_SCALAR_FIELD(relkind); COMPARE_SCALAR_FIELD(rellockmode); + COMPARE_SCALAR_FIELD(idxlockmode); COMPARE_NODE_FIELD(tablesample); COMPARE_NODE_FIELD(subquery); COMPARE_SCALAR_FIELD(security_barrier); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 69179a07c3..c4246695a3 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3034,6 +3034,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) WRITE_OID_FIELD(relid); WRITE_CHAR_FIELD(relkind); WRITE_INT_FIELD(rellockmode); + WRITE_INT_FIELD(idxlockmode); WRITE_NODE_FIELD(tablesample); break; case RTE_SUBQUERY: diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 4b845b1bb7..1119cd1ebb 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1365,6 +1365,7 @@ _readRangeTblEntry(void) READ_OID_FIELD(relid); READ_CHAR_FIELD(relkind); READ_INT_FIELD(rellockmode); + READ_INT_FIELD(idxlockmode); READ_NODE_FIELD(tablesample); break; case RTE_SUBQUERY: diff 
--git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9bb068a52e..2d4aad4628 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -128,6 +128,7 @@ typedef struct } WindowClauseSortData; /* Local functions */ +static void finalize_lockmodes(PlannedStmt *stmt); static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); static void inheritance_planner(PlannerInfo *root); @@ -137,6 +138,7 @@ static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); static List *remap_to_groupclause_idx(List *groupClause, List *gsets, int *tleref_to_colnum_map); static void preprocess_rowmarks(PlannerInfo *root); +static void determine_index_lockmode(PlannerInfo *root); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); @@ -571,9 +573,132 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) if (glob->partition_directory != NULL) DestroyPartitionDirectory(glob->partition_directory); + /* + * Make all rtable entries that refer to the same relation agree on the + * lock modes used for that relation and for its indexes. + */ + finalize_lockmodes(result); + return result; } +typedef struct RelationLockmodeElem +{ + Oid relid; /* hash key -- must be first */ + LOCKMODE rellockmode; /* lock level to apply to the rel */ + LOCKMODE idxlockmode; /* lock level to apply to the rel's indexes */ +} RelationLockmodeElem; + + +/* + * finalize_lockmodes + * Process stmt's rtable so that all RELKIND_RELATION entries referring to + * the same relation carry the same rellockmode and the same idxlockmode, + * upgrading weaker lock levels to the strongest one recorded for that + * relation. Nothing is done when the rtable has fewer than two entries.
+ */ +static void +finalize_lockmodes(PlannedStmt *stmt) +{ + List *rtable = stmt->rtable; + ListCell *lc; + RelationLockmodeElem *elem; + HTAB *htab; + HASHCTL ctl; + bool found; + bool applystrongest; + + /* + * There can't be any duplicate references to the same relation when the + * rtable has a single entry. + */ + if (list_length(rtable) < 2) + return; + + applystrongest = false; + + /* + * Determine the strongest lock level for each relation in rtable. We + * must apply the strongest lock of each relation if the same relation is + * seen more than once and the lock levels vary. This defends against + * lock upgrade hazards we might see if we obtained the weaker lock + * followed by the stronger lock level. We need only attempt this when + * there are multiple entries in the rtable. + */ + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RelationLockmodeElem); + ctl.hcxt = CurrentMemoryContext; + + htab = hash_create("Lockmode table", list_length(rtable), + &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + foreach(lc, rtable) + { + RangeTblEntry *rte = lfirst(lc); + Oid reloid; + + if (rte->relkind != RELKIND_RELATION) + continue; + + reloid = rte->relid; + elem = (RelationLockmodeElem *) + hash_search(htab, &reloid, HASH_ENTER, &found); + + /* + * When we've seen this relation before and either of its lock modes + * differs from what we recorded, keep the strongest of each and mark + * that a second pass over the list is needed to apply them.
+ */ + if (found) + { + if (elem->rellockmode != rte->rellockmode) + { + applystrongest = true; + elem->rellockmode = Max(elem->rellockmode, rte->rellockmode); + } + + if (elem->idxlockmode != rte->idxlockmode) + { + applystrongest = true; + elem->idxlockmode = Max(elem->idxlockmode, rte->idxlockmode); + } + } + else + { + elem->rellockmode = rte->rellockmode; + elem->idxlockmode = rte->idxlockmode; + } + } + + /* + * If there are multiple instances of the same rel with varying lock + * strengths then set the strongest lock level to each instance of that + * relation. + */ + if (applystrongest) + { + foreach(lc, rtable) + { + RangeTblEntry *rte = lfirst(lc); + Oid reloid; + + if (rte->relkind != RELKIND_RELATION) + continue; + + reloid = rte->relid; + + elem = (RelationLockmodeElem *) + hash_search(htab, &reloid, HASH_FIND, &found); + Assert(found); + + rte->rellockmode = elem->rellockmode; + rte->idxlockmode = elem->idxlockmode; + } + } + + hash_destroy(htab); +} /*-------------------- * subquery_planner @@ -728,6 +853,13 @@ subquery_planner(PlannerGlobal *glob, Query *parse, */ preprocess_rowmarks(root); + /* + * Determine the lock level required for indexes for each RTE_RELATION. + * We must do this before calling expand_inherited_tables as we want + * child tables to inherit the same lock level as their parent. + */ + determine_index_lockmode(root); + /* * Expand any rangetable entries that are inheritance sets into "append * relations". This can add entries to the rangetable, but they must be @@ -2675,6 +2807,37 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) } } +/* + * determine_index_lockmode + * Determine and set the idxlockmode for each entry in the rtable.
+ */ +static void +determine_index_lockmode(PlannerInfo *root) +{ + Query *parse = root->parse; + ListCell *lc; + Index rti; + + rti = 1; + foreach(lc, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + /* + * Only the query's result relation (rti == resultRelation) gets + * RowExclusiveLock on its indexes, and not for DELETE since we don't + * modify index entries on delete. Every other rtable entry, + * whatever its kind, gets AccessShareLock. + */ + if (rti == parse->resultRelation && parse->commandType != CMD_DELETE) + rte->idxlockmode = RowExclusiveLock; + else + rte->idxlockmode = AccessShareLock; + + rti++; + } +} + /* * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses * @@ -6039,6 +6202,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ rte->rellockmode = AccessShareLock; + rte->idxlockmode = AccessShareLock; rte->lateral = false; rte->inh = false; rte->inFromCl = true; @@ -6162,6 +6326,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky.
*/ rte->rellockmode = AccessShareLock; + rte->idxlockmode = AccessShareLock; rte->lateral = false; rte->inh = true; rte->inFromCl = true; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 30f4dc151b..65d4b0d742 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -110,6 +110,7 @@ void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel) { + RangeTblEntry *rte = root->simple_rte_array[rel->relid]; Index varno = rel->relid; Relation relation; bool hasindex; @@ -164,23 +165,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, { List *indexoidlist; ListCell *l; - LOCKMODE lmode; indexoidlist = RelationGetIndexList(relation); - /* - * For each index, we get the same type of lock that the executor will - * need, and do not release it. This saves a couple of trips to the - * shared lock manager while not creating any real loss of - * concurrency, because no schema changes could be happening on the - * index while we hold lock on the parent rel, and neither lock type - * blocks any other kind of index operation. - */ - if (rel->relid == root->parse->resultRelation) - lmode = RowExclusiveLock; - else - lmode = AccessShareLock; - foreach(l, indexoidlist) { Oid indexoid = lfirst_oid(l); @@ -195,7 +182,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* * Extract info from the relation descriptor for the index. 
*/ - indexRelation = index_open(indexoid, lmode); + indexRelation = index_open(indexoid, rte->idxlockmode); index = indexRelation->rd_index; /* diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index f3b6d193aa..6601387bb7 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1227,6 +1227,7 @@ addRangeTableEntry(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; + rte->idxlockmode = 0; /* Set during planning */ /* * Build the list of effective column names using user-supplied aliases @@ -1305,6 +1306,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; + rte->idxlockmode = 0; /* Set during planning */ /* * Build the list of effective column names using user-supplied aliases diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 07f4ec9055..42964eb31a 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -186,6 +186,7 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel) rte->relid = RelationGetRelid(rel->localrel); rte->relkind = rel->localrel->rd_rel->relkind; rte->rellockmode = AccessShareLock; + rte->idxlockmode = AccessShareLock; ExecInitRangeTable(estate, list_make1(rte)); resultRelInfo = makeNode(ResultRelInfo); diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 4fc50c89b9..ae4b75ad24 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1706,6 +1706,7 @@ ApplyRetrieveRule(Query *parsetree, rte->relid = InvalidOid; rte->relkind = 0; rte->rellockmode = 0; + rte->idxlockmode = 0; rte->tablesample = NULL; rte->inh = false; /* must not be set for a subquery */ @@ -3040,6 +3041,7 @@ rewriteTargetView(Query *parsetree, Relation 
view) */ new_rte = base_rte; new_rte->rellockmode = RowExclusiveLock; + new_rte->idxlockmode = RowExclusiveLock; parsetree->rtable = lappend(parsetree->rtable, new_rte); new_rt_index = list_length(parsetree->rtable); diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index d715709b7c..d58ebddd9b 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -1295,6 +1295,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) pkrte->relid = RelationGetRelid(pk_rel); pkrte->relkind = pk_rel->rd_rel->relkind; pkrte->rellockmode = AccessShareLock; + pkrte->idxlockmode = AccessShareLock; pkrte->requiredPerms = ACL_SELECT; fkrte = makeNode(RangeTblEntry); @@ -1302,6 +1303,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) fkrte->relid = RelationGetRelid(fk_rel); fkrte->relkind = fk_rel->rd_rel->relkind; fkrte->rellockmode = AccessShareLock; + fkrte->idxlockmode = AccessShareLock; fkrte->requiredPerms = ACL_SELECT; for (int i = 0; i < riinfo->nkeys; i++) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 85055bbb95..3197b04298 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1004,6 +1004,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) oldrte->relid = trigrec->tgrelid; oldrte->relkind = relkind; oldrte->rellockmode = AccessShareLock; + oldrte->idxlockmode = AccessShareLock; oldrte->alias = makeAlias("old", NIL); oldrte->eref = oldrte->alias; oldrte->lateral = false; @@ -1015,6 +1016,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) newrte->relid = trigrec->tgrelid; newrte->relkind = relkind; newrte->rellockmode = AccessShareLock; + newrte->idxlockmode = AccessShareLock; newrte->alias = makeAlias("new", NIL); newrte->eref = newrte->alias; newrte->lateral = false; @@ -3226,6 +3228,7 @@ deparse_context_for(const char *aliasname, Oid relid) rte->relid = relid; rte->relkind = 
RELKIND_RELATION; /* no need for exactness here */ rte->rellockmode = AccessShareLock; + rte->idxlockmode = AccessShareLock; rte->alias = makeAlias(aliasname, NIL); rte->eref = rte->alias; rte->lateral = false; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index fe35783359..8197a76be6 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -990,6 +990,7 @@ typedef struct RangeTblEntry Oid relid; /* OID of the relation */ char relkind; /* relation kind (see pg_class.relkind) */ int rellockmode; /* lock level that query requires on the rel */ + int idxlockmode; /* lock level required for rel's indexes */ struct TableSampleClause *tablesample; /* sampling info, or NULL */ /*