parallel index(-only) scan breaks when run without parallelism

Started by Robert Haasalmost 9 years ago2 messages
#1Robert Haas
robertmhaas@gmail.com

Amit, Rafia,

nodeIndexscan.c, unlike nodeSeqscan.c, thinks that a parallel-aware
scan will always be executed in parallel mode. But that's not true:
an Execute message with a non-zero row count could cause us to abandon
planned parallelism and execute the plan serially. I believe this
would cause a core dump. We definitely core dump with the following
small patch, which causes parallelism to always be abandoned:

diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index f5cd65d..fc4de48 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1596,8 +1596,7 @@ ExecutePlan(EState *estate,
      * when writing into a relation, because no database changes are allowed
      * in parallel mode.
      */
-    if (numberTuples || dest->mydest == DestIntoRel)
-        use_parallel_mode = false;
+    use_parallel_mode = false;

if (use_parallel_mode)
EnterParallelMode();

I believe this defect was introduced by
5262f7a4fc44f651241d2ff1fa688dd664a34874 and that nodeIndexonlyscan.c
has the same defect as of 0414b26bac09379a4cbf1fbd847d1cee2293c5e4.

Please fix.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#2Amit Kapila
amit.kapila16@gmail.com
In reply to: Robert Haas (#1)
1 attachment(s)
Re: parallel index(-only) scan breaks when run without parallelism

On Wed, Mar 8, 2017 at 12:27 AM, Robert Haas <robertmhaas@gmail.com> wrote:

Amit, Rafia,

nodeIndexscan.c, unlike nodeSeqscan.c, thinks that a parallel-aware
scan will always be executed in parallel mode. But that's not true:
an Execute message with a non-zero row count could cause us to abandon
planned parallelism and execute the plan serially.

Right, and the current code had assumed that if there is a parallel
plan then it will always enter the parallel mode. I think the fix is
quite similar to what we do in nodeSeqscan.c i.e. initialize the scan
descriptor before starting the scan if it is not already initialized.
There is an additional check required for ensuring if index runtime
keys are ready before calling index_rescan. Attached patch fixes the
problem.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

fix_scandesc_initialization_parallel_index_v1.patchapplication/octet-stream; name=fix_scandesc_initialization_parallel_index_v1.patchDownload
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 4a7f39a..db7f2e1 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -78,6 +78,38 @@ IndexOnlyNext(IndexOnlyScanState *node)
 	econtext = node->ss.ps.ps_ExprContext;
 	slot = node->ss.ss_ScanTupleSlot;
 
+	if (scandesc == NULL)
+	{
+		/*
+		 * We reach here if the index only scan is not parallel, or if we're
+		 * executing a index only scan that was intended to be parallel
+		 * serially.
+		 */
+		scandesc = index_beginscan(node->ss.ss_currentRelation,
+								   node->ioss_RelationDesc,
+								   estate->es_snapshot,
+								   node->ioss_NumScanKeys,
+								   node->ioss_NumOrderByKeys);
+
+		node->ioss_ScanDesc = scandesc;
+
+
+		/* Set it up for index-only scan */
+		node->ioss_ScanDesc->xs_want_itup = true;
+		node->ioss_VMBuffer = InvalidBuffer;
+
+		/*
+		 * If no run-time keys to calculate or they are ready, go ahead and
+		 * pass the scankeys to the index AM.
+		 */
+		if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
+			index_rescan(scandesc,
+						 node->ioss_ScanKeys,
+						 node->ioss_NumScanKeys,
+						 node->ioss_OrderByKeys,
+						 node->ioss_NumOrderByKeys);
+	}
+
 	/*
 	 * OK, now that we have what we need, fetch the next tuple.
 	 */
@@ -572,34 +604,6 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	}
 
 	/*
-	 * Initialize scan descriptor.
-	 */
-	if (!node->scan.plan.parallel_aware)
-	{
-		indexstate->ioss_ScanDesc = index_beginscan(currentRelation,
-											   indexstate->ioss_RelationDesc,
-													estate->es_snapshot,
-												indexstate->ioss_NumScanKeys,
-											indexstate->ioss_NumOrderByKeys);
-
-
-		/* Set it up for index-only scan */
-		indexstate->ioss_ScanDesc->xs_want_itup = true;
-		indexstate->ioss_VMBuffer = InvalidBuffer;
-
-		/*
-		 * If no run-time keys to calculate, go ahead and pass the scankeys to
-		 * the index AM.
-		 */
-		if (indexstate->ioss_NumRuntimeKeys == 0)
-			index_rescan(indexstate->ioss_ScanDesc,
-						 indexstate->ioss_ScanKeys,
-						 indexstate->ioss_NumScanKeys,
-						 indexstate->ioss_OrderByKeys,
-						 indexstate->ioss_NumOrderByKeys);
-	}
-
-	/*
 	 * all done.
 	 */
 	return indexstate;
@@ -657,10 +661,10 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
 	node->ioss_VMBuffer = InvalidBuffer;
 
 	/*
-	 * If no run-time keys to calculate, go ahead and pass the scankeys to
-	 * the index AM.
+	 * If no run-time keys to calculate or they are ready, go ahead and pass
+	 * the scankeys to the index AM.
 	 */
-	if (node->ioss_NumRuntimeKeys == 0)
+	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
 		index_rescan(node->ioss_ScanDesc,
 					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
 					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
@@ -687,10 +691,10 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, shm_toc *toc)
 	node->ioss_ScanDesc->xs_want_itup = true;
 
 	/*
-	 * If no run-time keys to calculate, go ahead and pass the scankeys to the
-	 * index AM.
+	 * If no run-time keys to calculate or they are ready, go ahead and pass
+	 * the scankeys to the index AM.
 	 */
-	if (node->ioss_NumRuntimeKeys == 0)
+	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
 		index_rescan(node->ioss_ScanDesc,
 					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
 					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 0a9dfdb..cb6aff9 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -102,6 +102,30 @@ IndexNext(IndexScanState *node)
 	econtext = node->ss.ps.ps_ExprContext;
 	slot = node->ss.ss_ScanTupleSlot;
 
+	if (scandesc == NULL)
+	{
+		/*
+		 * We reach here if the index scan is not parallel, or if we're
+		 * executing a index scan that was intended to be parallel serially.
+		 */
+		scandesc = index_beginscan(node->ss.ss_currentRelation,
+								   node->iss_RelationDesc,
+								   estate->es_snapshot,
+								   node->iss_NumScanKeys,
+								   node->iss_NumOrderByKeys);
+
+		node->iss_ScanDesc = scandesc;
+
+		/*
+		 * If no run-time keys to calculate or they are ready, go ahead and
+		 * pass the scankeys to the index AM.
+		 */
+		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
+			index_rescan(scandesc,
+						 node->iss_ScanKeys, node->iss_NumScanKeys,
+						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+	}
+
 	/*
 	 * ok, now that we have what we need, fetch the next tuple.
 	 */
@@ -154,6 +178,7 @@ IndexNext(IndexScanState *node)
 static TupleTableSlot *
 IndexNextWithReorder(IndexScanState *node)
 {
+	EState	   *estate;
 	ExprContext *econtext;
 	IndexScanDesc scandesc;
 	HeapTuple	tuple;
@@ -164,6 +189,8 @@ IndexNextWithReorder(IndexScanState *node)
 	bool	   *lastfetched_nulls;
 	int			cmp;
 
+	estate = node->ss.ps.state;
+
 	/*
 	 * Only forward scan is supported with reordering.  Note: we can get away
 	 * with just Asserting here because the system will not try to run the
@@ -174,12 +201,36 @@ IndexNextWithReorder(IndexScanState *node)
 	 * explicitly.
 	 */
 	Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir));
-	Assert(ScanDirectionIsForward(node->ss.ps.state->es_direction));
+	Assert(ScanDirectionIsForward(estate->es_direction));
 
 	scandesc = node->iss_ScanDesc;
 	econtext = node->ss.ps.ps_ExprContext;
 	slot = node->ss.ss_ScanTupleSlot;
 
+	if (scandesc == NULL)
+	{
+		/*
+		 * We reach here if the index scan is not parallel, or if we're
+		 * executing a index scan that was intended to be parallel serially.
+		 */
+		scandesc = index_beginscan(node->ss.ss_currentRelation,
+								   node->iss_RelationDesc,
+								   estate->es_snapshot,
+								   node->iss_NumScanKeys,
+								   node->iss_NumOrderByKeys);
+
+		node->iss_ScanDesc = scandesc;
+
+		/*
+		 * If no run-time keys to calculate or they are ready, go ahead and
+		 * pass the scankeys to the index AM.
+		 */
+		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
+			index_rescan(scandesc,
+						 node->iss_ScanKeys, node->iss_NumScanKeys,
+						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+	}
+
 	for (;;)
 	{
 		/*
@@ -1039,31 +1090,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 	}
 
 	/*
-	 * for parallel-aware node, we initialize the scan descriptor after
-	 * initializing the shared memory for parallel execution.
-	 */
-	if (!node->scan.plan.parallel_aware)
-	{
-		/*
-		 * Initialize scan descriptor.
-		 */
-		indexstate->iss_ScanDesc = index_beginscan(currentRelation,
-												indexstate->iss_RelationDesc,
-												   estate->es_snapshot,
-												 indexstate->iss_NumScanKeys,
-											 indexstate->iss_NumOrderByKeys);
-
-		/*
-		 * If no run-time keys to calculate, go ahead and pass the scankeys to
-		 * the index AM.
-		 */
-		if (indexstate->iss_NumRuntimeKeys == 0)
-			index_rescan(indexstate->iss_ScanDesc,
-					   indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
-				indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
-	}
-
-	/*
 	 * all done.
 	 */
 	return indexstate;
@@ -1674,10 +1700,10 @@ ExecIndexScanInitializeDSM(IndexScanState *node,
 								 piscan);
 
 	/*
-	 * If no run-time keys to calculate, go ahead and pass the scankeys to the
-	 * index AM.
+	 * If no run-time keys to calculate or they are ready, go ahead and pass
+	 * the scankeys to the index AM.
 	 */
-	if (node->iss_NumRuntimeKeys == 0)
+	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
 		index_rescan(node->iss_ScanDesc,
 					 node->iss_ScanKeys, node->iss_NumScanKeys,
 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
@@ -1703,10 +1729,10 @@ ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
 								 piscan);
 
 	/*
-	 * If no run-time keys to calculate, go ahead and pass the scankeys to the
-	 * index AM.
+	 * If no run-time keys to calculate or they are ready, go ahead and pass
+	 * the scankeys to the index AM.
 	 */
-	if (node->iss_NumRuntimeKeys == 0)
+	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
 		index_rescan(node->iss_ScanDesc,
 					 node->iss_ScanKeys, node->iss_NumScanKeys,
 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);