diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c index d3431a7..987709b 100644 --- a/src/backend/access/transam/parallel.c +++ b/src/backend/access/transam/parallel.c @@ -538,8 +538,16 @@ LaunchParallelWorkers(ParallelContext *pcxt) void WaitForParallelWorkersToFinish(ParallelContext *pcxt) { + bool *alive_workers; + bool *dead_workers; + int nworkers = pcxt->nworkers_launched; + + alive_workers = (bool *) palloc0(nworkers * sizeof(bool)); + dead_workers = (bool *) palloc0(nworkers * sizeof(bool)); + for (;;) { + BgwHandleStatus status; bool anyone_alive = false; int i; @@ -550,9 +558,32 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt) */ CHECK_FOR_INTERRUPTS(); - for (i = 0; i < pcxt->nworkers_launched; ++i) + for (i = 0; i < pcxt->nworkers; ++i) { - if (pcxt->worker[i].error_mqh != NULL) + pid_t pid; + + if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL) + continue; + + /* + * Check for unexpected worker death. This will ensure that if + * the postmaster failed to start the worker, then we don't wait + * for it indefinitely. For workers that are known to be + * launched, we can rely on their error queue being freed once + * they exit. + */ + if (!alive_workers[i]) + { + status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, + &pid); + if (status == BGWH_STOPPED) + dead_workers[i] = true; + if (status != BGWH_NOT_YET_STARTED) + alive_workers[i] = true; + } + + if (pcxt->worker[i].error_mqh != NULL && + !dead_workers[i]) { anyone_alive = true; break; @@ -567,6 +598,10 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt) ResetLatch(MyLatch); } + /* be tidy */ + pfree(alive_workers); + pfree(dead_workers); + if (pcxt->toc != NULL) { FixedParallelState *fps;