From 510af00802c04b8d6d3982069c96082572a76c72 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Thu, 2 May 2024 15:21:26 +0200
Subject: [PATCH v20240505 1/8] Allow parallel create for GIN indexes

Add support for parallel create of GIN indexes, using an approach and
code very similar to the one used by BRIN indexes.

Each worker reads a subset of the table (from a parallel scan), and
accumulates index entries in memory. But instead of writing the results
into the index (after hitting the memory limit), the data are written
to a shared tuplesort (and sorted by index key).

The leader then reads data from the tuplesort, and combines them into
entries that get inserted into the index.
---
 src/backend/access/gin/ginbulk.c           |    7 +
 src/backend/access/gin/gininsert.c         | 1340 +++++++++++++++++++-
 src/backend/access/gin/ginutil.c           |    2 +-
 src/backend/access/transam/parallel.c      |    4 +
 src/backend/utils/sort/tuplesortvariants.c |  154 +++
 src/include/access/gin.h                   |    4 +
 src/include/access/gin_tuple.h             |   29 +
 src/include/utils/tuplesort.h              |    6 +
 src/tools/pgindent/typedefs.list           |    4 +
 9 files changed, 1534 insertions(+), 16 deletions(-)
 create mode 100644 src/include/access/gin_tuple.h

diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c
index a522801c2f7..12eeff04a6c 100644
--- a/src/backend/access/gin/ginbulk.c
+++ b/src/backend/access/gin/ginbulk.c
@@ -153,6 +153,13 @@ ginInsertBAEntry(BuildAccumulator *accum,
 	GinEntryAccumulator *ea;
 	bool		isNew;
 
+	/*
+	 * FIXME prevents writes of uninitialized bytes reported by valgrind in
+	 * writetup (likely that build_gin_tuple copies some fields that are only
+	 * initialized for a certain category, or something similar)
+	 */
+	memset(&eatmp, 0, sizeof(GinEntryAccumulator));
+
 	/*
 	 * For the moment, fill only the fields of eatmp that will be looked at by
 	 * cmpEntryAccumulator or ginCombineData.
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 71f38be90c3..b353e155fc6 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -15,14 +15,124 @@
 #include "postgres.h"
 
 #include "access/gin_private.h"
+#include "access/gin_tuple.h"
+#include "access/table.h"
 #include "access/tableam.h"
 #include "access/xloginsert.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "nodes/execnodes.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/predicate.h"
+#include "tcop/tcopprot.h"		/* pgrminclude ignore */
+#include "utils/datum.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/builtins.h"
+
+
+/* Magic numbers for parallel state sharing */
+#define PARALLEL_KEY_GIN_SHARED			UINT64CONST(0xB000000000000001)
+#define PARALLEL_KEY_TUPLESORT			UINT64CONST(0xB000000000000002)
+#define PARALLEL_KEY_QUERY_TEXT			UINT64CONST(0xB000000000000003)
+#define PARALLEL_KEY_WAL_USAGE			UINT64CONST(0xB000000000000004)
+#define PARALLEL_KEY_BUFFER_USAGE		UINT64CONST(0xB000000000000005)
+
+/*
+ * Status for index builds performed in parallel.  This is allocated in a
+ * dynamic shared memory segment.
+ */
+typedef struct GinShared
+{
+	/*
+	 * These fields are not modified during the build.  They primarily exist
+	 * for the benefit of worker processes that need to create state
+	 * corresponding to that used by the leader.
+	 */
+	Oid			heaprelid;
+	Oid			indexrelid;
+	bool		isconcurrent;
+	int			scantuplesortstates;
+
+	/*
+	 * workersdonecv is used to monitor the progress of workers.  All parallel
+	 * participants must indicate that they are done before leader can use
+	 * results built by the workers (and before leader can write the data into
+	 * the index).
+	 */
+	ConditionVariable workersdonecv;
+
+	/*
+	 * mutex protects the mutable state defined below.
+	 *
+	 * These fields contain status information of interest to GIN index builds
+	 * that must work just the same when an index is built in parallel.
+	 */
+	slock_t		mutex;
+
+	/*
+	 * Mutable state that is maintained by workers, and reported back to
+	 * leader at end of the scans.
+	 *
+	 * nparticipantsdone is number of worker processes finished.
+	 *
+	 * reltuples is the total number of input heap tuples.
+	 *
+	 * indtuples is the total number of tuples that made it into the index.
+	 */
+	int			nparticipantsdone;
+	double		reltuples;
+	double		indtuples;
+
+	/*
+	 * ParallelTableScanDescData data follows. Can't directly embed here, as
+	 * implementations of the parallel table scan desc interface might need
+	 * stronger alignment.
+	 */
+} GinShared;
+
+/*
+ * Return pointer to a GinShared's parallel table scan.
+ *
+ * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
+ * MAXALIGN.
+ */
+#define ParallelTableScanFromGinShared(shared) \
+	(ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinShared)))
+
+/*
+ * Status for leader in parallel index build.
+ */
+typedef struct GinLeader
+{
+	/* parallel context itself */
+	ParallelContext *pcxt;
+
+	/*
+	 * nparticipanttuplesorts is the exact number of worker processes
+	 * successfully launched, plus one leader process if it participates as a
+	 * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
+	 * participating as a worker).
+	 */
+	int			nparticipanttuplesorts;
+
+	/*
+	 * Leader process convenience pointers to shared state (leader avoids TOC
+	 * lookups).
+	 *
+	 * GinShared is the shared state for entire build.  sharedsort is the
+	 * shared, tuplesort-managed state passed to each process tuplesort.
+	 * snapshot is the snapshot used by the scan iff an MVCC snapshot is
+	 * required.
+	 */
+	GinShared  *ginshared;
+	Sharedsort *sharedsort;
+	Snapshot	snapshot;
+	WalUsage   *walusage;
+	BufferUsage *bufferusage;
+} GinLeader;
 
 typedef struct
 {
@@ -32,9 +142,49 @@ typedef struct
 	MemoryContext tmpCtx;
 	MemoryContext funcCtx;
 	BuildAccumulator accum;
+
+	/* FIXME likely duplicate with indtuples */
+	double		bs_numtuples;
+	double		bs_reltuples;
+
+	/*
+	 * bs_leader is only present when a parallel index build is performed, and
+	 * only in the leader process. (Actually, only the leader process has a
+	 * GinBuildState.)
+	 */
+	GinLeader  *bs_leader;
+	int			bs_worker_id;
+
+	/*
+	 * The sortstate is used by workers (including the leader). It has to be
+	 * part of the build state, because that's the only thing passed to the
+	 * build callback etc.
+	 */
+	Tuplesortstate *bs_sortstate;
 } GinBuildState;
 
 
+/* parallel index builds */
+static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
+								bool isconcurrent, int request);
+static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
+static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
+static double _gin_parallel_heapscan(GinBuildState *buildstate);
+static double _gin_parallel_merge(GinBuildState *buildstate);
+static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
+											  Relation heap, Relation index);
+static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
+										 GinShared *ginshared,
+										 Sharedsort *sharedsort,
+										 Relation heap, Relation index,
+										 int sortmem, bool progress);
+
+static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items);
+static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
+								  Datum key, int16 typlen, bool typbyval,
+								  ItemPointerData *items, uint32 nitems,
+								  Size *len);
+
 /*
  * Adds array of item pointers to tuple's posting list, or
  * creates posting tree and tuple pointing to tree in case
@@ -313,12 +463,95 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
 	MemoryContextSwitchTo(oldCtx);
 }
 
+static void
+ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
+						 bool *isnull, bool tupleIsAlive, void *state)
+{
+	GinBuildState *buildstate = (GinBuildState *) state;
+	MemoryContext oldCtx;
+	int			i;
+
+	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+	for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
+		ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
+							   values[i], isnull[i], tid);
+
+	/*
+	 * XXX idea - Instead of writing the entries directly into the shared
+	 * tuplesort, write it into a local one, do the sort in the worker, and
+	 * combine the results. For large tables with many different keys that's
+	 * going to work better than the current approach where we don't get many
+	 * matches in work_mem (maybe this should use 32MB, which is what we use
+	 * when planning, but even that may not be great). Which means we are
+	 * likely to have many entries with a single TID, forcing the leader to do
+	 * a qsort() when merging the data, often amounting to ~50% of the serial
+	 * part. By doing the qsort() in a worker, leader then can do a mergesort
+	 * (likely cheaper). Also, it means the amount of data worker->leader is
+	 * going to be lower thanks to deduplication.
+	 *
+	 * Disadvantage: It needs more disk space, possibly up to 2x, because each
+	 * worker creates a tuplesort, then "transforms it" into the shared
+	 * tuplesort (hopefully less data, but not guaranteed).
+	 *
+	 * It's however possible to partition the data into multiple tuplesorts
+	 * per worker (by hashing). We don't need perfect sorting, and we can even
+	 * live with "equal" keys having multiple hashes (if there are multiple
+	 * binary representations of the value).
+	 */
+
+	/*
+	 * If we've maxed out our available memory, dump everything to the
+	 * tuplesort
+	 *
+	 * XXX probably should use 32MB, not work_mem, as used during planning?
+	 */
+	if (buildstate->accum.allocatedMemory >= (Size) work_mem * 1024L)
+	{
+		ItemPointerData *list;
+		Datum		key;
+		GinNullCategory category;
+		uint32		nlist;
+		OffsetNumber attnum;
+		TupleDesc	tdesc = RelationGetDescr(index);
+
+		ginBeginBAScan(&buildstate->accum);
+		while ((list = ginGetBAEntry(&buildstate->accum,
+									 &attnum, &key, &category, &nlist)) != NULL)
+		{
+			/* information about the key */
+			Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1));
+
+			/* GIN tuple and tuple length */
+			GinTuple   *tup;
+			Size		tuplen;
+
+			/* there could be many entries, so be willing to abort here */
+			CHECK_FOR_INTERRUPTS();
+
+			tup = _gin_build_tuple(attnum, category,
+								   key, attr->attlen, attr->attbyval,
+								   list, nlist, &tuplen);
+
+			tuplesort_putgintuple(buildstate->bs_sortstate, tup, tuplen);
+
+			pfree(tup);
+		}
+
+		MemoryContextReset(buildstate->tmpCtx);
+		ginInitBA(&buildstate->accum);
+	}
+
+	MemoryContextSwitchTo(oldCtx);
+}
+
 IndexBuildResult *
 ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 {
 	IndexBuildResult *result;
 	double		reltuples;
 	GinBuildState buildstate;
+	GinBuildState *state = &buildstate;
 	Buffer		RootBuffer,
 				MetaBuffer;
 	ItemPointerData *list;
@@ -336,6 +569,14 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	buildstate.indtuples = 0;
 	memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
 
+	/*
+	 * XXX Make sure to initialize a bunch of fields, not to trip valgrind.
+	 * Maybe there should be an "init" function for build state?
+	 */
+	buildstate.bs_numtuples = 0;
+	buildstate.bs_reltuples = 0;
+	buildstate.bs_leader = NULL;
+
 	/* initialize the meta page */
 	MetaBuffer = GinNewBuffer(index);
 
@@ -376,25 +617,91 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	ginInitBA(&buildstate.accum);
 
 	/*
-	 * Do the heap scan.  We disallow sync scan here because dataPlaceToPage
-	 * prefers to receive tuples in TID order.
+	 * Attempt to launch parallel worker scan when required
+	 *
+	 * XXX plan_create_index_workers makes the number of workers dependent on
+	 * maintenance_work_mem, requiring 32MB for each worker. That makes sense
+	 * for btree, but not for GIN, which can do with much less memory. So
+	 * maybe make that somehow less strict, optionally?
+	 */
+	if (indexInfo->ii_ParallelWorkers > 0)
+		_gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
+							indexInfo->ii_ParallelWorkers);
+
+
+	/*
+	 * If parallel build requested and at least one worker process was
+	 * successfully launched, set up coordination state, wait for workers to
+	 * complete. Then read all tuples from the shared tuplesort and insert
+	 * them into the index.
+	 *
+	 * In serial mode, simply scan the table and build the index one index
+	 * tuple at a time.
 	 */
-	reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
-									   ginBuildCallback, (void *) &buildstate,
-									   NULL);
+	if (state->bs_leader)
+	{
+		SortCoordinate coordinate;
+
+		coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
+		coordinate->isWorker = false;
+		coordinate->nParticipants =
+			state->bs_leader->nparticipanttuplesorts;
+		coordinate->sharedsort = state->bs_leader->sharedsort;
+
+		/*
+		 * Begin leader tuplesort.
+		 *
+		 * In cases where parallelism is involved, the leader receives the
+		 * same share of maintenance_work_mem as a serial sort (it is
+		 * generally treated in the same way as a serial sort once we return).
+		 * Parallel worker Tuplesortstates will have received only a fraction
+		 * of maintenance_work_mem, though.
+		 *
+		 * We rely on the lifetime of the Leader Tuplesortstate almost not
+		 * overlapping with any worker Tuplesortstate's lifetime.  There may
+		 * be some small overlap, but that's okay because we rely on leader
+		 * Tuplesortstate only allocating a small, fixed amount of memory
+		 * here. When its tuplesort_performsort() is called (by our caller),
+		 * and significant amounts of memory are likely to be used, all
+		 * workers must have already freed almost all memory held by their
+		 * Tuplesortstates (they are about to go away completely, too).  The
+		 * overall effect is that maintenance_work_mem always represents an
+		 * absolute high watermark on the amount of memory used by a CREATE
+		 * INDEX operation, regardless of the use of parallelism or any other
+		 * factor.
+		 */
+		state->bs_sortstate =
+			tuplesort_begin_index_gin(maintenance_work_mem, coordinate,
+									  TUPLESORT_NONE);
 
-	/* dump remaining entries to the index */
-	oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
-	ginBeginBAScan(&buildstate.accum);
-	while ((list = ginGetBAEntry(&buildstate.accum,
-								 &attnum, &key, &category, &nlist)) != NULL)
+		/* scan the relation and merge per-worker results */
+		reltuples = _gin_parallel_merge(state);
+
+		_gin_end_parallel(state->bs_leader, state);
+	}
+	else						/* no parallel index build */
 	{
-		/* there could be many entries, so be willing to abort here */
-		CHECK_FOR_INTERRUPTS();
-		ginEntryInsert(&buildstate.ginstate, attnum, key, category,
-					   list, nlist, &buildstate.buildStats);
+		/*
+		 * Do the heap scan.  We disallow sync scan here because
+		 * dataPlaceToPage prefers to receive tuples in TID order.
+		 */
+		reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
+										   ginBuildCallback, (void *) &buildstate,
+										   NULL);
+
+		/* dump remaining entries to the index */
+		oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
+		ginBeginBAScan(&buildstate.accum);
+		while ((list = ginGetBAEntry(&buildstate.accum,
+									 &attnum, &key, &category, &nlist)) != NULL)
+		{
+			/* there could be many entries, so be willing to abort here */
+			CHECK_FOR_INTERRUPTS();
+			ginEntryInsert(&buildstate.ginstate, attnum, key, category,
+						   list, nlist, &buildstate.buildStats);
+		}
+		MemoryContextSwitchTo(oldCtx);
 	}
-	MemoryContextSwitchTo(oldCtx);
 
 	MemoryContextDelete(buildstate.funcCtx);
 	MemoryContextDelete(buildstate.tmpCtx);
@@ -534,3 +841,1006 @@ gininsert(Relation index, Datum *values, bool *isnull,
 
 	return false;
 }
+
+/*
+ * Create parallel context, and launch workers for leader.
+ *
+ * buildstate argument should be initialized (with the exception of the
+ * tuplesort states, which may later be created based on shared
+ * state initially set up here).
+ *
+ * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
+ *
+ * request is the target number of parallel worker processes to launch.
+ *
+ * Sets buildstate's GinLeader, which caller must use to shut down parallel
+ * mode by passing it to _gin_end_parallel() at the very end of its index
+ * build.  If not even a single worker process can be launched, this is
+ * never set, and caller should proceed with a serial index build.
+ */
+static void
+_gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
+					bool isconcurrent, int request)
+{
+	ParallelContext *pcxt;
+	int			scantuplesortstates;
+	Snapshot	snapshot;
+	Size		estginshared;
+	Size		estsort;
+	GinShared  *ginshared;
+	Sharedsort *sharedsort;
+	GinLeader  *ginleader = (GinLeader *) palloc0(sizeof(GinLeader));
+	WalUsage   *walusage;
+	BufferUsage *bufferusage;
+	bool		leaderparticipates = true;
+	int			querylen;
+
+#ifdef DISABLE_LEADER_PARTICIPATION
+	leaderparticipates = false;
+#endif
+
+	/*
+	 * Enter parallel mode, and create context for parallel build of gin index
+	 */
+	EnterParallelMode();
+	Assert(request > 0);
+	pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
+								 request);
+
+	scantuplesortstates = leaderparticipates ? request + 1 : request;
+
+	/*
+	 * Prepare for scan of the base relation.  In a normal index build, we use
+	 * SnapshotAny because we must retrieve all tuples and do our own time
+	 * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
+	 * concurrent build, we take a regular MVCC snapshot and index whatever's
+	 * live according to that.
+	 */
+	if (!isconcurrent)
+		snapshot = SnapshotAny;
+	else
+		snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+	/*
+	 * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
+	 */
+	estginshared = _gin_parallel_estimate_shared(heap, snapshot);
+	shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
+	estsort = tuplesort_estimate_shared(scantuplesortstates);
+	shm_toc_estimate_chunk(&pcxt->estimator, estsort);
+
+	shm_toc_estimate_keys(&pcxt->estimator, 2);
+
+	/*
+	 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
+	 * and PARALLEL_KEY_BUFFER_USAGE.
+	 *
+	 * If there are no extensions loaded that care, we could skip this.  We
+	 * have no way of knowing whether anyone's looking at pgWalUsage or
+	 * pgBufferUsage, so do it unconditionally.
+	 */
+	shm_toc_estimate_chunk(&pcxt->estimator,
+						   mul_size(sizeof(WalUsage), pcxt->nworkers));
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+	shm_toc_estimate_chunk(&pcxt->estimator,
+						   mul_size(sizeof(BufferUsage), pcxt->nworkers));
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+	/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
+	if (debug_query_string)
+	{
+		querylen = strlen(debug_query_string);
+		shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
+		shm_toc_estimate_keys(&pcxt->estimator, 1);
+	}
+	else
+		querylen = 0;			/* keep compiler quiet */
+
+	/* Everyone's had a chance to ask for space, so now create the DSM */
+	InitializeParallelDSM(pcxt);
+
+	/* If no DSM segment was available, back out (do serial build) */
+	if (pcxt->seg == NULL)
+	{
+		if (IsMVCCSnapshot(snapshot))
+			UnregisterSnapshot(snapshot);
+		DestroyParallelContext(pcxt);
+		ExitParallelMode();
+		return;
+	}
+
+	/* Store shared build state, for which we reserved space */
+	ginshared = (GinShared *) shm_toc_allocate(pcxt->toc, estginshared);
+	/* Initialize immutable state */
+	ginshared->heaprelid = RelationGetRelid(heap);
+	ginshared->indexrelid = RelationGetRelid(index);
+	ginshared->isconcurrent = isconcurrent;
+	ginshared->scantuplesortstates = scantuplesortstates;
+
+	ConditionVariableInit(&ginshared->workersdonecv);
+	SpinLockInit(&ginshared->mutex);
+
+	/* Initialize mutable state */
+	ginshared->nparticipantsdone = 0;
+	ginshared->reltuples = 0.0;
+	ginshared->indtuples = 0.0;
+
+	table_parallelscan_initialize(heap,
+								  ParallelTableScanFromGinShared(ginshared),
+								  snapshot);
+
+	/*
+	 * Store shared tuplesort-private state, for which we reserved space.
+	 * Then, initialize opaque state using tuplesort routine.
+	 */
+	sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
+	tuplesort_initialize_shared(sharedsort, scantuplesortstates,
+								pcxt->seg);
+
+	/*
+	 * Insert the shared state and the shared tuplesort state into the TOC,
+	 * so that worker processes can look them up by key.
+	 */
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
+
+	/* Store query string for workers */
+	if (debug_query_string)
+	{
+		char	   *sharedquery;
+
+		sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
+		memcpy(sharedquery, debug_query_string, querylen + 1);
+		shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
+	}
+
+	/*
+	 * Allocate space for each worker's WalUsage and BufferUsage; no need to
+	 * initialize.
+	 */
+	walusage = shm_toc_allocate(pcxt->toc,
+								mul_size(sizeof(WalUsage), pcxt->nworkers));
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
+	bufferusage = shm_toc_allocate(pcxt->toc,
+								   mul_size(sizeof(BufferUsage), pcxt->nworkers));
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
+
+	/* Launch workers, saving status for leader/caller */
+	LaunchParallelWorkers(pcxt);
+	ginleader->pcxt = pcxt;
+	ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
+	if (leaderparticipates)
+		ginleader->nparticipanttuplesorts++;
+	ginleader->ginshared = ginshared;
+	ginleader->sharedsort = sharedsort;
+	ginleader->snapshot = snapshot;
+	ginleader->walusage = walusage;
+	ginleader->bufferusage = bufferusage;
+
+	/* If no workers were successfully launched, back out (do serial build) */
+	if (pcxt->nworkers_launched == 0)
+	{
+		_gin_end_parallel(ginleader, NULL);
+		return;
+	}
+
+	/* Save leader state now that it's clear build will be parallel */
+	buildstate->bs_leader = ginleader;
+
+	/* Join heap scan ourselves */
+	if (leaderparticipates)
+		_gin_leader_participate_as_worker(buildstate, heap, index);
+
+	/*
+	 * Caller needs to wait for all launched workers when we return.  Make
+	 * sure that the failure-to-start case will not hang forever.
+	 */
+	WaitForParallelWorkersToAttach(pcxt);
+}
+
+/*
+ * Shut down workers, destroy parallel context, and end parallel mode.
+ */
+static void
+_gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
+{
+	int			i;
+
+	/* Shutdown worker processes */
+	WaitForParallelWorkersToFinish(ginleader->pcxt);
+
+	/*
+	 * Next, accumulate WAL usage.  (This must wait for the workers to finish,
+	 * or we might get incomplete data.)
+	 */
+	for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
+		InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
+
+	/* Free last reference to MVCC snapshot, if one was used */
+	if (IsMVCCSnapshot(ginleader->snapshot))
+		UnregisterSnapshot(ginleader->snapshot);
+	DestroyParallelContext(ginleader->pcxt);
+	ExitParallelMode();
+}
+
+/*
+ * Within leader, wait for end of heap scan.
+ *
+ * When called, parallel heap scan started by _gin_begin_parallel() will
+ * already be underway within worker processes (when leader participates
+ * as a worker, we should end up here just as workers are finishing).
+ *
+ * Returns the total number of heap tuples scanned.
+ */
+static double
+_gin_parallel_heapscan(GinBuildState *state)
+{
+	GinShared  *ginshared = state->bs_leader->ginshared;
+	int			nparticipanttuplesorts;
+
+	nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
+	for (;;)
+	{
+		SpinLockAcquire(&ginshared->mutex);
+		if (ginshared->nparticipantsdone == nparticipanttuplesorts)
+		{
+			/* copy the data into leader state */
+			state->bs_reltuples = ginshared->reltuples;
+			state->bs_numtuples = ginshared->indtuples;
+
+			SpinLockRelease(&ginshared->mutex);
+			break;
+		}
+		SpinLockRelease(&ginshared->mutex);
+
+		ConditionVariableSleep(&ginshared->workersdonecv,
+							   WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
+	}
+
+	ConditionVariableCancelSleep();
+
+	return state->bs_reltuples;
+}
+
+static int
+tid_cmp(const void *a, const void *b)
+{
+	return ItemPointerCompare((ItemPointer) a, (ItemPointer) b);
+}
+
+/*
+ * State used to accumulate TIDs from multiple GinTuples for the same
+ * key value.
+ *
+ * XXX Similar purpose to BuildAccumulator, but much simpler.
+ */
+typedef struct GinBuffer
+{
+	OffsetNumber attnum;
+	GinNullCategory category;
+	Datum		key;			/* 0 if no key (and keylen == 0) */
+	Size		keylen;			/* number of bytes (not typlen) */
+
+	/* type info */
+	int16		typlen;
+	bool		typbyval;
+
+	/* array of TID values */
+	int			nitems;
+	int			maxitems;
+	ItemPointerData *items;
+} GinBuffer;
+
+/* XXX should do more checks */
+static void
+AssertCheckGinBuffer(GinBuffer *buffer)
+{
+#ifdef USE_ASSERT_CHECKING
+	Assert(buffer->nitems <= buffer->maxitems);
+#endif
+}
+
+static void
+AssertCheckItemPointers(ItemPointerData *items, int nitems, bool sorted)
+{
+#ifdef USE_ASSERT_CHECKING
+	for (int i = 0; i < nitems; i++)
+	{
+		Assert(ItemPointerIsValid(&items[i]));
+
+		if ((i == 0) || !sorted)
+			continue;
+
+		Assert(ItemPointerCompare(&items[i - 1], &items[i]) < 0);
+	}
+#endif
+}
+
+static GinBuffer *
+GinBufferInit(void)
+{
+	return palloc0(sizeof(GinBuffer));
+}
+
+static bool
+GinBufferIsEmpty(GinBuffer *buffer)
+{
+	return (buffer->nitems == 0);
+}
+
+/*
+ * Compare if the tuple matches the already accumulated data. Compare
+ * scalar fields first, before the actual key.
+ *
+ * XXX The key is compared using memcmp, which means that if a key has
+ * multiple binary representations, we may end up treating them as
+ * different here. But that's OK, the index will merge them anyway.
+ */
+static bool
+GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
+{
+	AssertCheckGinBuffer(buffer);
+
+	if (tup->attrnum != buffer->attnum)
+		return false;
+
+	/* same attribute should have the same type info */
+	Assert(tup->typbyval == buffer->typbyval);
+	Assert(tup->typlen == buffer->typlen);
+
+	if (tup->category != buffer->category)
+		return false;
+
+	if (tup->keylen != buffer->keylen)
+		return false;
+
+	/*
+	 * For NULL/empty keys, this means equality, for normal keys we need to
+	 * compare the actual key value.
+	 */
+	if (buffer->category != GIN_CAT_NORM_KEY)
+		return true;
+
+	/*
+	 * Compare the key value, depending on the type information.
+	 *
+	 * XXX Not sure this works correctly for byval types that don't need the
+	 * whole Datum. What if there is garbage in the padding bytes?
+	 */
+	if (buffer->typbyval)
+		return (buffer->key == *(Datum *) tup->data);
+
+	/* byref values simply uses memcmp for comparison */
+	return (memcmp(tup->data, DatumGetPointer(buffer->key), buffer->keylen) == 0);
+}
+
+static void
+GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
+{
+	ItemPointerData *items;
+	Datum		key;
+
+	AssertCheckGinBuffer(buffer);
+
+	key = _gin_parse_tuple(tup, &items);
+
+	/* if the buffer is empty, set the fields (and copy the key) */
+	if (GinBufferIsEmpty(buffer))
+	{
+		buffer->category = tup->category;
+		buffer->keylen = tup->keylen;
+		buffer->attnum = tup->attrnum;
+
+		buffer->typlen = tup->typlen;
+		buffer->typbyval = tup->typbyval;
+
+		if (tup->category == GIN_CAT_NORM_KEY)
+			buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
+		else
+			buffer->key = (Datum) 0;
+	}
+
+	/* enlarge the TID buffer, if needed */
+	if (buffer->nitems + tup->nitems > buffer->maxitems)
+	{
+		/* 64 seems like a good init value */
+		buffer->maxitems = Max(buffer->maxitems, 64);
+
+		while (buffer->nitems + tup->nitems > buffer->maxitems)
+			buffer->maxitems *= 2;
+
+		if (buffer->items == NULL)
+			buffer->items = palloc(buffer->maxitems * sizeof(ItemPointerData));
+		else
+			buffer->items = repalloc(buffer->items,
+									 buffer->maxitems * sizeof(ItemPointerData));
+	}
+
+	/* now we should be guaranteed to have enough space for all the TIDs */
+	Assert(buffer->nitems + tup->nitems <= buffer->maxitems);
+
+	/* copy the new TIDs into the buffer */
+	memcpy(&buffer->items[buffer->nitems], items, sizeof(ItemPointerData) * tup->nitems);
+	buffer->nitems += tup->nitems;
+
+	AssertCheckItemPointers(buffer->items, buffer->nitems, false);
+}
+
+static void
+GinBufferSortItems(GinBuffer *buffer)
+{
+	/* we should not have a buffer with no TIDs to sort */
+	Assert(buffer->items != NULL);
+	Assert(buffer->nitems > 0);
+
+	pg_qsort(buffer->items, buffer->nitems, sizeof(ItemPointerData), tid_cmp);
+
+	AssertCheckItemPointers(buffer->items, buffer->nitems, true);
+}
+
+/* XXX probably would be better to have a memory context for the buffer */
+static void
+GinBufferReset(GinBuffer *buffer)
+{
+	Assert(!GinBufferIsEmpty(buffer));
+
+	/* release byref values, do nothing for by-val ones */
+	if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
+		pfree(DatumGetPointer(buffer->key));
+
+	/* XXX not really needed, but easier to trigger NULL deref etc. */
+	buffer->key = (Datum) 0;
+
+	buffer->attnum = 0;
+	buffer->category = 0;
+	buffer->keylen = 0;
+	buffer->nitems = 0;
+
+	buffer->typlen = 0;
+	buffer->typbyval = 0;
+
+	/* XXX should do something with extremely large array of items? */
+}
+
+/*
+ * XXX Maybe check size of the TID arrays, and return false if it's too
+ * large (more than maintenance_work_mem or something?).
+ */
+static bool
+GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
+{
+	/* empty buffer can accept data for any key */
+	if (GinBufferIsEmpty(buffer))
+		return true;
+
+	/* otherwise just data for the same key */
+	return GinBufferKeyEquals(buffer, tup);
+}
+
+/*
+ * Within leader, wait for end of heap scan and merge per-worker results.
+ *
+ * After waiting for all workers to finish, read the GIN tuples from the
+ * shared tuplesort (sorted by category and key), and accumulate TIDs for
+ * entries with the same key into a single buffer, so that each key is
+ * inserted into the index at once. This avoids repeatedly decompressing
+ * and recompressing posting lists for the same key.
+ *
+ * Returns the total number of heap tuples scanned.
+ *
+ * FIXME probably should have local memory contexts similar to what
+ * _brin_parallel_merge does.
+ */
+static double
+_gin_parallel_merge(GinBuildState *state)
+{
+	GinTuple   *tup;
+	Size		tuplen;
+	double		reltuples = 0;
+	GinBuffer  *buffer;
+
+	/* wait for workers to scan table and produce partial results */
+	reltuples = _gin_parallel_heapscan(state);
+
+	/* do the actual sort in the leader */
+	tuplesort_performsort(state->bs_sortstate);
+
+	/* initialize buffer to combine entries for the same key */
+	buffer = GinBufferInit();
+
+	/*
+	 * Read the GIN tuples from the shared tuplesort, sorted by category and
+	 * key. That probably gives us order matching how data is organized in the
+	 * index.
+	 *
+	 * XXX Maybe we should sort by key first, then by category?
+	 *
+	 * We don't insert the GIN tuples right away, but instead accumulate as
+	 * many TIDs for the same key as possible, and then insert that at once.
+	 * This way we don't need to decompress/recompress the posting lists, etc.
+	 */
+	while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If the buffer can accept the new GIN tuple, just store it there and
+		 * we're done. If it's a different key (or maybe too much data) flush
+		 * the current contents into the index first.
+		 */
+		if (!GinBufferCanAddKey(buffer, tup))
+		{
+			/*
+			 * Buffer is not empty and it's storing a different key - flush
+			 * the data into the insert, and start a new entry for current
+			 * GinTuple.
+			 */
+			GinBufferSortItems(buffer);
+
+			ginEntryInsert(&state->ginstate,
+						   buffer->attnum, buffer->key, buffer->category,
+						   buffer->items, buffer->nitems, &state->buildStats);
+
+			/* discard the existing data */
+			GinBufferReset(buffer);
+		}
+
+		/* now remember the new key */
+		GinBufferStoreTuple(buffer, tup);
+	}
+
+	/* flush data remaining in the buffer (for the last key) */
+	if (!GinBufferIsEmpty(buffer))
+	{
+		GinBufferSortItems(buffer);
+
+		ginEntryInsert(&state->ginstate,
+					   buffer->attnum, buffer->key, buffer->category,
+					   buffer->items, buffer->nitems, &state->buildStats);
+
+		/* discard the existing data */
+		GinBufferReset(buffer);
+	}
+
+	tuplesort_end(state->bs_sortstate);
+
+	return reltuples;
+}
+
+/*
+ * Returns size of shared memory required to store state for a parallel
+ * gin index build based on the snapshot its parallel scan will use.
+ */
+static Size
+_gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
+{
+	/* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
+	return add_size(BUFFERALIGN(sizeof(GinShared)),
+					table_parallelscan_estimate(heap, snapshot));
+}
+
+/*
+ * Within leader, participate as a parallel worker.
+ */
+static void
+_gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
+{
+	GinLeader  *ginleader = buildstate->bs_leader;
+	int			sortmem;
+
+	/*
+	 * Might as well use reliable figure when doling out maintenance_work_mem
+	 * (when requested number of workers were not launched, this will be
+	 * somewhat higher than it is for other workers).
+	 */
+	sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
+
+	/* Perform work common to all participants */
+	_gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
+								 ginleader->sharedsort, heap, index, sortmem, true);
+}
+
+/*
+ * Perform a worker's portion of a parallel sort.
+ *
+ * This generates a tuplesort for the worker portion of the table.
+ *
+ * sortmem is the amount of working memory to use within each worker,
+ * expressed in KBs.
+ *
+ * When this returns, workers are done, and need only release resources.
+ */
+static void
+_gin_parallel_scan_and_build(GinBuildState *state,
+							 GinShared *ginshared, Sharedsort *sharedsort,
+							 Relation heap, Relation index,
+							 int sortmem, bool progress)
+{
+	SortCoordinate coordinate;
+	TableScanDesc scan;
+	double		reltuples;
+	IndexInfo  *indexInfo;
+
+	/* Initialize local tuplesort coordination state */
+	coordinate = palloc0(sizeof(SortCoordinateData));
+	coordinate->isWorker = true;
+	coordinate->nParticipants = -1;
+	coordinate->sharedsort = sharedsort;
+
+	/* Begin "partial" tuplesort */
+	state->bs_sortstate = tuplesort_begin_index_gin(sortmem, coordinate,
+													TUPLESORT_NONE);
+
+	/* Join parallel scan */
+	indexInfo = BuildIndexInfo(index);
+	indexInfo->ii_Concurrent = ginshared->isconcurrent;
+
+	scan = table_beginscan_parallel(heap,
+									ParallelTableScanFromGinShared(ginshared));
+
+	reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+									   ginBuildCallbackParallel, state, scan);
+
+	/* insert the last item */
+	/* write remaining accumulated entries */
+	{
+		ItemPointerData *list;
+		Datum		key;
+		GinNullCategory category;
+		uint32		nlist;
+		OffsetNumber attnum;
+		TupleDesc	tdesc = RelationGetDescr(index);
+
+		ginBeginBAScan(&state->accum);
+		while ((list = ginGetBAEntry(&state->accum,
+									 &attnum, &key, &category, &nlist)) != NULL)
+		{
+			/* information about the key */
+			Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1));
+
+			GinTuple   *tup;
+			Size		len;
+
+			/* there could be many entries, so be willing to abort here */
+			CHECK_FOR_INTERRUPTS();
+
+			tup = _gin_build_tuple(attnum, category,
+								   key, attr->attlen, attr->attbyval,
+								   list, nlist, &len);
+
+			tuplesort_putgintuple(state->bs_sortstate, tup, len);
+
+			pfree(tup);
+		}
+
+		MemoryContextReset(state->tmpCtx);
+		ginInitBA(&state->accum);
+	}
+
+	/* sort the GIN tuples built by this worker */
+	tuplesort_performsort(state->bs_sortstate);
+
+	state->bs_reltuples += reltuples;
+
+	/*
+	 * Done.  Record ambuild statistics.
+	 */
+	SpinLockAcquire(&ginshared->mutex);
+	ginshared->nparticipantsdone++;
+	ginshared->reltuples += state->bs_reltuples;
+	ginshared->indtuples += state->bs_numtuples;
+	SpinLockRelease(&ginshared->mutex);
+
+	/* Notify leader */
+	ConditionVariableSignal(&ginshared->workersdonecv);
+
+	tuplesort_end(state->bs_sortstate);
+}
+
+/*
+ * Perform work within a launched parallel process.
+ */
+void
+_gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
+{
+	char	   *sharedquery;
+	GinShared  *ginshared;
+	Sharedsort *sharedsort;
+	GinBuildState buildstate;
+	Relation	heapRel;
+	Relation	indexRel;
+	LOCKMODE	heapLockmode;
+	LOCKMODE	indexLockmode;
+	WalUsage   *walusage;
+	BufferUsage *bufferusage;
+	int			sortmem;
+
+	/*
+	 * The only possible status flag that can be set to the parallel worker is
+	 * PROC_IN_SAFE_IC.
+	 */
+	Assert((MyProc->statusFlags == 0) ||
+		   (MyProc->statusFlags == PROC_IN_SAFE_IC));
+
+	/* Set debug_query_string for individual workers first */
+	sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
+	debug_query_string = sharedquery;
+
+	/* Report the query string from leader */
+	pgstat_report_activity(STATE_RUNNING, debug_query_string);
+
+	/* Look up gin shared state */
+	ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
+
+	/* Open relations using lock modes known to be obtained by index.c */
+	if (!ginshared->isconcurrent)
+	{
+		heapLockmode = ShareLock;
+		indexLockmode = AccessExclusiveLock;
+	}
+	else
+	{
+		heapLockmode = ShareUpdateExclusiveLock;
+		indexLockmode = RowExclusiveLock;
+	}
+
+	/* Open relations within worker */
+	heapRel = table_open(ginshared->heaprelid, heapLockmode);
+	indexRel = index_open(ginshared->indexrelid, indexLockmode);
+
+	/* initialize the GIN build state */
+	initGinState(&buildstate.ginstate, indexRel);
+	buildstate.indtuples = 0;
+	memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
+
+	/*
+	 * create a temporary memory context that is used to hold data not yet
+	 * dumped out to the index
+	 */
+	buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
+											  "Gin build temporary context",
+											  ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * create a temporary memory context that is used for calling
+	 * ginExtractEntries(), and can be reset after each tuple
+	 */
+	buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
+											   "Gin build temporary context for user-defined function",
+											   ALLOCSET_DEFAULT_SIZES);
+
+	buildstate.accum.ginstate = &buildstate.ginstate;
+	ginInitBA(&buildstate.accum);
+
+
+	/* Look up shared state private to tuplesort.c */
+	sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
+	tuplesort_attach_shared(sharedsort, seg);
+
+	/* Prepare to track buffer usage during parallel execution */
+	InstrStartParallelQuery();
+
+	/*
+	 * Might as well use reliable figure when doling out maintenance_work_mem
+	 * (when requested number of workers were not launched, this will be
+	 * somewhat higher than it is for other workers).
+	 */
+	sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
+
+	_gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
+								 heapRel, indexRel, sortmem, false);
+
+	/* Report WAL/buffer usage during parallel execution */
+	bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
+	walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
+	InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
+						  &walusage[ParallelWorkerNumber]);
+
+	index_close(indexRel, indexLockmode);
+	table_close(heapRel, heapLockmode);
+}
+
+/*
+ * _gin_build_tuple
+ *		Serialize the state for an index key into a tuple for tuplesort.
+ *
+ * The tuple has a number of scalar fields (mostly matching the build state),
+ * and then a data array that stores the key first, and then the TID list.
+ *
+ * For by-reference data types, we store the actual data. For by-val types
+ * we simply copy the whole Datum, so that we don't have to care about stuff
+ * like endianness etc. We could make it a little bit smaller, but it's not
+ * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
+ * start of the TID list anyway. So we wouldn't save anything.
+ */
+static GinTuple *
+_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
+				 Datum key, int16 typlen, bool typbyval,
+				 ItemPointerData *items, uint32 nitems,
+				 Size *len)
+{
+	GinTuple   *tuple;
+	char	   *ptr;
+
+	Size		tuplen;
+	int			keylen;
+
+	/*
+	 * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
+	 * have actual non-empty key. We include varlena headers and \0 bytes for
+	 * strings, to make it easier to access the data in-line.
+	 *
+	 * For byval types we simply copy the whole Datum. We could store just the
+	 * necessary bytes, but this is simpler to work with and not worth the
+	 * extra complexity. Moreover we still need to do the MAXALIGN to allow
+	 * direct access to items pointers.
+	 */
+	if (category != GIN_CAT_NORM_KEY)
+		keylen = 0;
+	else if (typbyval)
+		keylen = sizeof(Datum);
+	else if (typlen > 0)
+		keylen = typlen;
+	else if (typlen == -1)
+		keylen = VARSIZE_ANY(key);
+	else if (typlen == -2)
+		keylen = strlen(DatumGetPointer(key)) + 1;
+	else
+		elog(ERROR, "invalid typlen");
+
+	/*
+	 * Determine GIN tuple length with all the data included. Be careful about
+	 * alignment, to allow direct access to item pointers.
+	 */
+	tuplen = MAXALIGN(offsetof(GinTuple, data) + keylen) +
+		(sizeof(ItemPointerData) * nitems);
+
+	*len = tuplen;
+
+	/*
+	 * allocate space for the whole GIN tuple
+	 *
+	 * XXX palloc0 so that valgrind does not complain about uninitialized
+	 * bytes in writetup_index_gin, likely because of padding
+	 */
+	tuple = palloc0(tuplen);
+
+	tuple->tuplen = tuplen;
+	tuple->attrnum = attrnum;
+	tuple->category = category;
+	tuple->keylen = keylen;
+	tuple->nitems = nitems;
+
+	/* key type info */
+	tuple->typlen = typlen;
+	tuple->typbyval = typbyval;
+
+	/*
+	 * Copy the key and items into the tuple. First the key value, which we
+	 * can simply copy right at the beginning of the data array.
+	 */
+	if (category == GIN_CAT_NORM_KEY)
+	{
+		if (typbyval)
+		{
+			memcpy(tuple->data, &key, sizeof(Datum));
+		}
+		else if (typlen > 0)	/* byref, fixed length */
+		{
+			memcpy(tuple->data, DatumGetPointer(key), typlen);
+		}
+		else if (typlen == -1)
+		{
+			memcpy(tuple->data, DatumGetPointer(key), keylen);
+		}
+		else if (typlen == -2)
+		{
+			memcpy(tuple->data, DatumGetPointer(key), keylen);
+		}
+	}
+
+	/* finally, copy the TIDs into the array */
+	ptr = (char *) tuple + MAXALIGN(offsetof(GinTuple, data) + keylen);
+
+	memcpy(ptr, items, sizeof(ItemPointerData) * nitems);
+
+	return tuple;
+}
+
+/*
+ * _gin_parse_tuple
+ *		Deserialize the tuple from the tuplestore representation.
+ *
+ * Most of the fields are actually directly accessible, the only thing that
+ * needs more care is the key and the TID list.
+ *
+ * For the key, this returns a regular Datum representing it. It's either the
+ * actual key value, or a pointer to the beginning of the data array (which is
+ * where the data was copied by _gin_build_tuple).
+ *
+ * The pointer to the TID list is returned through 'items' (which is simply
+ * a pointer to the data array).
+ */
+static Datum
+_gin_parse_tuple(GinTuple *a, ItemPointerData **items)
+{
+	Datum		key;
+
+	if (items)
+	{
+		char	   *ptr = (char *) a + MAXALIGN(offsetof(GinTuple, data) + a->keylen);
+
+		*items = (ItemPointerData *) ptr;
+	}
+
+	if (a->category != GIN_CAT_NORM_KEY)
+		return (Datum) 0;
+
+	if (a->typbyval)
+	{
+		memcpy(&key, a->data, a->keylen);
+		return key;
+	}
+
+	return PointerGetDatum(a->data);
+}
+
+/*
+ * _gin_compare_tuples
+ *		Compare GIN tuples, used by tuplesort during parallel index build.
+ *
+ * The scalar fields (attrnum, category) are compared first, the key value is
+ * compared last. The comparisons are done simply by "memcmp", based on the
+ * assumption that if we get two keys that are two different representations
+ * of a logically equal value, it'll get merged by the index build.
+ *
+ * FIXME Is the assumption we can just memcmp() actually valid? Won't this
+ * trigger the "could not split GIN page; all old items didn't fit" error
+ * when trying to update the TID list?
+ */
+int
+_gin_compare_tuples(GinTuple *a, GinTuple *b)
+{
+	Datum		keya,
+				keyb;
+
+	if (a->attrnum < b->attrnum)
+		return -1;
+
+	if (a->attrnum > b->attrnum)
+		return 1;
+
+	if (a->category < b->category)
+		return -1;
+
+	if (a->category > b->category)
+		return 1;
+
+	if ((a->category == GIN_CAT_NORM_KEY) &&
+		(b->category == GIN_CAT_NORM_KEY))
+	{
+		keya = _gin_parse_tuple(a, NULL);
+		keyb = _gin_parse_tuple(b, NULL);
+
+		/*
+		 * works for both byval and byref types with fixed length, because for
+		 * byval we set keylen to sizeof(Datum)
+		 */
+		if (a->typbyval)
+		{
+			return memcmp(&keya, &keyb, a->keylen);
+		}
+		else
+		{
+			if (a->keylen < b->keylen)
+				return -1;
+
+			if (a->keylen > b->keylen)
+				return 1;
+
+			return memcmp(DatumGetPointer(keya), DatumGetPointer(keyb), a->keylen);
+		}
+	}
+
+	return 0;
+}
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 5747ae6a4ca..dd22b44aca9 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -53,7 +53,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->amclusterable = false;
 	amroutine->ampredlocks = true;
 	amroutine->amcanparallel = false;
-	amroutine->amcanbuildparallel = false;
+	amroutine->amcanbuildparallel = true;
 	amroutine->amcaninclude = false;
 	amroutine->amusemaintenanceworkmem = true;
 	amroutine->amsummarizing = false;
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 8613fc6fb54..c9ea769afb5 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/brin.h"
+#include "access/gin.h"
 #include "access/nbtree.h"
 #include "access/parallel.h"
 #include "access/session.h"
@@ -146,6 +147,9 @@ static const struct
 	{
 		"_brin_parallel_build_main", _brin_parallel_build_main
 	},
+	{
+		"_gin_parallel_build_main", _gin_parallel_build_main
+	},
 	{
 		"parallel_vacuum_main", parallel_vacuum_main
 	}
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 05a853caa36..55cc55969e5 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -20,6 +20,7 @@
 #include "postgres.h"
 
 #include "access/brin_tuple.h"
+#include "access/gin_tuple.h"
 #include "access/hash.h"
 #include "access/htup_details.h"
 #include "access/nbtree.h"
@@ -46,6 +47,8 @@ static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
 							   int count);
 static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups,
 									int count);
+static void removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups,
+								   int count);
 static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
 							   int count);
 static int	comparetup_heap(const SortTuple *a, const SortTuple *b,
@@ -74,6 +77,8 @@ static int	comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b
 										   Tuplesortstate *state);
 static int	comparetup_index_brin(const SortTuple *a, const SortTuple *b,
 								  Tuplesortstate *state);
+static int	comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+								 Tuplesortstate *state);
 static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
 						   SortTuple *stup);
 static void readtup_index(Tuplesortstate *state, SortTuple *stup,
@@ -82,6 +87,10 @@ static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape,
 								SortTuple *stup);
 static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
 							   LogicalTape *tape, unsigned int len);
+static void writetup_index_gin(Tuplesortstate *state, LogicalTape *tape,
+							   SortTuple *stup);
+static void readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+							  LogicalTape *tape, unsigned int len);
 static int	comparetup_datum(const SortTuple *a, const SortTuple *b,
 							 Tuplesortstate *state);
 static int	comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b,
@@ -580,6 +589,35 @@ tuplesort_begin_index_brin(int workMem,
 	return state;
 }
 
+
+Tuplesortstate *
+tuplesort_begin_index_gin(int workMem, SortCoordinate coordinate,
+						  int sortopt)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   sortopt);
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin index sort: workMem = %d, randomAccess = %c",
+			 workMem,
+			 sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+	base->nKeys = 1;			/* Only the index key */
+
+	base->removeabbrev = removeabbrev_index_gin;
+	base->comparetup = comparetup_index_gin;
+	base->writetup = writetup_index_gin;
+	base->readtup = readtup_index_gin;
+	base->haveDatum1 = false;
+	base->arg = NULL;
+
+	return state;
+}
+
 Tuplesortstate *
 tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 					  bool nullsFirstFlag, int workMem,
@@ -817,6 +855,37 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
 	MemoryContextSwitchTo(oldcontext);
 }
 
+void
+tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tup, Size size)
+{
+	SortTuple	stup;
+	GinTuple   *ctup;
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+	Size		tuplen;
+
+	/* copy the GinTuple into the right memory context */
+	ctup = palloc(size);
+	memcpy(ctup, tup, size);
+
+	stup.tuple = ctup;
+	stup.datum1 = (Datum) 0;
+	stup.isnull1 = false;
+
+	/* GetMemoryChunkSpace is not supported for bump contexts */
+	if (TupleSortUseBumpTupleCxt(base->sortopt))
+		tuplen = MAXALIGN(size);
+	else
+		tuplen = GetMemoryChunkSpace(ctup);
+
+	tuplesort_puttuple_common(state, &stup,
+							  base->sortKeys &&
+							  base->sortKeys->abbrev_converter &&
+							  !stup.isnull1, tuplen);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
 /*
  * Accept one Datum while collecting input data for sort.
  *
@@ -989,6 +1058,29 @@ tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
 	return &btup->tuple;
 }
 
+GinTuple *
+tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
+{
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+	SortTuple	stup;
+	GinTuple   *tup;
+
+	if (!tuplesort_gettuple_common(state, forward, &stup))
+		stup.tuple = NULL;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	if (!stup.tuple)
+		return false;
+
+	tup = (GinTuple *) stup.tuple;
+
+	*len = tup->tuplen;
+
+	return tup;
+}
+
 /*
  * Fetch the next Datum in either forward or back direction.
  * Returns false if no more datums.
@@ -1777,6 +1869,68 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
 	stup->datum1 = tuple->tuple.bt_blkno;
 }
 
+
+/*
+ * Routines specialized for GIN case
+ */
+
+static void
+removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, int count)
+{
+	Assert(false);
+	elog(ERROR, "removeabbrev_index_gin not implemented");
+}
+
+static int
+comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+					 Tuplesortstate *state)
+{
+	Assert(!TuplesortstateGetPublic(state)->haveDatum1);
+
+	return _gin_compare_tuples((GinTuple *) a->tuple,
+							   (GinTuple *) b->tuple);
+}
+
+static void
+writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	GinTuple   *tuple = (GinTuple *) stup->tuple;
+	unsigned int tuplen = tuple->tuplen;
+
+	tuplen = tuplen + sizeof(tuplen);
+	LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+	LogicalTapeWrite(tape, tuple, tuple->tuplen);
+	if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+		LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+}
+
+static void
+readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+				  LogicalTape *tape, unsigned int len)
+{
+	GinTuple   *tuple;
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	unsigned int tuplen = len - sizeof(unsigned int);
+
+	/*
+	 * Allocate space for the GIN sort tuple, which already has the proper
+	 * length included in the header.
+	 */
+	tuple = (GinTuple *) tuplesort_readtup_alloc(state, tuplen);
+
+	tuple->tuplen = tuplen;
+
+	LogicalTapeReadExact(tape, tuple, tuplen);
+	if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+		LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+	stup->tuple = (void *) tuple;
+
+	/* no abbreviations (FIXME maybe use attrnum for this?) */
+	stup->datum1 = (Datum) 0;
+}
+
+
 /*
  * Routines specialized for DatumTuple case
  */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 25983b7a505..be76d8446f4 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -12,6 +12,8 @@
 
 #include "access/xlogreader.h"
 #include "lib/stringinfo.h"
+#include "nodes/execnodes.h"
+#include "storage/shm_toc.h"
 #include "storage/block.h"
 #include "utils/relcache.h"
 
@@ -88,4 +90,6 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
 extern void ginUpdateStats(Relation index, const GinStatsData *stats,
 						   bool is_build);
 
+extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
+
 #endif							/* GIN_H */
diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h
new file mode 100644
index 00000000000..56aed40fb96
--- /dev/null
+++ b/src/include/access/gin_tuple.h
@@ -0,0 +1,29 @@
+/*--------------------------------------------------------------------------
+ * gin_tuple.h
+ *	  Serialized GIN index entries, used during parallel index builds.
+ *
+ *	Copyright (c) 2006-2024, PostgreSQL Global Development Group
+ *
+ *	src/include/access/gin_tuple.h
+ *--------------------------------------------------------------------------
+ */
+#ifndef GIN_TUPLE_
+#define GIN_TUPLE_
+
+#include "storage/itemptr.h"
+
+typedef struct GinTuple
+{
+	Size		tuplen;			/* length of the whole tuple */
+	Size		keylen;			/* bytes in data for key value */
+	int16		typlen;			/* typlen for key */
+	bool		typbyval;		/* typbyval for key */
+	OffsetNumber attrnum;
+	signed char category;		/* category: normal or NULL? */
+	int			nitems;			/* number of TIDs in the data */
+	char		data[FLEXIBLE_ARRAY_MEMBER];
+} GinTuple;
+
+extern int	_gin_compare_tuples(GinTuple *a, GinTuple *b);
+
+#endif							/* GIN_TUPLE_ */
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index e7941a1f09f..35fa5ae2442 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -22,6 +22,7 @@
 #define TUPLESORT_H
 
 #include "access/brin_tuple.h"
+#include "access/gin_tuple.h"
 #include "access/itup.h"
 #include "executor/tuptable.h"
 #include "storage/dsm.h"
@@ -443,6 +444,8 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
 												  int sortopt);
 extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
 												  int sortopt);
+extern Tuplesortstate *tuplesort_begin_index_gin(int workMem, SortCoordinate coordinate,
+												 int sortopt);
 extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
 											 Oid sortOperator, Oid sortCollation,
 											 bool nullsFirstFlag,
@@ -456,6 +459,7 @@ extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
 										  Relation rel, ItemPointer self,
 										  const Datum *values, const bool *isnull);
 extern void tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tup, Size len);
+extern void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tup, Size len);
 extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
 							   bool isNull);
 
@@ -465,6 +469,8 @@ extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
 extern BrinTuple *tuplesort_getbrintuple(Tuplesortstate *state, Size *len,
 										 bool forward);
+extern GinTuple *tuplesort_getgintuple(Tuplesortstate *state, Size *len,
+									   bool forward);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy,
 							   Datum *val, bool *isNull, Datum *abbrev);
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index eee989bba17..9769f4d6b09 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1004,11 +1004,13 @@ GinBtreeData
 GinBtreeDataLeafInsertData
 GinBtreeEntryInsertData
 GinBtreeStack
+GinBuffer
 GinBuildState
 GinChkVal
 GinEntries
 GinEntryAccumulator
 GinIndexStat
+GinLeader
 GinMetaPageData
 GinNullCategory
 GinOptions
@@ -1021,9 +1023,11 @@ GinScanEntry
 GinScanKey
 GinScanOpaque
 GinScanOpaqueData
+GinShared
 GinState
 GinStatsData
 GinTernaryValue
+GinTuple
 GinTupleCollector
 GinVacuumState
 GistBuildMode
-- 
2.44.0

