diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index 48ae0de305b..48f32cbe86f 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -153,6 +153,19 @@ typedef struct LogicalTape
 	int			nbytes;			/* total # of valid bytes in buffer */
 } LogicalTape;
 
+/*
+ * It would be more natural to simply use an ordinary pointer in
+ * LogicalTapeSet, and allocate many elements for it. Unfortunately, some
+ * compilers (notably GCC with FORTIFY_SOURCE specified) generate significantly
+ * slower code when that's done, and using this dummy structure is a
+ * workaround for that.
+ */
+typedef struct LogicalTapeDummy
+{
+	int dummy;	/* placeholder, not referenced in the code shown; a flexible array member cannot be a struct's only member */
+	LogicalTape tape_array[FLEXIBLE_ARRAY_MEMBER];	/* the tapes; sized at allocation as offsetof(LogicalTapeDummy, tape_array) + n * sizeof(LogicalTape) */
+} LogicalTapeDummy;
+
 /*
  * This data structure represents a set of related "logical tapes" sharing
  * space in a single underlying file.  (But that "file" may be multiple files
@@ -192,7 +205,7 @@ struct LogicalTapeSet
 
 	/* The array of logical tapes. */
 	int				nTapes;	/* # of logical tapes in set */
-	LogicalTape	   *tapes;	/* has nTapes nentries */
+	LogicalTapeDummy	   *tapes;	/* has nTapes nentries */
 };
 
 static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
@@ -479,7 +492,7 @@ ltsConcatWorkerTapes(LogicalTapeSet *lts, TapeShare *shared,
 		BufFile    *file;
 		int64		filesize;
 
-		lt = &lts->tapes[i];
+		lt = &lts->tapes->tape_array[i];
 
 		pg_itoa(i, filename);
 		file = BufFileOpenShared(fileset, filename);
@@ -616,10 +629,12 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
 	lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
 	lts->nFreeBlocks = 0;
 	lts->nTapes = ntapes;
-	lts->tapes = (LogicalTape *) palloc(ntapes * sizeof(LogicalTape));
+	lts->tapes = (LogicalTapeDummy *) palloc(
+		offsetof(LogicalTapeDummy, tape_array) +
+		ntapes * sizeof(LogicalTape));
 
 	for (i = 0; i < ntapes; i++)
-		ltsInitTape(&lts->tapes[i]);
+		ltsInitTape(&lts->tapes->tape_array[i]);
 
 	/*
 	 * Create temp BufFile storage as required.
@@ -656,7 +671,7 @@ LogicalTapeSetClose(LogicalTapeSet *lts)
 	BufFileClose(lts->pfile);
 	for (i = 0; i < lts->nTapes; i++)
 	{
-		lt = &lts->tapes[i];
+		lt = &lts->tapes->tape_array[i];
 		if (lt->buffer)
 			pfree(lt->buffer);
 	}
@@ -693,7 +708,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,
 	size_t		nthistime;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 	Assert(lt->writing);
 	Assert(lt->offsetBlockNumber == 0L);
 
@@ -779,7 +794,7 @@ LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum, size_t buffer_size)
 	LogicalTape *lt;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 
 	/*
 	 * Round and cap buffer_size if needed.
@@ -857,7 +872,7 @@ LogicalTapeRewindForWrite(LogicalTapeSet *lts, int tapenum)
 	LogicalTape *lt;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 
 	Assert(!lt->writing && !lt->frozen);
 	lt->writing = true;
@@ -886,7 +901,7 @@ LogicalTapeRead(LogicalTapeSet *lts, int tapenum,
 	size_t		nthistime;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 	Assert(!lt->writing);
 
 	if (lt->buffer == NULL)
@@ -940,7 +955,7 @@ LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum, TapeShare *share)
 	LogicalTape *lt;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 	Assert(lt->writing);
 	Assert(lt->offsetBlockNumber == 0L);
 
@@ -1017,11 +1032,13 @@ LogicalTapeSetExtend(LogicalTapeSet *lts, int nAdditional)
 
 	lts->nTapes += nAdditional;
 
-	lts->tapes = (LogicalTape *) repalloc(
-		lts->tapes, lts->nTapes * sizeof(LogicalTape));
+	lts->tapes = (LogicalTapeDummy *) repalloc(
+		lts->tapes,
+		offsetof(LogicalTapeDummy, tape_array) +
+		lts->nTapes * sizeof(LogicalTape));
 
 	for (i = nTapesOrig; i < lts->nTapes; i++)
-		ltsInitTape(&lts->tapes[i]);
+		ltsInitTape(&lts->tapes->tape_array[i]);
 }
 
 /*
@@ -1044,7 +1061,7 @@ LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum, size_t size)
 	size_t		seekpos = 0;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 	Assert(lt->frozen);
 	Assert(lt->buffer_size == BLCKSZ);
 
@@ -1118,7 +1135,7 @@ LogicalTapeSeek(LogicalTapeSet *lts, int tapenum,
 	LogicalTape *lt;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 	Assert(lt->frozen);
 	Assert(offset >= 0 && offset <= TapeBlockPayloadSize);
 	Assert(lt->buffer_size == BLCKSZ);
@@ -1152,7 +1169,7 @@ LogicalTapeTell(LogicalTapeSet *lts, int tapenum,
 	LogicalTape *lt;
 
 	Assert(tapenum >= 0 && tapenum < lts->nTapes);
-	lt = &lts->tapes[tapenum];
+	lt = &lts->tapes->tape_array[tapenum];
 
 	if (lt->buffer == NULL)
 		ltsInitReadBuffer(lts, lt);
