From 40536a80563b5b12e9123989af3d94c264a824ee Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 29 Nov 2023 15:09:17 +0100 Subject: [PATCH v5 2/2] Reduce de-/forming of BRIN tuples in parallel BRIN build De-/forming a BRIN tuple is quite expensive, so this commit avoids it when only one BRIN tuple is stored in the shared sort for a given page range: the serialized tuple is copied into the local context, and it is deserialized only once we actually need to use the memtuple. --- src/backend/access/brin/brin.c | 58 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 001cf04aac..909074d430 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -2471,6 +2471,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) int i; BrinTuple *btup; BrinMemTuple *memtuple = NULL; + BrinMemTuple *memtup_holder = NULL; Size tuplen; BrinShared *brinshared = brinleader->brinshared; BlockNumber prevblkno = InvalidBlockNumber; @@ -2479,6 +2480,8 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) BrinSpool *spool; MemoryContext rangeCxt, oldCxt; + BrinTuple *prevbtup; + Size prevtuplen; /* Shutdown worker processes */ WaitForParallelWorkersToFinish(brinleader->pcxt); @@ -2498,7 +2501,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) * Initialize BrinMemTuple we'll use to union summaries from workers (in * case they happened to produce parts of the same paga range). */ - memtuple = brin_new_memtuple(state->bs_bdesc); + memtup_holder = brin_new_memtuple(state->bs_bdesc); /* * Create a memory context we'll reset to combine results for a single @@ -2542,17 +2545,25 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) */ if (prevblkno == InvalidBlockNumber) { - /* First brin tuples, just deform into memtuple. 
*/ - memtuple = brin_deform_tuple(state->bs_bdesc, btup, memtuple); + /* First brin tuple, store it in the local context. */ + prevbtup = palloc0(tuplen); + memcpy(prevbtup, btup, tuplen); + prevtuplen = tuplen; /* continue to insert empty pages before thisblock */ } - else if (memtuple->bt_blkno == btup->bt_blkno) + else if (prevbtup->bt_blkno == btup->bt_blkno) { /* * Not the first brin tuple, but same page range as the previous - * one, so we can merge it into the memtuple. + * one, so we can merge it into the memtuple. If this is the first + * merge for this block number, we still have to deform the + * original tuple before we can merge them. */ + if (memtuple == NULL) + memtuple = brin_deform_tuple(state->bs_bdesc, btup, + memtup_holder); + union_tuples(state->bs_bdesc, memtuple, btup); continue; } @@ -2563,23 +2574,36 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) /* * We got brin tuple for a different page range, so form a brin - * tuple from the memtuple, insert it, and re-init the memtuple - * from the new brin tuple. + * tuple from the memtuple (if required), insert it, and store a + * copy of the new tuple for future use. */ - tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno, - memtuple, &len); + if (memtuple != NULL) + tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno, + memtuple, &len); + else + { + tmp = prevbtup; + len = prevtuplen; + } brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len); /* * Reset the per-output-range context. This frees all the memory - * possibly allocated by the union functions, and also the BRIN - * tuple we just formed and inserted. + * possibly allocated by the union functions, the previous cached + * btuple, and the btup we just may have formed from the memtuple. 
*/ MemoryContextReset(rangeCxt); - memtuple = brin_deform_tuple(state->bs_bdesc, btup, memtuple); + /* + * Prepare for the next iteration by storing the new range's tuple + * in the current context, and resetting memtuple. + */ + prevbtup = palloc0(tuplen); + memcpy(prevbtup, btup, tuplen); + prevtuplen = tuplen; + memtuple = NULL; /* continue to insert empty pages before thisblock */ } @@ -2645,8 +2669,14 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) BrinTuple *tmp; Size len; - tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno, - memtuple, &len); + if (memtuple != NULL) + tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno, + memtuple, &len); + else + { + tmp = prevbtup; + len = prevtuplen; + } brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len); -- 2.40.1