From 2981406dbf5bb738273b3ff4c26854ce7d715dd1 Mon Sep 17 00:00:00 2001 From: ChangAo Chen Date: Wed, 3 Dec 2025 18:58:28 +0800 Subject: [PATCH v1] Support loser tree for k-way merge. The loser tree usually has fewer comparisons than the heap during k-way merge. --- src/backend/utils/misc/guc_parameters.dat | 7 + src/backend/utils/sort/tuplesort.c | 160 +++++++++++++++++++++- src/include/utils/guc.h | 1 + 3 files changed, 167 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 3b9d8349078..123ece8ea75 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -882,6 +882,13 @@ boot_val => 'true', }, +{ name => 'enable_loser_tree', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD', + short_desc => 'Enables loser tree for k-way merge.', + flags => 'GUC_NOT_IN_SAMPLE', + variable => 'enable_loser_tree', + boot_val => 'false', +}, + { name => 'enable_material', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD', short_desc => 'Enables the planner\'s use of materialization.', flags => 'GUC_EXPLAIN', diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 5d4411dc33f..381f5909094 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -122,6 +122,9 @@ /* GUC variables */ bool trace_sort = false; +bool enable_loser_tree = false; + +#define LOSER_TREE_EOF -1 #ifdef DEBUG_BOUNDED_SORT bool optimize_bounded_sort = true; @@ -219,6 +222,9 @@ struct Tuplesortstate int memtupsize; /* allocated length of memtuples array */ bool growmemtuples; /* memtuples' growth still underway? */ + bool useLoserTree; /* use loser tree for k-way merge if true */ + int *losers; /* array of losers, losers[0] is the winner */ + /* * Memory for tuples is sometimes allocated using a simple slab allocator, * rather than with palloc(). Currently, we switch to slab allocation @@ -468,6 +474,8 @@ static void tuplesort_sort_memtuples(Tuplesortstate *state); static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple); static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple); static void tuplesort_heap_delete_top(Tuplesortstate *state); +static void tuplesort_loser_tree_build(Tuplesortstate *state); +static void tuplesort_loser_tree_adjust(Tuplesortstate *state, int start); static void reversedirection(Tuplesortstate *state); static unsigned int getlen(LogicalTape *tape, bool eofOK); static void markrunend(LogicalTape *tape); @@ -681,6 +689,8 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt) state->base.sortopt = sortopt; state->base.tuples = true; state->abbrevNext = 10; + state->useLoserTree = enable_loser_tree; + state->losers = NULL; /* * workMem is forced to be at least 64KB, the current minimum valid value @@ -1647,6 +1657,37 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, state->lastReturnedTuple = NULL; } + if (state->useLoserTree && state->memtupcount > 0) + { + int i = state->losers[0]; + int start = i + state->memtupcount; + int srcTapeIndex = state->memtuples[i].srctape; + LogicalTape *srcTape = state->inputTapes[srcTapeIndex]; + SortTuple newtup; + + *stup = state->memtuples[i]; + + state->lastReturnedTuple = stup->tuple; + + if (mergereadnext(state, srcTape, &newtup)) + { + newtup.srctape = srcTapeIndex; + state->memtuples[i] = newtup; + } + else + { + state->losers[start] = LOSER_TREE_EOF; + state->nInputRuns--; + LogicalTapeClose(srcTape); + } + tuplesort_loser_tree_adjust(state, start); + + if (state->losers[0] == LOSER_TREE_EOF) + state->memtupcount = 0; + + return true; + } + /* * This code should match the inner loop of mergeonerun(). */ @@ -2206,6 +2247,41 @@ mergeonerun(Tuplesortstate *state) Assert(state->slabAllocatorUsed); + if (state->useLoserTree && state->memtupcount > 0) + { + while (state->losers[0] != LOSER_TREE_EOF) + { + int i = state->losers[0]; + int start = i + state->memtupcount; + SortTuple stup; + + /* write the tuple to destTape */ + srcTapeIndex = state->memtuples[i].srctape; + srcTape = state->inputTapes[srcTapeIndex]; + WRITETUP(state, state->destTape, &state->memtuples[i]); + + /* recycle the slot of the tuple we just wrote out, for the next read */ + if (state->memtuples[i].tuple) + RELEASE_SLAB_SLOT(state, state->memtuples[i].tuple); + + if (mergereadnext(state, srcTape, &stup)) + { + stup.srctape = srcTapeIndex; + state->memtuples[i] = stup; + } + else + { + state->losers[start] = LOSER_TREE_EOF; + state->nInputRuns--; + } + + tuplesort_loser_tree_adjust(state, start); + } + state->memtupcount = 0; + markrunend(state->destTape); + return; + } + /* * Execute merge by repeatedly extracting lowest tuple in heap, writing it * out, and replacing it with next tuple from same tape (if there is @@ -2270,9 +2346,16 @@ beginmerge(Tuplesortstate *state) if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup)) { tup.srctape = srcTapeIndex; - tuplesort_heap_insert(state, &tup); + + if (state->useLoserTree) + state->memtuples[state->memtupcount++] = tup; + else + tuplesort_heap_insert(state, &tup); } } + + if (state->useLoserTree && state->memtupcount > 0) + tuplesort_loser_tree_build(state); } /* @@ -2823,6 +2906,81 @@ tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple) memtuples[i] = *tuple; } +static void +tuplesort_loser_tree_build(Tuplesortstate *state) +{ + int k = state->memtupcount; /* k-way */ + int winner[MAXORDER * 2]; + int i; + + Assert(k > 0 && k <= MAXORDER); + + if (state->losers == NULL) + state->losers = (int *) MemoryContextAlloc(state->base.maincontext, + MAXORDER * 2 * sizeof(int)); + + for (i = k; i < 2 * k; i++) + { + winner[i] = i - k; + state->losers[i] = i - k; + } + + for (i = k - 1; i >= 1; i--) + { + int l = i * 2; + int r = l + 1; + + if (COMPARETUP(state, + &state->memtuples[winner[l]], + &state->memtuples[winner[r]]) < 0) + { + winner[i] = winner[l]; + state->losers[i] = winner[r]; + } + else + { + winner[i] = winner[r]; + state->losers[i] = winner[l]; + } + } + + /* set the final winner to state->losers[0] */ + state->losers[0] = winner[1]; +} + +static void +tuplesort_loser_tree_adjust(Tuplesortstate *state, int start) +{ + int winner; + int parent; + + Assert(state->losers != NULL); + Assert(start >= state->memtupcount && start < state->memtupcount * 2); + + winner = state->losers[start]; + + for (parent = start / 2; parent > 0; parent /= 2) + { + int loser = state->losers[parent]; + + /* eof is always the loser */ + if (loser == LOSER_TREE_EOF) + continue; + + if (winner == LOSER_TREE_EOF || + COMPARETUP(state, + &state->memtuples[winner], + &state->memtuples[loser]) > 0) + { + state->losers[parent] = winner; + winner = loser; + } + } + + /* set the final winner to state->losers[0] */ + state->losers[0] = winner; +} + /* * Function to reverse the sort direction from its current state * diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index f21ec37da89..8c22eed716b 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -324,6 +324,7 @@ extern PGDLLIMPORT int tcp_user_timeout; extern PGDLLIMPORT char *role_string; extern PGDLLIMPORT bool in_hot_standby_guc; extern PGDLLIMPORT bool trace_sort; +extern PGDLLIMPORT bool enable_loser_tree; #ifdef DEBUG_BOUNDED_SORT extern PGDLLIMPORT bool optimize_bounded_sort; -- 2.34.1