From d3670fd7245d01d5878b066b600f7e846fb553bb Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Mon, 17 Jul 2023 14:07:39 +0800 Subject: [PATCH v2 1/2] Revise how we sort partial paths in create_ordered_paths --- src/backend/optimizer/plan/planner.c | 129 +++++++----------- src/test/regress/expected/select_parallel.out | 33 +++++ src/test/regress/sql/select_parallel.sql | 17 +++ 3 files changed, 101 insertions(+), 78 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 44efb1f4eb..cf0a3fad2a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5192,8 +5192,9 @@ create_ordered_paths(PlannerInfo *root, * have generated order-preserving Gather Merge plans which can be used * without sorting if they happen to match the sort_pathkeys, and the loop * above will have handled those as well. However, there's one more - * possibility: it may make sense to sort the cheapest partial path - * according to the required output order and then use Gather Merge. + * possibility: it may make sense to sort the cheapest partial path or + * incrementally sort any partial path that is partially sorted according + * to the required output order and then use Gather Merge. */ if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL && input_rel->partial_pathlist != NIL) @@ -5202,97 +5203,69 @@ create_ordered_paths(PlannerInfo *root, cheapest_partial_path = linitial(input_rel->partial_pathlist); - /* - * If cheapest partial path doesn't need a sort, this is redundant - * with what's already been tried. - */ - if (!pathkeys_contained_in(root->sort_pathkeys, - cheapest_partial_path->pathkeys)) + foreach(lc, input_rel->partial_pathlist) { - Path *path; + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path; + bool is_sorted; + int presorted_keys; double total_groups; - path = (Path *) create_sort_path(root, - ordered_rel, - cheapest_partial_path, - root->sort_pathkeys, - limit_tuples); - - total_groups = cheapest_partial_path->rows * - cheapest_partial_path->parallel_workers; - path = (Path *) - create_gather_merge_path(root, ordered_rel, - path, - path->pathtarget, - root->sort_pathkeys, NULL, - &total_groups); - - /* Add projection step if needed */ - if (path->pathtarget != target) - path = apply_projection_to_path(root, ordered_rel, - path, target); - - add_path(ordered_rel, path); - } + is_sorted = pathkeys_count_contained_in(root->sort_pathkeys, + input_path->pathkeys, + &presorted_keys); - /* - * Consider incremental sort with a gather merge on partial paths. - * - * We can also skip the entire loop when we only have a single-item - * sort_pathkeys because then we can't possibly have a presorted - * prefix of the list without having the list be fully sorted. - */ - if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1) - { - foreach(lc, input_rel->partial_pathlist) - { - Path *input_path = (Path *) lfirst(lc); - Path *sorted_path; - bool is_sorted; - int presorted_keys; - double total_groups; + /* No point in adding incremental sort on fully sorted paths. */ + if (is_sorted) + continue; - /* - * We don't care if this is the cheapest partial path - we - * can't simply skip it, because it may be partially sorted in - * which case we want to consider adding incremental sort - * (instead of full sort, which is what happens above). - */ + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest partial path). + */ + if (input_path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; - is_sorted = pathkeys_count_contained_in(root->sort_pathkeys, - input_path->pathkeys, - &presorted_keys); + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + sorted_path = (Path *) create_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + limit_tuples); - /* No point in adding incremental sort on fully sorted paths. */ - if (is_sorted) - continue; - if (presorted_keys == 0) - continue; - /* Since we have presorted keys, consider incremental sort. */ + else sorted_path = (Path *) create_incremental_sort_path(root, ordered_rel, input_path, root->sort_pathkeys, presorted_keys, limit_tuples); - total_groups = input_path->rows * - input_path->parallel_workers; - sorted_path = (Path *) - create_gather_merge_path(root, ordered_rel, - sorted_path, - sorted_path->pathtarget, - root->sort_pathkeys, NULL, - &total_groups); - - /* Add projection step if needed */ - if (sorted_path->pathtarget != target) - sorted_path = apply_projection_to_path(root, ordered_rel, - sorted_path, target); - - add_path(ordered_rel, sorted_path); - } + total_groups = input_path->rows * + input_path->parallel_workers; + sorted_path = (Path *) + create_gather_merge_path(root, ordered_rel, + sorted_path, + sorted_path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); } } diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index d88353d496..d82a7799e6 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -937,6 +937,39 @@ select string4 from tenk1 order by string4 limit 5; reset parallel_leader_participation; reset max_parallel_workers; +-- gather merge atop sort of partial paths +create function parallel_safe_volatile(a int) returns int as + $$ begin return a; end; $$ parallel safe volatile language plpgsql; +explain (costs off) +select * from tenk1 where four = 2 order by four, hundred, parallel_safe_volatile(thousand); + QUERY PLAN +--------------------------------------------------------------- + Gather Merge + Workers Planned: 4 + -> Sort + Sort Key: hundred, (parallel_safe_volatile(thousand)) + -> Parallel Seq Scan on tenk1 + Filter: (four = 2) +(6 rows) + +-- gather merge atop incremental sort of partial paths +set min_parallel_index_scan_size to 0; +set enable_seqscan = off; +explain (costs off) +select * from tenk1 where four = 2 order by four, hundred, parallel_safe_volatile(thousand); + QUERY PLAN +--------------------------------------------------------------- + Gather Merge + Workers Planned: 4 + -> Incremental Sort + Sort Key: hundred, (parallel_safe_volatile(thousand)) + Presorted Key: hundred + -> Parallel Index Scan using tenk1_hundred on tenk1 + Filter: (four = 2) +(7 rows) + +reset min_parallel_index_scan_size; +reset enable_seqscan; SAVEPOINT settings; SET LOCAL debug_parallel_query = 1; explain (costs off) diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 80c914dc02..7cba6519cb 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -343,6 +343,23 @@ select string4 from tenk1 order by string4 limit 5; reset parallel_leader_participation; reset max_parallel_workers; +-- gather merge atop sort of partial paths +create function parallel_safe_volatile(a int) returns int as + $$ begin return a; end; $$ parallel safe volatile language plpgsql; + +explain (costs off) +select * from tenk1 where four = 2 order by four, hundred, parallel_safe_volatile(thousand); + +-- gather merge atop incremental sort of partial paths +set min_parallel_index_scan_size to 0; +set enable_seqscan = off; + +explain (costs off) +select * from tenk1 where four = 2 order by four, hundred, parallel_safe_volatile(thousand); + +reset min_parallel_index_scan_size; +reset enable_seqscan; + SAVEPOINT settings; SET LOCAL debug_parallel_query = 1; explain (costs off) -- 2.31.0