From ae698d22b40a665b4048f5db61b3f791be0d246e Mon Sep 17 00:00:00 2001 From: Ilia Evdokimov Date: Tue, 17 Mar 2026 13:40:57 +0300 Subject: [PATCH v8] Reduce planning time for large NOT IN lists containing NULL For x <> ALL (...), the presence of a NULL makes the selectivity 0.0. The planner currently still iterates over all elements and computes per-element selectivity, even though the final result is known. Add an early NULL check for constant arrays and immediately return 0.0 under ALL semantics. This reduces planning time for large <> ALL lists without changing semantics. --- src/backend/utils/adt/selfuncs.c | 17 +++++++++++++ src/test/regress/expected/planner_est.out | 30 +++++++++++++++++++++++ src/test/regress/sql/planner_est.sql | 22 +++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d4da0e8dea9..073d93f4f3d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root, if (arrayisnull) /* qual can't succeed if null array */ return (Selectivity) 0.0; arrayval = DatumGetArrayTypeP(arraydatum); + + /* + * For ALL semantics, if the array contains NULL, assume operator is + * strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so return + * zero. + */ + if (!useOr && array_contains_nulls(arrayval)) + return (Selectivity) 0.0; + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), &elmlen, &elmbyval, &elmalign); deconstruct_array(arrayval, @@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root, List *args; Selectivity s2; + /* + * For ALL semantics, if the array contains NULL, assume operator + * is strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so + * return zero. + */ + if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull) + return (Selectivity) 0.0; + /* * Theoretically, if elem isn't of nominal_element_type we should * insert a RelabelType, but it seems unlikely that any operator diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out index 3a47061800a..7718197283b 100644 --- a/src/test/regress/expected/planner_est.out +++ b/src/test/regress/expected/planner_est.out @@ -183,4 +183,34 @@ false, true, false, true); Function Scan on generate_series g (cost=N..N rows=1000 width=N) (1 row) +-- +-- Test <> ALL when array initially contained NULL but no longer does +-- +CREATE FUNCTION replace_elem(arr int[], idx int, val int) +RETURNS int[] AS $$ +BEGIN + arr[idx] := val; + RETURN arr; +end; +$$ language plpgsql IMMUTABLE; +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])', +false, true, false, true ); + explain_mask_costs +---------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=9997 width=N) + Filter: (unique1 <> ALL ('{1,99,3}'::integer[])) +(2 rows) + +-- same array, constructed from an array with a NULL +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))', +false, true, false, true ); + explain_mask_costs +---------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=9997 width=N) + Filter: (unique1 <> ALL ('{1,99,3}'::integer[])) +(2 rows) + +DROP FUNCTION replace_elem; DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql index 47d5ae679c7..391ec42a6e7 100644 --- a/src/test/regress/sql/planner_est.sql +++ b/src/test/regress/sql/planner_est.sql @@ -131,5 +131,27 @@ SELECT explain_mask_costs($$ SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, false, true, false, true); +-- +-- Test <> ALL when array initially contained NULL but no longer does +-- +CREATE FUNCTION replace_elem(arr int[], idx int, val int) +RETURNS int[] AS $$ +BEGIN + arr[idx] := val; + RETURN arr; +end; +$$ language plpgsql IMMUTABLE; + +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])', +false, true, false, true ); + +-- same array, constructed from an array with a NULL +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))', +false, true, false, true ); + +DROP FUNCTION replace_elem; + DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- 2.51.0