From df834d3dca34a0ce3146d87108301b74c9e306c9 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Sun, 19 Apr 2026 15:24:39 +1200 Subject: [PATCH v2] Fix incorrect logic for hashed IN / NOT IN with non-strict operators This fixes the incorrect logic for hashed IN and NOT IN clauses which triggers when there are 9 or more constant elements in the clause and the datatype supports hashing. When the value being looked up was NULL, in some cases it was possible that the hashed version evaluated to false rather than NULL, and in much more rare cases, true instead of NULL. The former technically wouldn't cause an issue for an IN or NOT IN in a WHERE clause as false and NULL mean the same thing there, however, the latter could cause rows to match the WHERE clause that shouldn't. Since all built-in operators are strict, this could only affect custom types. Reported-by: Chengpeng Yan Discussion: https://postgr.es/m/A16187AE-2359-4265-9F5E-71D015EC2B2D@outlook.com --- src/backend/executor/execExprInterp.c | 109 ++++++++++------- src/test/regress/expected/expressions.out | 141 ++++++++++++++++------ src/test/regress/sql/expressions.sql | 75 ++++++++++-- 3 files changed, 236 insertions(+), 89 deletions(-) diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 3c4843cde86..ff561df4e1f 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4246,9 +4246,10 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco * If the scalar is NULL, and the function is strict, return NULL; no * point in executing the search. */ - if (fcinfo->args[0].isnull && strictfunc) + if (scalar_isnull && strictfunc) { *op->resnull = true; + *op->resvalue = (Datum) 0; return; } @@ -4348,59 +4349,79 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco op->d.hashedscalararrayop.has_nulls = has_nulls; } - /* Check the hash to see if we have a match. */ - hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar); - - /* the result depends on if the clause is an IN or NOT IN clause */ - if (inclause) - result = BoolGetDatum(hashfound); /* IN */ - else - result = BoolGetDatum(!hashfound); /* NOT IN */ - - resultnull = false; - - /* - * If we didn't find a match in the array, we still might need to handle - * the possibility of null values. We didn't put any NULLs into the - * hashtable, but instead marked if we found any when building the table - * in has_nulls. - */ - if (!hashfound && op->d.hashedscalararrayop.has_nulls) + if (!scalar_isnull) { - if (strictfunc) - { + /* perform hash lookup on the non-NULL value */ + hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar); - /* - * We have nulls in the array so a non-null lhs and no match must - * yield NULL. - */ + /* + * If we didn't find the value and the array contains a NULL, the + * result is NULL + */ + if (!hashfound && op->d.hashedscalararrayop.has_nulls) + { result = (Datum) 0; resultnull = true; } else { - /* - * Execute function will null rhs just once. - * - * The hash lookup path will have scribbled on the lhs argument so - * we need to set it up also (even though we entered this function - * with it already set). - */ - fcinfo->args[0].value = scalar; - fcinfo->args[0].isnull = scalar_isnull; - fcinfo->args[1].value = (Datum) 0; - fcinfo->args[1].isnull = true; + /* the result depends on if the clause is an IN or NOT IN clause */ + if (inclause) + result = BoolGetDatum(hashfound); /* IN */ + else + result = BoolGetDatum(!hashfound); /* NOT IN */ - result = op->d.hashedscalararrayop.finfo->fn_addr(fcinfo); - resultnull = fcinfo->isnull; + resultnull = false; + } + } + else if (op->d.hashedscalararrayop.has_nulls) + { + /* + * The short-circuit earlier means we should never get here with a + * NULL LHS and a strict function. + */ + Assert(!strictfunc); - /* - * Reverse the result for NOT IN clauses since the above function - * is the equality function and we need not-equals. - */ - if (!inclause) - result = BoolGetDatum(!DatumGetBool(result)); + /* + * We have non-strict operator, a NULL LHS and the array contains a + * NULL. Call the function to see if it classes NULLs as equal. + */ + fcinfo->args[0].value = (Datum) 0; + fcinfo->args[0].isnull = true; + fcinfo->args[1].value = (Datum) 0; + fcinfo->args[1].isnull = true; + + result = op->d.hashedscalararrayop.finfo->fn_addr(fcinfo); + resultnull = fcinfo->isnull; + + /* + * If the non-strict function returned false for NULL = NULL then we + * must still consider other array members. If any non-NULL elements + * exists then NULL = any-non-NULL must yield NULL even with a + * non-strict function. Here we're emulating "FALSE OR NULL" for IN + * and "TRUE AND NULL" for NOT IN, both of which yield NULL. We don't + * need to handle NOT IN specifically here as we reverse the result of + * that below. + */ + if (!resultnull && !DatumGetBool(result) && + op->d.hashedscalararrayop.elements_tab->hashtab->members > 0) + { + result = (Datum) 0; + resultnull = true; } + + /* + * Reverse the result for NOT IN clauses since the above function is + * the equality function and we need not-equals. + */ + else if (!inclause) + result = BoolGetDatum(!DatumGetBool(result)); + } + else + { + /* NULL LHS and array has no NULLs. NULL result */ + result = (Datum) 0; + resultnull = true; } *op->resvalue = result; diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 9a3c97b15a3..cbfa3f5a1cc 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -387,42 +387,115 @@ default for type myint using hash as operator 1 = (myint, myint), function 1 myinthash(myint); create table inttest (a myint); -insert into inttest values(1::myint),(null); --- try an array with enough elements to cause hashing -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); - a ---- - 1 - -(2 rows) +insert into inttest values (1::myint), (2::myint), (null); +-- Test EEOP_HASHED_SCALARARRAYOP against EEOP_SCALARARRAYOP. Ensure the +-- result of non-hashed vs hashed is the same. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | t | t + 2 | t | t + | | +(3 rows) -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); - a ---- -(0 rows) - -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); - a ---- -(0 rows) - --- ensure the result matched with the non-hashed version. We simply remove --- some array elements so that we don't reach the hashing threshold. -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); - a ---- - 1 - -(2 rows) +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | t | t + | t | t +(3 rows) -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); - a ---- -(0 rows) +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | f | f + 2 | f | f + | | +(3 rows) + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | f | f + | f | f +(3 rows) -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); - a ---- -(0 rows) +-- Now make the equal function return false when given two NULLs +create or replace function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return false; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; +-- And try the same again to ensure EEOP_HASHED_SCALARARRAYOP does the same +-- thing as EEOP_SCALARARRAYOP. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | t | t + 2 | t | t + | | +(3 rows) + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | t | t + | | +(3 rows) + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | f | f + 2 | f | f + | | +(3 rows) + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | f | f + | | +(3 rows) rollback; diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index e02c21f3368..9c362b10365 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -196,16 +196,69 @@ default for type myint using hash as function 1 myinthash(myint); create table inttest (a myint); -insert into inttest values(1::myint),(null); - --- try an array with enough elements to cause hashing -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); --- ensure the result matched with the non-hashed version. We simply remove --- some array elements so that we don't reach the hashing threshold. -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); +insert into inttest values (1::myint), (2::myint), (null); + +-- Test EEOP_HASHED_SCALARARRAYOP against EEOP_SCALARARRAYOP. Ensure the +-- result of non-hashed vs hashed is the same. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +-- Now make the equal function return false when given two NULLs +create or replace function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return false; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; + +-- And try the same again to ensure EEOP_HASHED_SCALARARRAYOP does the same +-- thing as EEOP_SCALARARRAYOP. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; rollback; -- 2.51.0