From 35c84ffbbc0fe7b09c7edea7570ff21963821c07 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Sun, 19 Apr 2026 15:24:39 +1200 Subject: [PATCH v3] Fix incorrect logic for hashed IN / NOT IN with non-strict operators This fixes the incorrect logic for hashed IN and NOT IN clauses which triggers when there are 9 or more constant elements in the clause and the datatype supports hashing. When the value being looked up was NULL, in some cases it was possible that the hashed version evaluated to false rather than NULL, and in much more rare cases, true instead of NULL. The former technically wouldn't cause an issue for an IN or NOT IN in a WHERE clause as false and NULL mean the same thing there, however, the latter could cause rows to match the WHERE clause that shouldn't. Since all built-in operators are strict, this could only affect custom types. Reported-by: Chengpeng Yan Discussion: https://postgr.es/m/A16187AE-2359-4265-9F5E-71D015EC2B2D@outlook.com --- src/backend/executor/execExprInterp.c | 161 ++++++++++++----- src/include/executor/execExpr.h | 4 + src/test/regress/expected/expressions.out | 203 ++++++++++++++++++---- src/test/regress/sql/expressions.sql | 114 ++++++++++-- 4 files changed, 395 insertions(+), 87 deletions(-) diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 3c4843cde86..d7c61b5bd06 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4246,9 +4246,10 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco * If the scalar is NULL, and the function is strict, return NULL; no * point in executing the search. */ - if (fcinfo->args[0].isnull && strictfunc) + if (scalar_isnull && strictfunc) { *op->resnull = true; + *op->resvalue = (Datum) 0; return; } @@ -4267,6 +4268,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco int bitmask; MemoryContext oldcontext; ArrayType *arr; + bool checked_null_eq_nonnull = strictfunc; saop = op->d.hashedscalararrayop.saop; @@ -4327,6 +4329,23 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco s = (char *) att_nominal_alignby(s, typalignby); saophash_insert(elements_tab->hashtab, element, &hashfound); + + /* + * For non-strict functions, check what NULL = non-NULL does. + * We do assume that this will be the same for every Datum + * value. + */ + if (!checked_null_eq_nonnull) + { + fcinfo->args[0].value = (Datum) 0; + fcinfo->args[0].isnull = true; + fcinfo->args[1].value = element; + fcinfo->args[1].isnull = false; + + op->d.hashedscalararrayop.null_eq_nonnull = DatumGetBool(op->d.hashedscalararrayop.finfo->fn_addr(fcinfo)); + op->d.hashedscalararrayop.null_eq_nonnull_isnull = fcinfo->isnull; + checked_null_eq_nonnull = true; + } } /* Advance bitmap pointer if any. */ @@ -4346,61 +4365,119 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco * non-strict functions with a null lhs value if no match is found. */ op->d.hashedscalararrayop.has_nulls = has_nulls; - } - /* Check the hash to see if we have a match. */ - hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar); + if (has_nulls && !strictfunc) + { + fcinfo->args[0].value = (Datum) 0; + fcinfo->args[0].isnull = true; + fcinfo->args[1].value = (Datum) 0; + fcinfo->args[1].isnull = true; - /* the result depends on if the clause is an IN or NOT IN clause */ - if (inclause) - result = BoolGetDatum(hashfound); /* IN */ - else - result = BoolGetDatum(!hashfound); /* NOT IN */ + op->d.hashedscalararrayop.null_eq_null = DatumGetBool(op->d.hashedscalararrayop.finfo->fn_addr(fcinfo)); + op->d.hashedscalararrayop.null_eq_null_isnull = fcinfo->isnull; - resultnull = false; + } + } - /* - * If we didn't find a match in the array, we still might need to handle - * the possibility of null values. We didn't put any NULLs into the - * hashtable, but instead marked if we found any when building the table - * in has_nulls. - */ - if (!hashfound && op->d.hashedscalararrayop.has_nulls) + if (likely(!scalar_isnull)) { - if (strictfunc) - { + /* perform hash lookup on the non-NULL value */ + hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar); + /* + * If we didn't find the value and the array contains a NULL, the + * result is NULL + */ + if (!hashfound && op->d.hashedscalararrayop.has_nulls) + { /* - * We have nulls in the array so a non-null lhs and no match must - * yield NULL. + * For strict functions, a failed lookup for arrays containing a + * NULL means a NULL result. For non-strict it depends on if the + * array contains any non-null values and what the strict function + * results for non-NULL = NULL. */ - result = (Datum) 0; - resultnull = true; + if (likely(strictfunc) || + op->d.hashedscalararrayop.elements_tab->hashtab->members == 0) + { + result = (Datum) 0; + resultnull = true; + } + else + { + /* + * Use the cached value for NULL = non-NULL. Let's just + * assume this is the same as non-NULL = NULL. Any non-strict + * function where that doesn't hold true will often result in + * surprising behavior. + */ + result = BoolGetDatum(op->d.hashedscalararrayop.null_eq_nonnull); + resultnull = op->d.hashedscalararrayop.null_eq_nonnull_isnull; + + /* Reverse any non-null result for NOT IN clauses */ + if (!resultnull && !inclause) + result = BoolGetDatum(!DatumGetBool(result)); + } } else { - /* - * Execute function will null rhs just once. - * - * The hash lookup path will have scribbled on the lhs argument so - * we need to set it up also (even though we entered this function - * with it already set). - */ - fcinfo->args[0].value = scalar; - fcinfo->args[0].isnull = scalar_isnull; - fcinfo->args[1].value = (Datum) 0; - fcinfo->args[1].isnull = true; + /* the result depends on if the clause is an IN or NOT IN clause */ + if (inclause) + result = BoolGetDatum(hashfound); /* IN */ + else + result = BoolGetDatum(!hashfound); /* NOT IN */ - result = op->d.hashedscalararrayop.finfo->fn_addr(fcinfo); - resultnull = fcinfo->isnull; + resultnull = false; + } + } + else if (op->d.hashedscalararrayop.has_nulls) + { + /* + * The short-circuit earlier means we should never get here with a + * NULL LHS and a strict function. + */ + Assert(!strictfunc); - /* - * Reverse the result for NOT IN clauses since the above function - * is the equality function and we need not-equals. - */ - if (!inclause) - result = BoolGetDatum(!DatumGetBool(result)); + result = BoolGetDatum(op->d.hashedscalararrayop.null_eq_null); + resultnull = op->d.hashedscalararrayop.null_eq_null_isnull; + + /* + * If the non-strict function returned false for NULL = NULL then we + * must still consider other array members. What we return here must + * depend on both if there are any non-nulls in the array and what the + * function returns for null = non-null. If null = non-null yields + * NULL then that's what we return so that we behave the same way as + * EEOP_SCALARARRAYOP. + */ + if (!resultnull && !DatumGetBool(result) && + op->d.hashedscalararrayop.null_eq_nonnull_isnull && + op->d.hashedscalararrayop.elements_tab->hashtab->members > 0) + { + result = (Datum) 0; + resultnull = true; } + + /* + * Reverse the result for NOT IN clauses since the above function is + * the equality function and we need not-equals. + */ + else if (!inclause) + result = BoolGetDatum(!DatumGetBool(result)); + } + else + { + Assert(!strictfunc); + + /* + * NULL LHS and array has no NULLs. Use the value that we cached from + * the test call of the function we did when inserting the first + * non-null into the hash table above. + */ + result = op->d.hashedscalararrayop.null_eq_nonnull; + resultnull = op->d.hashedscalararrayop.null_eq_nonnull_isnull; + + /* Reverse any non-null result for NOT IN clauses */ + if (!resultnull && !inclause) + result = BoolGetDatum(!DatumGetBool(result)); } *op->resvalue = result; diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index aa9b361fa31..56e6bb5d239 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -641,6 +641,10 @@ typedef struct ExprEvalStep { bool has_nulls; bool inclause; /* true for IN and false for NOT IN */ + bool null_eq_nonnull; /* non strict func null behavior */ + bool null_eq_nonnull_isnull; /* the isnull for the above */ + bool null_eq_null; /* non strict func null behavior */ + bool null_eq_null_isnull; /* the isnull for the above */ struct ScalarArrayOpExprHashTable *elements_tab; FmgrInfo *finfo; /* function's lookup data */ FunctionCallInfo fcinfo_data; /* arguments etc */ diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 9a3c97b15a3..57616a02c3c 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -387,42 +387,177 @@ default for type myint using hash as operator 1 = (myint, myint), function 1 myinthash(myint); create table inttest (a myint); -insert into inttest values(1::myint),(null); --- try an array with enough elements to cause hashing -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); - a ---- - 1 - -(2 rows) +insert into inttest values (1::myint), (2::myint), (null); +-- Test EEOP_HASHED_SCALARARRAYOP against EEOP_SCALARARRAYOP. Ensure the +-- result of non-hashed vs hashed is the same. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | t | t + 2 | t | t + | | +(3 rows) -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); - a ---- -(0 rows) - -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); - a ---- -(0 rows) - --- ensure the result matched with the non-hashed version. We simply remove --- some array elements so that we don't reach the hashing threshold. -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); - a ---- - 1 - -(2 rows) +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | t | t + | t | t +(3 rows) + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | f | f + 2 | f | f + | | +(3 rows) + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | f | f + | f | f +(3 rows) + +-- Now make the equal function return false when given two NULLs +create or replace function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return false; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; +-- And try the same again to ensure EEOP_HASHED_SCALARARRAYOP does the same +-- thing as EEOP_SCALARARRAYOP. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | t | t + 2 | t | t + | | +(3 rows) -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); - a ---- -(0 rows) +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | t | t + | | +(3 rows) + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | f | f + 2 | f | f + | | +(3 rows) + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | | + 2 | f | f + | | +(3 rows) -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); - a ---- -(0 rows) +-- Try one more time with something even more bizarre +create or replace function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return false; + elsif $1 is null and $2 is not null then + return false; + elsif $1 is not null and $2 is null then + return false; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | t | t + 2 | t | t + | f | f +(3 rows) + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | f | f + 2 | t | t + | f | f +(3 rows) + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | f | f + 2 | f | f + | t | t +(3 rows) + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + a | not_hashed | hashed +---+------------+-------- + 1 | t | t + 2 | f | f + | t | t +(3 rows) rollback; diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index e02c21f3368..e48d6b3184a 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -196,16 +196,108 @@ default for type myint using hash as function 1 myinthash(myint); create table inttest (a myint); -insert into inttest values(1::myint),(null); - --- try an array with enough elements to cause hashing -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); --- ensure the result matched with the non-hashed version. We simply remove --- some array elements so that we don't reach the hashing threshold. -select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); -select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); -select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); +insert into inttest values (1::myint), (2::myint), (null); + +-- Test EEOP_HASHED_SCALARARRAYOP against EEOP_SCALARARRAYOP. Ensure the +-- result of non-hashed vs hashed is the same. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +-- Now make the equal function return false when given two NULLs +create or replace function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return false; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; + +-- And try the same again to ensure EEOP_HASHED_SCALARARRAYOP does the same +-- thing as EEOP_SCALARARRAYOP. +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +-- Try one more time with something even more bizarre +create or replace function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return false; + elsif $1 is null and $2 is not null then + return false; + elsif $1 is not null and $2 is null then + return false; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; + +select + a, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed + from inttest; + +select + a, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; + +select + a, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint) as not_hashed, + a not in (null::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint) as hashed +from inttest; rollback; -- 2.51.0