subquery results bypassed

Started by PostgreSQL Bugs Listover 24 years ago3 messagesbugs
Jump to latest
#1PostgreSQL Bugs List
pgsql-bugs@postgresql.org

Anthony Wood (woody+postgresqlbugs@switchonline.com.au) reports a bug with a severity of 1
The lower the number the more severe it is.

Short Description
subquery results bypassed

Long Description
The second query in the example code should return:

a | b | c
---+---+---
(0 rows)

but actually returns

a | b | c
---+---+---
1 | 3 | 2
(1 row)

which is wrong, because this tuple is not in the subquery

Sample Code
CREATE TABLE "bug" ("a" TEXT, "b" TEXT, "c" TEXT);
INSERT INTO "bug" VALUES ('1','2','1');
INSERT INTO "bug" VALUES ('1','3','2');
SELECT DISTINCT ON ("a") * FROM "bug" ORDER BY "a","c";
SELECT * FROM (SELECT DISTINCT ON ("a") * FROM "bug" ORDER BY "a","c") bug WHERE "b"='3';
DROP TABLE "bug";

No file was uploaded with this report

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: PostgreSQL Bugs List (#1)
Re: subquery results bypassed

pgsql-bugs@postgresql.org writes:

[ SELECT DISTINCT ON in a subquery-in-FROM misbehaves ]

Oooh, good catch! I had thought about pushing down quals into a SELECT
DISTINCT, and concluded it was OK because the qual would eliminate all
or none of a set of not-DISTINCT rows. But I forgot about DISTINCT ON
:-(.

Will have a source patch for this tomorrow, if that helps.

regards, tom lane

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: PostgreSQL Bugs List (#1)
Re: subquery results bypassed

Anthony Wood (woody+postgresqlbugs@switchonline.com.au) writes:

[ SELECT DISTINCT ON in a subquery-in-FROM misbehaves ]

Here's the patch against 7.1.2 to fix this problem. This also fixes a
related problem noted a few days ago, that outer WHERE clauses shouldn't
be pushed down into a sub-select that has a LIMIT clause.

regards, tom lane

*** src/backend/optimizer/path/allpaths.c.orig	Wed Mar 21 22:59:34 2001
--- src/backend/optimizer/path/allpaths.c	Tue Jul 31 14:05:05 2001
***************
*** 125,135 ****
  			 * Non-pushed-down clauses will get evaluated as qpquals of
  			 * the SubqueryScan node.
  			 *
  			 * XXX Are there any cases where we want to make a policy
  			 * decision not to push down, because it'd result in a worse
  			 * plan?
  			 */
! 			if (rte->subquery->setOperations == NULL)
  			{
  				/* OK to consider pushing down individual quals */
  				List	   *upperrestrictlist = NIL;
--- 125,141 ----
  			 * Non-pushed-down clauses will get evaluated as qpquals of
  			 * the SubqueryScan node.
  			 *
+ 			 * We can't push down into subqueries with LIMIT or DISTINCT ON
+ 			 * clauses, either.
+ 			 *
  			 * XXX Are there any cases where we want to make a policy
  			 * decision not to push down, because it'd result in a worse
  			 * plan?
  			 */
! 			if (rte->subquery->setOperations == NULL &&
! 				rte->subquery->limitOffset == NULL &&
! 				rte->subquery->limitCount == NULL &&
! 				!has_distinct_on_clause(rte->subquery))
  			{
  				/* OK to consider pushing down individual quals */
  				List	   *upperrestrictlist = NIL;
*** src/backend/optimizer/util/clauses.c.orig	Tue Mar 27 12:12:34 2001
--- src/backend/optimizer/util/clauses.c	Tue Jul 31 14:05:01 2001
***************
*** 730,742 ****

/*****************************************************************************
* *
* General clause-manipulating routines *
* *
*****************************************************************************/

  /*
!  * clause_relids_vars
   *	  Retrieves distinct relids and vars appearing within a clause.
   *
   * '*relids' is set to an integer list of all distinct "varno"s appearing
--- 730,794 ----
  /*****************************************************************************
+  *		Tests on clauses of queries
+  *
+  * Possibly this code should go someplace else, since this isn't quite the
+  * same meaning of "clause" as is used elsewhere in this module.  But I can't
+  * think of a better place for it...
+  *****************************************************************************/
+ 
+ /*
+  * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
+  * just a subset of the output columns.
+  */
+ bool
+ has_distinct_on_clause(Query *query)
+ {
+ 	List   *targetList;
+ 
+ 	/* Is there a DISTINCT clause at all? */
+ 	if (query->distinctClause == NIL)
+ 		return false;
+ 	/*
+ 	 * If the DISTINCT list contains all the nonjunk targetlist items,
+ 	 * then it's a simple DISTINCT, else it's DISTINCT ON.  We do not
+ 	 * require the lists to be in the same order (since the parser may
+ 	 * have adjusted the DISTINCT clause ordering to agree with ORDER BY).
+ 	 */
+ 	foreach(targetList, query->targetList)
+ 	{
+ 		TargetEntry *tle = (TargetEntry *) lfirst(targetList);
+ 		Index		ressortgroupref;
+ 		List	   *distinctClause;
+ 
+ 		if (tle->resdom->resjunk)
+ 			continue;
+ 		ressortgroupref = tle->resdom->ressortgroupref;
+ 		if (ressortgroupref == 0)
+ 			return true;		/* definitely not in DISTINCT list */
+ 		foreach(distinctClause, query->distinctClause)
+ 		{
+ 			SortClause *scl = (SortClause *) lfirst(distinctClause);
+ 
+ 			if (scl->tleSortGroupRef == ressortgroupref)
+ 				break;			/* found TLE in DISTINCT */
+ 		}
+ 		if (distinctClause == NIL)
+ 			return true;		/* this TLE is not in DISTINCT list */
+ 	}
+ 	/* It's a simple DISTINCT */
+ 	return false;
+ }
+ 
+ 
+ /*****************************************************************************
   *																			 *
   *		General clause-manipulating routines								 *
   *																			 *
   *****************************************************************************/
  /*
!  * clause_get_relids_vars
   *	  Retrieves distinct relids and vars appearing within a clause.
   *
   * '*relids' is set to an integer list of all distinct "varno"s appearing
*** src/backend/utils/adt/ruleutils.c.orig	Wed Apr 18 13:04:24 2001
--- src/backend/utils/adt/ruleutils.c	Tue Jul 31 14:04:51 2001
***************
*** 119,125 ****
  static void get_basic_select_query(Query *query, deparse_context *context);
  static void get_setop_query(Node *setOp, Query *query,
  				deparse_context *context, bool toplevel);
- static bool simple_distinct(List *distinctClause, List *targetList);
  static void get_rule_sortgroupclause(SortClause *srt, List *tlist,
  									 bool force_colno,
  									 deparse_context *context);
--- 119,124 ----
***************
*** 1006,1014 ****
  	/* Add the DISTINCT clause if given */
  	if (query->distinctClause != NIL)
  	{
! 		if (simple_distinct(query->distinctClause, query->targetList))
! 			appendStringInfo(buf, " DISTINCT");
! 		else
  		{
  			appendStringInfo(buf, " DISTINCT ON (");
  			sep = "";
--- 1005,1011 ----
  	/* Add the DISTINCT clause if given */
  	if (query->distinctClause != NIL)
  	{
! 		if (has_distinct_on_clause(query))
  		{
  			appendStringInfo(buf, " DISTINCT ON (");
  			sep = "";
***************
*** 1023,1028 ****
--- 1020,1027 ----
  			}
  			appendStringInfo(buf, ")");
  		}
+ 		else
+ 			appendStringInfo(buf, " DISTINCT");
  	}

/* Then we tell what to select (the targetlist) */
***************
*** 1147,1180 ****
elog(ERROR, "get_setop_query: unexpected node %d",
(int) nodeTag(setOp));
}
- }
-
- /*
- * Detect whether a DISTINCT list can be represented as just DISTINCT
- * or needs DISTINCT ON. It's simple if it contains exactly the nonjunk
- * targetlist items.
- */
- static bool
- simple_distinct(List *distinctClause, List *targetList)
- {
- while (targetList)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(targetList);
-
- if (!tle->resdom->resjunk)
- {
- if (distinctClause == NIL)
- return false;
- if (((SortClause *) lfirst(distinctClause))->tleSortGroupRef !=
- tle->resdom->ressortgroupref)
- return false;
- distinctClause = lnext(distinctClause);
- }
- targetList = lnext(targetList);
- }
- if (distinctClause != NIL)
- return false;
- return true;
}

  /*
--- 1146,1151 ----
*** src/include/optimizer/clauses.h.orig	Wed Mar 21 23:00:53 2001
--- src/include/optimizer/clauses.h	Tue Jul 31 14:04:35 2001
***************
*** 59,64 ****
--- 59,66 ----
  extern bool is_pseudo_constant_clause(Node *clause);
  extern List *pull_constant_clauses(List *quals, List **constantQual);
+ extern bool has_distinct_on_clause(Query *query);
+ 
  extern void clause_get_relids_vars(Node *clause, Relids *relids, List **vars);
  extern int	NumRelids(Node *clause);
  extern void get_relattval(Node *clause, int targetrelid,