From 28543dda9febe8d8b5fc91060a4323c08f3c4a8a Mon Sep 17 00:00:00 2001
From: Marti Raudsepp <marti@juffo.org>
Date: Wed, 1 Oct 2014 02:17:21 +0300
Subject: [PATCH] Simplify EXISTS subqueries containing LIMIT

---
 src/backend/optimizer/plan/subselect.c  | 42 ++++++++++++++++++++++-----
 src/test/regress/expected/subselect.out | 51 +++++++++++++++++++++++++++++++++
 src/test/regress/sql/subselect.sql      | 14 +++++++++
 3 files changed, 100 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 3e7dc85..66d1b90 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node,
 static bool subplan_is_hashable(Plan *plan);
 static bool testexpr_is_hashable(Node *testexpr);
 static bool hash_ok_operator(OpExpr *expr);
-static bool simplify_EXISTS_query(Query *query);
+static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
 static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
 					  Node **testexpr, List **paramIds);
 static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
@@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 	 * If it's an EXISTS subplan, we might be able to simplify it.
 	 */
 	if (subLinkType == EXISTS_SUBLINK)
-		simple_exists = simplify_EXISTS_query(subquery);
+		simple_exists = simplify_EXISTS_query(root, subquery);
 
 	/*
 	 * For an EXISTS subplan, tell lower-level planner to expect that only the
@@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 		/* Make a second copy of the original subquery */
 		subquery = (Query *) copyObject(orig_subquery);
 		/* and re-simplify */
-		simple_exists = simplify_EXISTS_query(subquery);
+		simple_exists = simplify_EXISTS_query(root, subquery);
 		Assert(simple_exists);
 		/* See if it can be converted to an ANY query */
 		subquery = convert_EXISTS_to_ANY(root, subquery,
@@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * targetlist, we have to fail, because the pullup operation leaves us
 	 * with noplace to evaluate the targetlist.
 	 */
-	if (!simplify_EXISTS_query(subselect))
+	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
 	/*
@@ -1486,11 +1486,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
  * Returns TRUE if was able to discard the targetlist, else FALSE.
  */
 static bool
-simplify_EXISTS_query(Query *query)
+simplify_EXISTS_query(PlannerInfo *root, Query *query)
 {
 	/*
 	 * We don't try to simplify at all if the query uses set operations,
-	 * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE;
+	 * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE;
 	 * none of these seem likely in normal usage and their possible effects
 	 * are complex.
 	 */
@@ -1501,7 +1501,6 @@ simplify_EXISTS_query(Query *query)
 		query->hasModifyingCTE ||
 		query->havingQual ||
 		query->limitOffset ||
-		query->limitCount ||
 		query->rowMarks)
 		return false;
 
@@ -1513,6 +1512,35 @@ simplify_EXISTS_query(Query *query)
 		return false;
 
 	/*
+	 * A subquery that has a LIMIT clause with a positive value or NULL causes
+	 * no behavioral change to the query. With EXISTS we only care about the
+	 * first row anyway, so we'll simply remove the LIMIT clause. If the LIMIT
+	 * value does not reduce to a constant that's positive or NULL then do not
+	 * touch it.
+	 */
+	if (query->limitCount)
+	{
+		/*
+		 * eval_const_expressions has not been called yet by subquery_planner,
+		 * may still contain int64 coercions etc.
+		 */
+		Node	   *node = eval_const_expressions(root, query->limitCount);
+		Const	   *limit;
+
+		if (! IsA(node, Const))
+			return false;
+
+		limit = (Const *) node;
+		Assert(limit->consttype == INT8OID);
+
+		/* If it's not NULL and not positive, keep it as a regular subquery */
+		if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
+			return false;
+
+		query->limitCount = NULL;
+	}
+
+	/*
 	 * Otherwise, we can throw away the targetlist, as well as any GROUP,
 	 * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
 	 * change a nonzero-rows result to zero rows or vice versa.  (Furthermore,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 01c9130..e3b871b 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -775,6 +775,57 @@ select * from int4_tbl o where (f1, f1) in
 (1 row)
 
 --
+-- Check EXISTS simplification with LIMIT
+--
+explain (costs off)
+select * from int4_tbl o where exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit null);
+             QUERY PLAN             
+------------------------------------
+ Hash Semi Join
+   Hash Cond: (o.f1 = i.f1)
+   ->  Seq Scan on int4_tbl o
+   ->  Hash
+         ->  Seq Scan on int4_tbl i
+(5 rows)
+
+explain (costs off)
+select * from int4_tbl o where not exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+             QUERY PLAN             
+------------------------------------
+ Hash Anti Join
+   Hash Cond: (o.f1 = i.f1)
+   ->  Seq Scan on int4_tbl o
+   ->  Hash
+         ->  Seq Scan on int4_tbl i
+(5 rows)
+
+explain (costs off)
+select * from int4_tbl o where exists (select 1 limit -1);
+          QUERY PLAN          
+------------------------------
+ Result
+   One-Time Filter: $0
+   InitPlan 1 (returns $0)
+     ->  Limit
+           ->  Result
+   ->  Seq Scan on int4_tbl o
+(6 rows)
+
+explain (costs off)
+select * from int4_tbl o where exists (select 1 limit 0);
+          QUERY PLAN          
+------------------------------
+ Result
+   One-Time Filter: $0
+   InitPlan 1 (returns $0)
+     ->  Limit
+           ->  Result
+   ->  Seq Scan on int4_tbl o
+(6 rows)
+
+--
 -- check for over-optimization of whole-row Var referencing an Append plan
 --
 select (select q from
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 56707e2..3f04137 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -433,6 +433,20 @@ select * from int4_tbl o where (f1, f1) in
   (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
 
 --
+-- Check EXISTS simplification with LIMIT
+--
+explain (costs off)
+select * from int4_tbl o where exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit null);
+explain (costs off)
+select * from int4_tbl o where not exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+explain (costs off)
+select * from int4_tbl o where exists (select 1 limit -1);
+explain (costs off)
+select * from int4_tbl o where exists (select 1 limit 0);
+
+--
 -- check for over-optimization of whole-row Var referencing an Append plan
 --
 select (select q from
-- 
2.1.2

