Index Onlys Scan for expressions

Started by Ildar Musinover 9 years ago14 messages
#1Ildar Musin
i.musin@postgrespro.ru
1 attachment(s)

Hi, hackers!

There is a known issue that index only scan (IOS) can only work with
simple index keys based on single attributes and doesn't work with index
expressions. In this patch I propose a solution that adds support of IOS
for index expressions. Here's an example:

create table abc(a int, b int, c int);
create index on abc ((a * 1000 + b), c);

with t1 as (select generate_series(1, 1000) as x),
t2 as (select generate_series(0, 999) as x)
insert into abc(a, b, c)
select t1.x, t2.x, t2.x from t1, t2;
vacuum analyze;

Explain results with the patch:

explain (analyze, buffers) select a * 1000 + b + c from abc where a *
1000 + b = 1001;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Index Only Scan using abc_expr_c_idx on abc (cost=0.42..4.45 rows=1
width=4) (actual time=0.032..0.033 rows=1 loops=1)
Index Cond: ((((a * 1000) + b)) = 1001)
Heap Fetches: 0
Buffers: shared hit=4
Planning time: 0.184 ms
Execution time: 0.077 ms
(6 rows)

Before the patch it was:

explain (analyze, buffers) select a * 1000 + b + c from abc where a *
1000 + b = 1001;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Index Scan using abc_expr_c_idx on abc (cost=0.42..8.45 rows=1
width=4) (actual time=0.039..0.041 rows=1 loops=1)
Index Cond: (((a * 1000) + b) = 1001)
Buffers: shared hit=4
Planning time: 0.177 ms
Execution time: 0.088 ms
(5 rows)

This solution has limitations though: the restriction or the target
expression tree (or its part) must match exactly the index. E.g. this
expression will pass the check:

select a * 1000 + b + 100 from ...

but this will fail:

select 100 + a * 1000 + b from ...

because the parser groups it as:

(100 + a * 1000) + b

In this form it won't match any index key. Another case is when we
create index on (a+b) and then make query like 'select b+a ...' or '...
where b+a = smth' -- it won't match. This applies to regular index scan
too. Probably it worth to discuss the way to normalize index expressions
and clauses and work out more convenient way to match them.
Anyway, I will be grateful if you take a look at the patch in
attachment. Any comments and tips are welcome.

Thanks!

--
Ildar Musin
i.musin@postgrespro.ru

Attachments:

indexonlyscan5.patchtext/x-patch; name=indexonlyscan5.patchDownload
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2952bfb..9eb0e12 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -130,7 +131,13 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
 static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
 static int	find_list_position(Node *node, List **nodelist);
-static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs);
+static bool check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index);
 static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
 static double adjust_rowcount_for_semijoins(PlannerInfo *root,
 							  Index cur_relid,
@@ -190,7 +197,6 @@ static List *network_prefix_quals(Node *leftop, Oid expr_op, Oid opfamily,
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
 
-
 /*
  * create_index_paths()
  *	  Generate all interesting index paths for the given relation.
@@ -1020,7 +1026,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * index data retrieval anyway.
 	 */
 	index_only_scan = (scantype != ST_BITMAPSCAN &&
-					   check_index_only(rel, index));
+					   check_index_only(root, rel, index));
 
 	/*
 	 * 4. Generate an indexscan path if there are relevant restriction clauses
@@ -1780,13 +1786,12 @@ find_list_position(Node *node, List **nodelist)
 	return i;
 }
 
-
 /*
  * check_index_only
  *		Determine whether an index-only scan is possible for this index.
  */
 static bool
-check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index)
 {
 	bool		result;
 	Bitmapset  *attrs_used = NULL;
@@ -1836,10 +1841,7 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 	{
 		int			attno = index->indexkeys[i];
 
-		/*
-		 * For the moment, we just ignore index expressions.  It might be nice
-		 * to do something with them, later.
-		 */
+		/* Handle expressions later */
 		if (attno == 0)
 			continue;
 
@@ -1852,12 +1854,210 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 	/* Do we have all the necessary attributes? */
 	result = bms_is_subset(attrs_used, index_canreturn_attrs);
 
+	/* Check index expressions */
+	if (!result && index->indexprs)
+	{
+		/*
+		 * We need to check both: the target list and the clauses. If the
+		 * match the index
+		 */
+		result =
+			check_index_only_targetlist(root, rel, index, index_canreturn_attrs) &&
+			check_index_only_clauses(index->indrestrictinfo, index);
+	}
+
 	bms_free(attrs_used);
 	bms_free(index_canreturn_attrs);
 
 	return result;
 }
 
+typedef struct check_index_only_walker_ctx
+{
+	IndexOptInfo   *index;
+	Bitmapset	   *index_canreturn_attrs;	/* attributes that can be returned
+											 * by index */
+	bool			match;					/* TRUE if expression matches
+											 * index */
+} check_index_only_walker_ctx;
+
+/*
+ * check_index_only_expr_walker
+ * 		Recursive walker that checks if each part of the expression can be
+ *		build with index expressions or attributes
+ */
+static bool
+check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx)
+{
+	int i;
+
+	/*
+	 * If node is empty or some subexpression has already reported that it
+	 * isn't match the index then quit looking any further
+	 */
+	if (node == NULL || ctx->match == false)
+		return false;
+
+	/*
+	 * If this is a Var then check if it can be returned by the index
+	 */
+	if (IsA(node, Var))
+	{
+		Var *var = (Var *) node;
+
+		if (var->varno == ctx->index->rel->relid &&
+			!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+						   ctx->index_canreturn_attrs))
+			ctx->match = false;
+		return false;
+	}
+
+	/*
+	 * Check if expression can be returned by index
+	 */
+	for (i = 0; i < ctx->index->ncolumns; i++)
+	{
+		if (match_index_to_operand(node, i, ctx->index))
+		{
+			/*
+			 * It's alright. This expression can be returned by index
+			 */
+			return false;
+		}
+	}
+
+	/*
+	 * Recursive check
+	 */
+	return expression_tree_walker(node, check_index_only_expr_walker, (void *) ctx);
+}
+
+/*
+ * check_index_only_targetlist
+ *		Checks that every target of index->relation can be returned by index
+ *
+ * In this function we're looking through root->parse->targetlist and pick
+ * those targets which contain index->relation's attributes. For each selected
+ * target we use recursive function check_index_only_expr_walker() to check if
+ * target can be build with the index expressions and attributes and none of
+ * the other index->relation's attributes
+ */
+static bool
+check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs)
+{
+	check_index_only_walker_ctx *ctx;
+	ListCell *lc;
+	bool	result = true;
+	bool	flag;
+
+	/* Check expressions from targetlist */
+	foreach(lc, root->parse->targetList)
+	{
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		Bitmapset *varnos = pull_varnos((Node *) te->expr);
+
+		if (bms_is_member(rel->relid, varnos))
+		{
+			Bitmapset *attrs = NULL;
+
+			flag = false;
+
+			pull_varattnos((Node *) te->expr, rel->relid, &attrs);
+			if (bms_is_subset(attrs, index_canreturn_attrs))
+				flag = true;
+			else
+			{
+				ctx = (check_index_only_walker_ctx *) palloc(sizeof(check_index_only_walker_ctx));
+				ctx->index = index;
+				ctx->match = true;
+				ctx->index_canreturn_attrs = index_canreturn_attrs;
+				check_index_only_expr_walker((Node *) te->expr, ctx);
+
+				if (ctx->match)
+					flag = true;
+
+				pfree(ctx);
+			}
+			bms_free(attrs);
+
+			if (!flag)
+			{
+				result = false;
+				break;
+			}
+		}
+		bms_free(varnos);
+	}
+
+	return result;
+}
+
+/*
+ * check_index_only_clauses
+ *		Recursive function that checks that each clause matches the index
+ *
+ * clauses is a list of RestrictInfos or a BoolExpr containing RestrictInfos
+ */
+static bool
+check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index)
+{
+	int 		indexcol;
+	bool		match;
+	ListCell   *lc;
+
+	foreach(lc, clauses)
+	{
+		Node *node = (Node *) lfirst(lc);
+
+		match = false;
+		/*
+		 * We expect here either a RestrictInfo or a boolean
+		 * expression containing other RestrictInfos
+		 */
+		if (IsA(node, RestrictInfo))
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) node;
+
+			if (!restriction_is_or_clause(rinfo))
+			{
+				for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+				{
+					if (match_clause_to_indexcol(index, indexcol, rinfo))
+					{
+						match = true;
+						break;
+					}
+				}
+			}
+			else
+			{
+				List *subclauses = ((BoolExpr *) rinfo->orclause)->args;
+
+				match = check_index_only_clauses(subclauses, index);
+			}
+		}
+		else if (IsA(node, BoolExpr))
+		{
+			BoolExpr *boolexpr = (BoolExpr *) node;
+
+			match = check_index_only_clauses(boolexpr->args, index);
+		}
+
+		/*
+		 * If at least one restriction doesn't match index then return false
+		 */
+		if (!match)
+			return false;
+	}
+
+	/* If we got here then everything's fine */
+	return true;
+}
+
 /*
  * get_loop_count
  *		Choose the loop count estimate to use for costing a parameterized path
#2Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Ildar Musin (#1)
Re: Index Onlys Scan for expressions

On 08/16/2016 12:03 AM, Ildar Musin wrote:

Hi, hackers!

There is a known issue that index only scan (IOS) can only work with
simple index keys based on single attributes and doesn't work with index
expressions. In this patch I propose a solution that adds support of IOS
for index expressions. Here's an example:

create table abc(a int, b int, c int);
create index on abc ((a * 1000 + b), c);

with t1 as (select generate_series(1, 1000) as x),
t2 as (select generate_series(0, 999) as x)
insert into abc(a, b, c)
select t1.x, t2.x, t2.x from t1, t2;
vacuum analyze;

Explain results with the patch:

explain (analyze, buffers) select a * 1000 + b + c from abc where a *
1000 + b = 1001;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------

Index Only Scan using abc_expr_c_idx on abc (cost=0.42..4.45 rows=1
width=4) (actual time=0.032..0.033 rows=1 loops=1)
Index Cond: ((((a * 1000) + b)) = 1001)
Heap Fetches: 0
Buffers: shared hit=4
Planning time: 0.184 ms
Execution time: 0.077 ms
(6 rows)

Before the patch it was:

explain (analyze, buffers) select a * 1000 + b + c from abc where a *
1000 + b = 1001;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------

Index Scan using abc_expr_c_idx on abc (cost=0.42..8.45 rows=1
width=4) (actual time=0.039..0.041 rows=1 loops=1)
Index Cond: (((a * 1000) + b) = 1001)
Buffers: shared hit=4
Planning time: 0.177 ms
Execution time: 0.088 ms
(5 rows)

Nice! I've only quickly skimmed through the diff, but it seems sane.
Please add the patch to the 2016-09 CF, though.

This solution has limitations though: the restriction or the target
expression tree (or its part) must match exactly the index. E.g. this
expression will pass the check:

select a * 1000 + b + 100 from ...

but this will fail:

select 100 + a * 1000 + b from ...

because the parser groups it as:

(100 + a * 1000) + b

In this form it won't match any index key. Another case is when we
create index on (a+b) and then make query like 'select b+a ...' or '...
where b+a = smth' -- it won't match. This applies to regular index scan
too. Probably it worth to discuss the way to normalize index expressions
and clauses and work out more convenient way to match them.
Anyway, I will be grateful if you take a look at the patch in
attachment. Any comments and tips are welcome.

I don't think it's a major limitation - it's quite similar to the
limitation for partial indexes, i.e. with an index defined like

CREATE INDEX ON abc (c) WHERE a + b = 1000;

the index will not be used unless the query expression matches exactly.
So for example this won't work:

SELECT c FROM abc WHERE b + a = 1000;

because the variables are in the opposite order. Moreover, in the target
list it might be possible to use explicit parentheses to make it work,
no? That is, will this work?

select 100 + (a * 1000 + b) from ...

Or will it still break the IOS?

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Oleg Bartunov
obartunov@gmail.com
In reply to: Ildar Musin (#1)
Re: Index Onlys Scan for expressions

On Tue, Aug 16, 2016 at 1:03 AM, Ildar Musin <i.musin@postgrespro.ru> wrote:

Hi, hackers!

There is a known issue that index only scan (IOS) can only work with simple
index keys based on single attributes and doesn't work with index
expressions. In this patch I propose a solution that adds support of IOS for
index expressions. Here's an example:

create table abc(a int, b int, c int);
create index on abc ((a * 1000 + b), c);

with t1 as (select generate_series(1, 1000) as x),
t2 as (select generate_series(0, 999) as x)
insert into abc(a, b, c)
select t1.x, t2.x, t2.x from t1, t2;
vacuum analyze;

Explain results with the patch:

explain (analyze, buffers) select a * 1000 + b + c from abc where a * 1000 +
b = 1001;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Index Only Scan using abc_expr_c_idx on abc (cost=0.42..4.45 rows=1
width=4) (actual time=0.032..0.033 rows=1 loops=1)
Index Cond: ((((a * 1000) + b)) = 1001)
Heap Fetches: 0
Buffers: shared hit=4
Planning time: 0.184 ms
Execution time: 0.077 ms
(6 rows)

Before the patch it was:

explain (analyze, buffers) select a * 1000 + b + c from abc where a * 1000 +
b = 1001;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Index Scan using abc_expr_c_idx on abc (cost=0.42..8.45 rows=1 width=4)
(actual time=0.039..0.041 rows=1 loops=1)
Index Cond: (((a * 1000) + b) = 1001)
Buffers: shared hit=4
Planning time: 0.177 ms
Execution time: 0.088 ms
(5 rows)

This solution has limitations though: the restriction or the target
expression tree (or its part) must match exactly the index. E.g. this
expression will pass the check:

select a * 1000 + b + 100 from ...

but this will fail:

select 100 + a * 1000 + b from ...

because the parser groups it as:

(100 + a * 1000) + b

In this form it won't match any index key. Another case is when we create
index on (a+b) and then make query like 'select b+a ...' or '... where b+a =
smth' -- it won't match. This applies to regular index scan too. Probably it
worth to discuss the way to normalize index expressions and clauses and work
out more convenient way to match them.

pg_operator.oprcommutative ?

Anyway, I will be grateful if you take a look at the patch in attachment.
Any comments and tips are welcome.

Thanks!

--
Ildar Musin
i.musin@postgrespro.ru

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Alexander Korotkov
a.korotkov@postgrespro.ru
In reply to: Oleg Bartunov (#3)
Re: Index Onlys Scan for expressions

On Tue, Aug 16, 2016 at 9:09 AM, Oleg Bartunov <obartunov@gmail.com> wrote:

On Tue, Aug 16, 2016 at 1:03 AM, Ildar Musin <i.musin@postgrespro.ru>
wrote:

Hi, hackers!

There is a known issue that index only scan (IOS) can only work with

simple

index keys based on single attributes and doesn't work with index
expressions. In this patch I propose a solution that adds support of IOS

for

index expressions. Here's an example:

create table abc(a int, b int, c int);
create index on abc ((a * 1000 + b), c);

with t1 as (select generate_series(1, 1000) as x),
t2 as (select generate_series(0, 999) as x)
insert into abc(a, b, c)
select t1.x, t2.x, t2.x from t1, t2;
vacuum analyze;

Explain results with the patch:

explain (analyze, buffers) select a * 1000 + b + c from abc where a *

1000 +

b = 1001;
QUERY PLAN
------------------------------------------------------------

-------------------------------------------------------------

Index Only Scan using abc_expr_c_idx on abc (cost=0.42..4.45 rows=1
width=4) (actual time=0.032..0.033 rows=1 loops=1)
Index Cond: ((((a * 1000) + b)) = 1001)
Heap Fetches: 0
Buffers: shared hit=4
Planning time: 0.184 ms
Execution time: 0.077 ms
(6 rows)

Before the patch it was:

explain (analyze, buffers) select a * 1000 + b + c from abc where a *

1000 +

b = 1001;
QUERY PLAN
------------------------------------------------------------

--------------------------------------------------------

Index Scan using abc_expr_c_idx on abc (cost=0.42..8.45 rows=1 width=4)
(actual time=0.039..0.041 rows=1 loops=1)
Index Cond: (((a * 1000) + b) = 1001)
Buffers: shared hit=4
Planning time: 0.177 ms
Execution time: 0.088 ms
(5 rows)

This solution has limitations though: the restriction or the target
expression tree (or its part) must match exactly the index. E.g. this
expression will pass the check:

select a * 1000 + b + 100 from ...

but this will fail:

select 100 + a * 1000 + b from ...

because the parser groups it as:

(100 + a * 1000) + b

In this form it won't match any index key. Another case is when we create
index on (a+b) and then make query like 'select b+a ...' or '... where

b+a =

smth' -- it won't match. This applies to regular index scan too.

Probably it

worth to discuss the way to normalize index expressions and clauses and

work

out more convenient way to match them.

pg_operator.oprcommutative ?

Do you mean pg_operator.oprcom?
In these examples we're also lacking of pg_operator.oprassociative...
Another problem is computational complexity of such transformations.
AFAIR, few patches for making optimizer smarter with expressions were
already rejected because of this reason.
Also, even if we would have such transformation of expressions, floating
points types would be still problematic, because (a + b) + c = a + (b + c)
is not exactly true for them because of computational error.

------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#5Robert Haas
robertmhaas@gmail.com
In reply to: Alexander Korotkov (#4)
Re: Index Onlys Scan for expressions

On Tue, Aug 16, 2016 at 2:43 AM, Alexander Korotkov
<a.korotkov@postgrespro.ru> wrote:g of pg_operator.oprassociative...

Another problem is computational complexity of such transformations. AFAIR,
few patches for making optimizer smarter with expressions were already
rejected because of this reason.

s/few/many/

Also, even if we would have such transformation of expressions, floating
points types would be still problematic, because (a + b) + c = a + (b + c)
is not exactly true for them because of computational error.

Right. I think matching expressions exactly is plenty good enough.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#6Vladimir Sitnikov
sitnikov.vladimir@gmail.com
In reply to: Robert Haas (#5)
Re: Index Onlys Scan for expressions

Hi,

I've tried your indexonlypatch5.patch against REL9_6_BETA3.
Here are some results.

TL;DR:
1) <<where type=42 and upper(vc) like '%ABC%'>> does not support index-only
scan for index (type, upper(vc) varchar_pattern_ops).
3) <<(... where type=42 offset 0) where upper_vc like '%ABC%'>> does
trigger index-only scan. IOS reduces number of buffers from 977 to 17 and
that is impressive.

Can IOS be used for simple query like #1 as well?

Here are the details.

drop table vlsi;
create table vlsi(type numeric, vc varchar(500));
insert into vlsi(type,vc) select round(x/1000),
md5('||x)||md5('||x+1)||md5(''||x+2) from generate_series(1,1000000) as
s(x);
create index type_vc__vlsi on vlsi(type, upper(vc) varchar_pattern_ops);
vacuum analyze vlsi;

0) Smoke test (index only scan works when selecting indexed expression):

explain (analyze, buffers) select type, upper(vc) from vlsi where type=42;

Index Only Scan using type_vc__vlsi on vlsi (cost=0.55..67.97 rows=971
width=36) (actual time=0.012..0.212 rows=1000 loops=1)
Index Cond: (type = '42'::numeric)
Heap Fetches: 0
Buffers: shared hit=17
Planning time: 0.112 ms
Execution time: 0.272 ms

1) When trying to apply "like condition", index only scan does not work.
Note: "buffers hit" becomes 977 instead of 17.

explain (analyze, buffers) select type, upper(vc) from vlsi where type=42
and upper(vc) like '%ABC%';

Index Scan using type_vc__vlsi on vlsi (cost=0.55..1715.13 rows=20
width=36) (actual time=0.069..1.343 rows=23 loops=1)
Index Cond: (type = '42'::numeric)
Filter: (upper((vc)::text) ~~ '%ABC%'::text)
Rows Removed by Filter: 977
Buffers: shared hit=939
Planning time: 0.104 ms
Execution time: 1.358 ms

Mere "subquery" does not help: still no index-only scan

2) explain (analyze, buffers) select * from (select type, upper(vc)
upper_vc from vlsi where type=42) as x where upper_vc like '%ABC%';

Index Scan using type_vc__vlsi on vlsi (cost=0.55..1715.13 rows=20
width=36) (actual time=0.068..1.344 rows=23 loops=1)
Index Cond: (type = '42'::numeric)
Filter: (upper((vc)::text) ~~ '%ABC%'::text)
Rows Removed by Filter: 977
Buffers: shared hit=939
Planning time: 0.114 ms
Execution time: 1.357 ms

3) "offset 0" trick does help:
explain (analyze, buffers) select * from (select type, upper(vc) upper_vc
from vlsi where type=42 offset 0) as x where upper_vc like '%ABC%';

Subquery Scan on x (cost=0.55..80.11 rows=39 width=36) (actual
time=0.033..0.488 rows=23 loops=1)
Filter: (x.upper_vc ~~ '%ABC%'::text)
Rows Removed by Filter: 977
Buffers: shared hit=17
-> Index Only Scan using type_vc__vlsi on vlsi (cost=0.55..67.97
rows=971 width=36) (actual time=0.015..0.210 rows=1000 loops=1)
Index Cond: (type = '42'::numeric)
Heap Fetches: 0
Buffers: shared hit=17
Planning time: 0.086 ms
Execution time: 0.503 ms

Vladimir

#7Ildar Musin
i.musin@postgrespro.ru
In reply to: Vladimir Sitnikov (#6)
Re: Index Onlys Scan for expressions

Hi Vladimir,

On 23.08.2016 23:35, Vladimir Sitnikov wrote:

Hi,

I've tried your indexonlypatch5.patch against REL9_6_BETA3.
Here are some results.

TL;DR:
1) <<where type=42 and upper(vc) like '%ABC%'>> does not support
index-only scan for index (type, upper(vc) varchar_pattern_ops).
3) <<(... where type=42 offset 0) where upper_vc like '%ABC%'>> does
trigger index-only scan. IOS reduces number of buffers from 977 to 17
and that is impressive.

Can IOS be used for simple query like #1 as well?

Thanks for checking out the patch. Sorry for the delayed reply.

Here are the details.

drop table vlsi;
create table vlsi(type numeric, vc varchar(500));
insert into vlsi(type,vc) select round(x/1000),
md5('||x)||md5('||x+1)||md5(''||x+2) from generate_series(1,1000000) as
s(x);
create index type_vc__vlsi on vlsi(type, upper(vc) varchar_pattern_ops);
vacuum analyze vlsi;

0) Smoke test (index only scan works when selecting indexed expression):

explain (analyze, buffers) select type, upper(vc) from vlsi where type=42;

Index Only Scan using type_vc__vlsi on vlsi (cost=0.55..67.97 rows=971
width=36) (actual time=0.012..0.212 rows=1000 loops=1)
Index Cond: (type = '42'::numeric)
Heap Fetches: 0
Buffers: shared hit=17
Planning time: 0.112 ms
Execution time: 0.272 ms

1) When trying to apply "like condition", index only scan does not work.
Note: "buffers hit" becomes 977 instead of 17.

explain (analyze, buffers) select type, upper(vc) from vlsi where
type=42 and upper(vc) like '%ABC%';

Index Scan using type_vc__vlsi on vlsi (cost=0.55..1715.13 rows=20
width=36) (actual time=0.069..1.343 rows=23 loops=1)
Index Cond: (type = '42'::numeric)
Filter: (upper((vc)::text) ~~ '%ABC%'::text)
Rows Removed by Filter: 977
Buffers: shared hit=939
Planning time: 0.104 ms
Execution time: 1.358 ms

The reason why this doesn't work is that '~~' operator (which is a
synonym for 'like') isn't supported by operator class for btree. Since
the only operators supported by btree are <, <=, =, >=, >, you can use
it with queries like:

explain (analyze, buffers) select type, upper(vc) from vlsi where
type=42 and upper(vc) ~~ 'ABC%';
QUERY PLAN

-----------------------------------------------------------------------------------------------------------------------------
Index Only Scan using type_vc__vlsi on vlsi (cost=0.55..4.58 rows=1
width=36) (actual time=0.021..0.021 rows=0 loops=1)
Index Cond: ((type = '42'::numeric) AND ((upper((vc)::text)) ~>=~
'ABC'::text) AND ((upper((vc)::text)) ~<~ 'ABD'::text))
Filter: ((upper((vc)::text)) ~~ 'ABC%'::text)
Heap Fetches: 0
Buffers: shared hit=4
Planning time: 0.214 ms
Execution time: 0.044 ms
(7 rows)

In case of fixed prefix postgres implicitly substitutes '~~' operator
with two range operators:

((upper((vc)::text)) ~>=~ 'ABC'::text) AND ((upper((vc)::text)) ~<~
'ABD'::text)

so that you can use these conditions to lookup in btree.

Mere "subquery" does not help: still no index-only scan

2) explain (analyze, buffers) select * from (select type, upper(vc)
upper_vc from vlsi where type=42) as x where upper_vc like '%ABC%';

Index Scan using type_vc__vlsi on vlsi (cost=0.55..1715.13 rows=20
width=36) (actual time=0.068..1.344 rows=23 loops=1)
Index Cond: (type = '42'::numeric)
Filter: (upper((vc)::text) ~~ '%ABC%'::text)
Rows Removed by Filter: 977
Buffers: shared hit=939
Planning time: 0.114 ms
Execution time: 1.357 ms

3) "offset 0" trick does help:
explain (analyze, buffers) select * from (select type, upper(vc)
upper_vc from vlsi where type=42 offset 0) as x where upper_vc like '%ABC%';

Subquery Scan on x (cost=0.55..80.11 rows=39 width=36) (actual
time=0.033..0.488 rows=23 loops=1)
Filter: (x.upper_vc ~~ '%ABC%'::text)
Rows Removed by Filter: 977
Buffers: shared hit=17
-> Index Only Scan using type_vc__vlsi on vlsi (cost=0.55..67.97
rows=971 width=36) (actual time=0.015..0.210 rows=1000 loops=1)
Index Cond: (type = '42'::numeric)
Heap Fetches: 0
Buffers: shared hit=17
Planning time: 0.086 ms
Execution time: 0.503 ms

Vladimir

I debugged the last two queries to figure out the difference between
them. It turned out that that the query 2) transforms to the same as
query 1). And in 3rd query 'OFFSET' statement prevents rewriter from
transforming the query, so it is possible to use index only scan on
subquery and then filter the result of subquery with '~~' operator.

--
Ildar Musin
i.musin@postgrespro.ru

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#8Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Ildar Musin (#1)
1 attachment(s)
Re: Index Onlys Scan for expressions

Hi Ildar,

I've looked at this patch again today to do a bit more thorough review,
and I think it's fine. There are a few comments (particularly in the new
code in check_index_only) that need improving, and also a few small
tweaks in the walkers.

Attached is a modified v5 patch with the proposed changes - it's
probably easier than explaining what the changes should/might be.

I've added an XXX comment in check_index_only_expr_walker - ISTM we're
first explicitly matching Vars to index attributes, and then dealing
with expressions. But we loop over all index columns (including those
that can only really match Vars). Not sure if it makes any difference,
but is it worth differentiating between Var and non-Var expressions? Why
not to simply call match_index_to_operand() in both cases?

I've also tweaked a few comments to match project code style, and moved
a few variables into the block where they are used. But the latter is
probably matter of personal taste, I guess.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

indexonlyscan5-tomas.patchbinary/octet-stream; name=indexonlyscan5-tomas.patchDownload
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2952bfb..6abeda1 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -130,7 +131,13 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
 static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
 static int	find_list_position(Node *node, List **nodelist);
-static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs);
+static bool check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index);
 static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
 static double adjust_rowcount_for_semijoins(PlannerInfo *root,
 							  Index cur_relid,
@@ -1020,7 +1027,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * index data retrieval anyway.
 	 */
 	index_only_scan = (scantype != ST_BITMAPSCAN &&
-					   check_index_only(rel, index));
+					   check_index_only(root, rel, index));
 
 	/*
 	 * 4. Generate an indexscan path if there are relevant restriction clauses
@@ -1786,7 +1793,7 @@ find_list_position(Node *node, List **nodelist)
  *		Determine whether an index-only scan is possible for this index.
  */
 static bool
-check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index)
 {
 	bool		result;
 	Bitmapset  *attrs_used = NULL;
@@ -1837,8 +1844,10 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 		int			attno = index->indexkeys[i];
 
 		/*
-		 * For the moment, we just ignore index expressions.  It might be nice
-		 * to do something with them, later.
+		 * We ignore expressions here and only look at plain attributes. Analyzing
+		 * expressions is also a bit more expensive, so we first try to match the
+		 * expressions to attributes indexed directly, and spend additional CPU time
+		 * on analyzing the expressions only if needed.
 		 */
 		if (attno == 0)
 			continue;
@@ -1852,12 +1861,206 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 	/* Do we have all the necessary attributes? */
 	result = bms_is_subset(attrs_used, index_canreturn_attrs);
 
+	/*
+	 * If the directly indexed attributes are not sufficient, it's time to look
+	 * at index expressions and try to match them to targetlist and index clauses.
+	 */
+	if (!result && index->indexprs)
+		result =
+			check_index_only_targetlist(root, rel, index, index_canreturn_attrs) &&
+			check_index_only_clauses(index->indrestrictinfo, index);
+
 	bms_free(attrs_used);
 	bms_free(index_canreturn_attrs);
 
 	return result;
 }
 
+typedef struct check_index_only_walker_ctx
+{
+	IndexOptInfo   *index;
+	Bitmapset	   *index_canreturn_attrs;	/* attributes that can be returned
+											 * by index */
+	bool			match;					/* TRUE if expression matches
+											 * index */
+} check_index_only_walker_ctx;
+
+/*
+ * check_index_only_expr_walker
+ * 		Recursive walker that checks if each part of the expression can be
+ *		build with index expressions or attributes
+ */
+static bool
+check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx)
+{
+	int i;
+
+	/*
+	 * If node is empty or some subexpression has already reported that it
+	 * isn't match the index then quit looking any further
+	 */
+	if (node == NULL || ctx->match == false)
+		return false;
+
+	/* If this is a Var then check if it can be returned by the index */
+	if (IsA(node, Var))
+	{
+		Var *var = (Var *) node;
+
+		if (var->varno == ctx->index->rel->relid &&
+			!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+						   ctx->index_canreturn_attrs))
+			ctx->match = false;
+		return false;
+	}
+
+	/*
+	 * Not a Var, so it has to be an expression. So we try to match it to
+	 * index expressions. If it matches, we're done with this subtree.
+	 *
+	 * XXX This also check the expression against indexed attributes, but
+	 * that seems a bit pointless as that can only match Vars, which we
+	 * already handled above. It's a fairly cheap check, but maybe on
+	 * indexes with many columns it might be noticeable (and only looking
+	 * at index->indexprs would help?).
+	 */
+	for (i = 0; i < ctx->index->ncolumns; i++)
+		if (match_index_to_operand(node, i, ctx->index))
+			return false;
+
+	/* Recursive check */
+	return expression_tree_walker(node, check_index_only_expr_walker, (void *) ctx);
+}
+
+/*
+ * check_index_only_targetlist
+ *		Checks that every target of index->relation can be returned by index
+ *
+ * In this function we're looking through root->parse->targetlist and pick
+ * those targets which contain index->relation's attributes. For each selected
+ * target we use recursive function check_index_only_expr_walker() to check if
+ * target can be build with the index expressions and attributes and none of
+ * the other index->relation's attributes
+ */
+static bool
+check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs)
+{
+	ListCell *lc;
+	bool	result = true;
+
+	/* Check expressions from targetlist */
+	foreach(lc, root->parse->targetList)
+	{
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		Bitmapset *varnos = pull_varnos((Node *) te->expr);
+
+		/* Ignore targetlist entries not related to the indexed relation. */
+		if (bms_is_member(rel->relid, varnos))
+		{
+			bool		is_matched = false; /* assume the expression is not matched */
+			Bitmapset  *attrs = NULL;
+
+			pull_varattnos((Node *) te->expr, rel->relid, &attrs);
+
+			if (bms_is_subset(attrs, index_canreturn_attrs))
+				is_matched = true;
+			else
+			{
+				check_index_only_walker_ctx *ctx;
+
+				ctx = (check_index_only_walker_ctx *) palloc(sizeof(check_index_only_walker_ctx));
+				ctx->index = index;
+				ctx->match = true;
+				ctx->index_canreturn_attrs = index_canreturn_attrs;
+				check_index_only_expr_walker((Node *) te->expr, ctx);
+
+				if (ctx->match)
+					is_matched = true;
+
+				pfree(ctx);
+			}
+
+			bms_free(attrs);
+
+			/* if the expression is not matched, we don't need to look any further */
+			if (! is_matched)
+			{
+				result = false;
+				break;
+			}
+		}
+
+		bms_free(varnos);
+	}
+
+	return result;
+}
+
+/*
+ * check_index_only_clauses
+ *		Recursive function that checks that each clause matches the index
+ *
+ * clauses is a list of RestrictInfos or a BoolExpr containing RestrictInfos
+ */
+static bool
+check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index)
+{
+	int 		indexcol;
+	ListCell   *lc;
+
+	foreach(lc, clauses)
+	{
+		bool	match = false;
+		Node   *node = (Node *) lfirst(lc);
+
+		/*
+		 * We expect here either a RestrictInfo or a boolean
+		 * expression containing other RestrictInfos
+		 */
+		if (IsA(node, RestrictInfo))
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) node;
+
+			if (!restriction_is_or_clause(rinfo))
+			{
+				for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+				{
+					if (match_clause_to_indexcol(index, indexcol, rinfo))
+					{
+						match = true;
+						break;
+					}
+				}
+			}
+			else
+			{
+				List *subclauses = ((BoolExpr *) rinfo->orclause)->args;
+
+				match = check_index_only_clauses(subclauses, index);
+			}
+		}
+		else if (IsA(node, BoolExpr))
+		{
+			BoolExpr *boolexpr = (BoolExpr *) node;
+
+			match = check_index_only_clauses(boolexpr->args, index);
+		}
+
+		/*
+		 * If at least one restriction doesn't match index then return false
+		 */
+		if (!match)
+			return false;
+	}
+
+	/* If we got here then everything's fine */
+	return true;
+}
+
 /*
  * get_loop_count
  *		Choose the loop count estimate to use for costing a parameterized path
#9Vladimir Sitnikov
sitnikov.vladimir@gmail.com
In reply to: Ildar Musin (#7)
Re: Index Onlys Scan for expressions

Ildar>The reason why this doesn't work is that '~~' operator (which is a
Ildar>synonym for 'like') isn't supported by operator class for btree. Since
Ildar>the only operators supported by btree are <, <=, =, >=, >, you can use
Ildar>it with queries like:

Ildar>And in 3rd query 'OFFSET' statement prevents rewriter from
Ildar>transforming the query, so it is possible to use index only scan on
Ildar>subquery and then filter the result of subquery with '~~' operator.

I'm afraid I do not follow you.
Note: query 3 is 100% equivalent of query 2, however query 3 takes 55 times
less reads.
It looks like either an optimizer bug, or some missing feature in the
"index only scan" logic.

Here's quote from "query 2" (note % are at both ends): ... where type=42)
as x where upper_vc like '%ABC%';

Note: I do NOT use "indexed scan" for the like operator. I'm very well aware
that LIKE patterns with leading % cannot be optimized to a btree range scan.
What I want is "use the first indexed column as index scan, then use the
second column
for filtering".

As shown in "query 2" vs "query 3", PostgreSQL cannot come up with such a
plan on its own
for some reason.

This is not a theoretical issue, but it is something that I use a lot with
Oracle DB (it just creates a good plan for "query 2").

Vladimir

#10Ildar Musin
i.musin@postgrespro.ru
In reply to: Vladimir Sitnikov (#9)
Re: Index Onlys Scan for expressions

Hi Vladimir,

On 03.09.2016 19:31, Vladimir Sitnikov wrote:

Ildar>The reason why this doesn't work is that '~~' operator (which is a
Ildar>synonym for 'like') isn't supported by operator class for btree. Since
Ildar>the only operators supported by btree are <, <=, =, >=, >, you can use
Ildar>it with queries like:

Ildar>And in 3rd query 'OFFSET' statement prevents rewriter from
Ildar>transforming the query, so it is possible to use index only scan on
Ildar>subquery and then filter the result of subquery with '~~' operator.

I'm afraid I do not follow you.
Note: query 3 is 100% equivalent of query 2, however query 3 takes 55
times less reads.
It looks like either an optimizer bug, or some missing feature in the
"index only scan" logic.

Here's quote from "query 2" (note % are at both ends): ... where
type=42) as x where upper_vc like '%ABC%';

Note: I do NOT use "indexed scan" for the like operator. I'm very well aware
that LIKE patterns with leading % cannot be optimized to a btree range scan.
What I want is "use the first indexed column as index scan, then use the
second column
for filtering".

As shown in "query 2" vs "query 3", PostgreSQL cannot come up with such
a plan on its own
for some reason.

This is not a theoretical issue, but it is something that I use a lot
with Oracle DB (it just creates a good plan for "query 2").

Vladimir

Thanks, I get it now. The reason why it acts like this is that I used
match_clause_to_index() function to determine if IOS can be used with
the specified clauses. This function among other things checks if
operator matches the index opfamily. Apparently this isn't correct. I
wrote another prototype to test your case and it seems to work. But it's
not ready for public yet, I'll publish it in 1-2 days.

--
Ildar Musin
i.musin@postgrespro.ru

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#11Ildar Musin
i.musin@postgrespro.ru
In reply to: Tomas Vondra (#8)
Re: Index Onlys Scan for expressions

Hi Tomas,

On 03.09.2016 14:37, Tomas Vondra wrote:

Hi Ildar,

I've looked at this patch again today to do a bit more thorough review,
and I think it's fine. There are a few comments (particularly in the new
code in check_index_only) that need improving, and also a few small
tweaks in the walkers.

Attached is a modified v5 patch with the proposed changes - it's
probably easier than explaining what the changes should/might be.

I've added an XXX comment in check_index_only_expr_walker - ISTM we're
first explicitly matching Vars to index attributes, and then dealing
with expressions. But we loop over all index columns (including those
that can only really match Vars). Not sure if it makes any difference,
but is it worth differentiating between Var and non-Var expressions? Why
not to simply call match_index_to_operand() in both cases?

Thank you! Yes, you're right and probably it doesn't worth it. I
intended to optimize just a little bit since we already have attribute
numbers that can be returned by index. But as you have already noted in
comment it is a cheap check and it would barely be noticeable.

I've also tweaked a few comments to match project code style, and moved
a few variables into the block where they are used. But the latter is
probably matter of personal taste, I guess.

regards

Thanks for that, I have some difficulties in expressing myself in
english, so it was very helpful.

Best regards,
--
Ildar Musin
i.musin@postgrespro.ru

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#12Ildar Musin
i.musin@postgrespro.ru
In reply to: Ildar Musin (#10)
1 attachment(s)
Re: Index Onlys Scan for expressions

Hi Vladimir,

On 05.09.2016 16:38, Ildar Musin wrote:

Hi Vladimir,

On 03.09.2016 19:31, Vladimir Sitnikov wrote:

Ildar>The reason why this doesn't work is that '~~' operator (which is a
Ildar>synonym for 'like') isn't supported by operator class for btree.
Since
Ildar>the only operators supported by btree are <, <=, =, >=, >, you
can use
Ildar>it with queries like:

Ildar>And in 3rd query 'OFFSET' statement prevents rewriter from
Ildar>transforming the query, so it is possible to use index only scan on
Ildar>subquery and then filter the result of subquery with '~~' operator.

I'm afraid I do not follow you.
Note: query 3 is 100% equivalent of query 2, however query 3 takes 55
times less reads.
It looks like either an optimizer bug, or some missing feature in the
"index only scan" logic.

Here's quote from "query 2" (note % are at both ends): ... where
type=42) as x where upper_vc like '%ABC%';

Note: I do NOT use "indexed scan" for the like operator. I'm very well
aware
that LIKE patterns with leading % cannot be optimized to a btree range
scan.
What I want is "use the first indexed column as index scan, then use the
second column
for filtering".

As shown in "query 2" vs "query 3", PostgreSQL cannot come up with such
a plan on its own
for some reason.

This is not a theoretical issue, but it is something that I use a lot
with Oracle DB (it just creates a good plan for "query 2").

Vladimir

Thanks, I get it now. The reason why it acts like this is that I used
match_clause_to_index() function to determine if IOS can be used with
the specified clauses. This function among other things checks if
operator matches the index opfamily. Apparently this isn't correct. I
wrote another prototype to test your case and it seems to work. But it's
not ready for public yet, I'll publish it in 1-2 days.

Here is a new patch version. I modified check_index_only_clauses() so
that it doesn't use match_clause_to_indexcol() anymore. Instead it
handles different types of expressions including binary operator
expressions, scalar array expressions, row compare expressions (e.g.
(a,b)<(1,2)) and null tests and tries to match each part of expression
to index regardless an operator. I reproduced your example and was able
to get index only scan on all queries. Could you please try the patch
and tell if it works for you?

--
Ildar Musin
i.musin@postgrespro.ru

Attachments:

indexonlyscan6.patchtext/x-patch; name=indexonlyscan6.patchDownload
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2952bfb..e81f914 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -78,6 +79,14 @@ typedef struct
 	int			indexcol;		/* index column we want to match to */
 } ec_member_matches_arg;
 
+/* Context for index only expression checker */
+typedef struct
+{
+	IndexOptInfo   *index;
+	bool			match;					/* TRUE if expression matches
+											 * index */
+} check_index_only_walker_ctx;
+
 
 static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
 							IndexOptInfo *index,
@@ -130,7 +139,15 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
 static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
 static int	find_list_position(Node *node, List **nodelist);
-static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs);
+static bool check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index);
+static bool check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx);
+static bool check_index_only_expr(Node *expr, IndexOptInfo *index);
 static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
 static double adjust_rowcount_for_semijoins(PlannerInfo *root,
 							  Index cur_relid,
@@ -1020,7 +1037,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * index data retrieval anyway.
 	 */
 	index_only_scan = (scantype != ST_BITMAPSCAN &&
-					   check_index_only(rel, index));
+					   check_index_only(root, rel, index));
 
 	/*
 	 * 4. Generate an indexscan path if there are relevant restriction clauses
@@ -1786,7 +1803,7 @@ find_list_position(Node *node, List **nodelist)
  *		Determine whether an index-only scan is possible for this index.
  */
 static bool
-check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index)
 {
 	bool		result;
 	Bitmapset  *attrs_used = NULL;
@@ -1837,8 +1854,10 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 		int			attno = index->indexkeys[i];
 
 		/*
-		 * For the moment, we just ignore index expressions.  It might be nice
-		 * to do something with them, later.
+		 * We ignore expressions here and only look at plain attributes. Analyzing
+		 * expressions is also a bit more expensive, so we first try to match the
+		 * expressions to attributes indexed directly, and spend additional CPU time
+		 * on analyzing the expressions only if needed.
 		 */
 		if (attno == 0)
 			continue;
@@ -1852,6 +1871,15 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 	/* Do we have all the necessary attributes? */
 	result = bms_is_subset(attrs_used, index_canreturn_attrs);
 
+	/*
+	 * If the directly indexed attributes are not sufficient, it's time to look
+	 * at index expressions and try to match them to targetlist and index clauses.
+	 */
+	if (!result && index->indexprs)
+		result =
+			check_index_only_targetlist(root, rel, index, index_canreturn_attrs) &&
+			check_index_only_clauses(index->indrestrictinfo, index);
+
 	bms_free(attrs_used);
 	bms_free(index_canreturn_attrs);
 
@@ -1859,6 +1887,225 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 }
 
 /*
+ * check_index_only_expr_walker
+ * 		Recursive walker that checks if each part of the expression can be
+ *		build with index expressions or attributes
+ */
+static bool
+check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx)
+{
+	int		i;
+
+	/*
+	 * If node is empty or some subexpression has already reported that it
+	 * isn't match the index then quit looking any further
+	 */
+	if (node == NULL || ctx->match == false)
+		return false;
+
+	/*
+	 * Try to match current node to index expressions. If it matches, we're
+	 * done with this subtree
+	 */
+	for (i = 0; i < ctx->index->ncolumns; i++)
+		if (match_index_to_operand(node, i, ctx->index))
+			return false;
+
+	/*
+	 * If after all we still have an unresolved Var that doesn't match index
+	 * then we cannot use index only scan
+	 */
+	if (IsA(node, Var) && ((Var *) node)->varno == ctx->index->rel->relid)
+	{
+		ctx->match = false;
+		return false;
+	}
+
+	/* Recursive check */
+	return expression_tree_walker(node, check_index_only_expr_walker, (void *) ctx);
+}
+
+/*
+ * check_index_only_expr
+ *		Runs check_index_only_expr_walker() to make sure that expression could
+ *		be build with index expressions and attributes
+ */
+static bool
+check_index_only_expr(Node *expr, IndexOptInfo *index)
+{
+	bool	flag = false;
+	check_index_only_walker_ctx *ctx;
+
+	/* Don't bother to run walker for NULL expressions */
+	if (!expr)
+		return true;
+
+	ctx = (check_index_only_walker_ctx *) palloc(sizeof(check_index_only_walker_ctx));
+	ctx->index = index;
+	ctx->match = true;
+	check_index_only_expr_walker(expr, ctx);
+
+	if (ctx->match)
+		flag = true;
+
+	pfree(ctx);
+
+	return flag;
+}
+
+/*
+ * check_index_only_targetlist
+ *		Checks that every target of index->relation can be returned by index
+ *
+ * In this function we're looking through root->parse->targetlist and pick
+ * those targets which contain index->relation's attributes. For each selected
+ * target we use recursive function check_index_only_expr_walker() to check if
+ * target can be build with the index expressions and attributes and none of
+ * the other index->relation's attributes
+ */
+static bool
+check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs)
+{
+	ListCell *lc;
+	bool	result = true;
+
+	/* Check expressions from targetlist */
+	foreach(lc, root->parse->targetList)
+	{
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		Bitmapset *varnos = pull_varnos((Node *) te->expr);
+
+		/* Ignore targetlist entries not related to the indexed relation. */
+		if (bms_is_member(rel->relid, varnos))
+		{
+			bool		is_matched = false; /* assume the expression is not matched */
+			Bitmapset  *attrs = NULL;
+
+			pull_varattnos((Node *) te->expr, rel->relid, &attrs);
+
+			if (bms_is_subset(attrs, index_canreturn_attrs) ||
+				check_index_only_expr((Node *) te->expr, index))
+			{
+				is_matched = true;
+			}
+
+			bms_free(attrs);
+
+			/* if the expression is not matched, we don't need to look any further */
+			if (! is_matched)
+			{
+				result = false;
+				break;
+			}
+		}
+
+		bms_free(varnos);
+	}
+
+	return result;
+}
+
+/*
+ * check_index_only_clauses
+ *		Recursive function that checks that each clause matches the index
+ *
+ * clauses is a list of RestrictInfos or a BoolExpr containing RestrictInfos
+ */
+static bool
+check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index)
+{
+	ListCell   *lc;
+
+	foreach(lc, clauses)
+	{
+		bool	match = false;
+		Node   *node = (Node *) lfirst(lc);
+
+		/*
+		 * We expect here either a RestrictInfo or a boolean expression
+		 * containing other RestrictInfos
+		 */
+		if (IsA(node, RestrictInfo))
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) node;
+			Expr		 *clause = rinfo->clause;
+
+			if (!restriction_is_or_clause(rinfo))
+			{
+				/*
+				 * Clause could be a binary opclause, ScalarArrayOpExpr,
+				 * RowCompareExpr or NullTest. We take left and right parts of
+				 * binary clauses or single argument of NullTest and make sure
+				 * that they match index. Unlike match_clause_to_indexcol()
+				 * we do not care here about operator family
+				 */
+				Node *left = NULL;
+				Node *right = NULL;
+
+				if (is_opclause(clause))
+				{
+					left = get_leftop(clause);
+					right = get_rightop(clause);
+				}
+				else if (IsA(clause, ScalarArrayOpExpr))
+				{
+					ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+
+					left = (Node *) linitial(saop->args);
+					right = (Node *) lsecond(saop->args);
+				}
+				else if (IsA(clause, RowCompareExpr))
+				{
+					RowCompareExpr *rowexpr = (RowCompareExpr *) clause;
+
+					left = (Node *) rowexpr->largs;
+					right = (Node *) rowexpr->rargs;
+				}
+				else if (index->amsearchnulls && IsA(clause, NullTest))
+				{
+					NullTest   *nt = (NullTest *) clause;
+
+					left = (Node *) nt->arg;
+					/* Leave right part equal to NULL */
+				}
+
+				if (check_index_only_expr(left, index)
+					&& check_index_only_expr(right, index))
+				{
+					match = true;
+					break;
+				}
+			}
+			else
+			{
+				List *subclauses = ((BoolExpr *) rinfo->orclause)->args;
+
+				match = check_index_only_clauses(subclauses, index);
+			}
+		}
+		else if (IsA(node, BoolExpr))
+		{
+			BoolExpr *boolexpr = (BoolExpr *) node;
+
+			match = check_index_only_clauses(boolexpr->args, index);
+		}
+
+		/*
+		 * If at least one restriction doesn't match index then return false
+		 */
+		if (!match)
+			return false;
+	}
+
+	/* If we got here then everything's fine */
+	return true;
+}
+
+/*
  * get_loop_count
  *		Choose the loop count estimate to use for costing a parameterized path
  *		with the given set of outer relids.
#13Vladimir Sitnikov
sitnikov.vladimir@gmail.com
In reply to: Ildar Musin (#12)
Re: Index Onlys Scan for expressions

Ildar> Could you please try the patch and tell if it works for you?

I've tested patch6 against recent head. The patch applies with no problems.

The previous case (filter on top of i-o-s) is fixed. Great work.

Here are the test cases and results:
https://gist.github.com/vlsi/008e18e18b609fcaaec53d9cc210b7e2

However, it looks there are issues when accessing non-indexed columns.
The error is "ERROR: variable not found in subplan target list"
The case is 02_case2_fails.sql (see the gist link above)

The essence of the case is "create index on substr(vc, 1, 128)"
and assume that majority of the rows have length(vc)<128.
Under that conditions, it would be nice to do index-only-scan
and filter (like in my previous case), but detect "long" rows
and do additional recheck for them.

Vladimir

#14Robert Haas
robertmhaas@gmail.com
In reply to: Vladimir Sitnikov (#13)
Re: Index Onlys Scan for expressions

On Thu, Sep 8, 2016 at 2:58 PM, Vladimir Sitnikov
<sitnikov.vladimir@gmail.com> wrote:

Ildar> Could you please try the patch and tell if it works for you?

I've tested patch6 against recent head. The patch applies with no problems.

The previous case (filter on top of i-o-s) is fixed. Great work.

Here are the test cases and results:
https://gist.github.com/vlsi/008e18e18b609fcaaec53d9cc210b7e2

However, it looks there are issues when accessing non-indexed columns.
The error is "ERROR: variable not found in subplan target list"
The case is 02_case2_fails.sql (see the gist link above)

The essence of the case is "create index on substr(vc, 1, 128)"
and assume that majority of the rows have length(vc)<128.
Under that conditions, it would be nice to do index-only-scan
and filter (like in my previous case), but detect "long" rows
and do additional recheck for them.

Based on this report, this patch appears to have bugs that would
preclude committing it, so I'm marking it "Returned with Feedback" for
this CommitFest, which is due to end shortly. Ildar, please feel free
to resubmit once you've updated the patch.

FWIW, I think this is a good effort and hope to see it move forward.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers