Allow to specify a index name as ANALYZE parameter

Started by Yugo Nagataover 7 years ago6 messages
#1Yugo Nagata
nagata@sraoss.co.jp
1 attachment(s)

Hi,

When we specify column names for ANALYZE, only the statistics for those columns
are collected. Similarly, is it useful if we have a option to specify an index
for ANALYZE to collect only the statistics for expression in the specified index?

A usecase I suppose is that when a new expression index is created and that
we need only the statistics for the new index. Although ANALYZE of all the indexes
is usually fast because ANALYZE uses a random sampling of the table rows, ANALYZE
on the specific index may be still useful if there are other index whose "statistics
target" is large and/or whose expression takes time to compute, for example.

Attached is the WIP patch to allow to specify a index name as ANALYZE parameter.
Any documatation is not yet included. I would appreciate any feedback!

Regards,

--
Yugo Nagata <nagata@sraoss.co.jp>

Attachments:

analyze_index_WIP.patchtext/x-diff; name=analyze_index_WIP.patchDownload
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 25194e8..058f242 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -84,7 +84,7 @@ static BufferAccessStrategy vac_strategy;
 static void do_analyze_rel(Relation onerel, int options,
 			   VacuumParams *params, List *va_cols,
 			   AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
-			   bool inh, bool in_outer_xact, int elevel);
+			   bool inh, bool in_outer_xact, int elevel, Oid indexid);
 static void compute_index_stats(Relation onerel, double totalrows,
 					AnlIndexData *indexdata, int nindexes,
 					HeapTuple *rows, int numrows,
@@ -121,6 +121,7 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
 	AcquireSampleRowsFunc acquirefunc = NULL;
 	BlockNumber relpages = 0;
 	bool		rel_lock = true;
+	Oid			indexid = InvalidOid;
 
 	/* Select logging level */
 	if (options & VACOPT_VERBOSE)
@@ -253,6 +254,28 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
 		/* Also get regular table's size */
 		relpages = RelationGetNumberOfBlocks(onerel);
 	}
+	else if (onerel->rd_rel->relkind == RELKIND_INDEX)
+	{
+		Relation rel;
+
+		indexid = relid;
+		relid = IndexGetRelation(indexid, true);
+
+		if (va_cols != NIL)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("ANALYZE option must be specified when a column list is provided")));
+
+		rel = try_relation_open(relid, ShareUpdateExclusiveLock);
+		relation_close(onerel, ShareUpdateExclusiveLock);
+
+		onerel = rel;
+
+		/* Regular table, so we'll use the regular row acquisition function */
+		acquirefunc = acquire_sample_rows;
+		/* Also get regular table's size */
+		relpages = RelationGetNumberOfBlocks(onerel);
+	}
 	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
 	{
 		/*
@@ -308,14 +331,14 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
 	 */
 	if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
 		do_analyze_rel(onerel, options, params, va_cols, acquirefunc,
-					   relpages, false, in_outer_xact, elevel);
+					   relpages, false, in_outer_xact, elevel, indexid);
 
 	/*
 	 * If there are child tables, do recursive ANALYZE.
 	 */
 	if (onerel->rd_rel->relhassubclass)
 		do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages,
-					   true, in_outer_xact, elevel);
+					   true, in_outer_xact, elevel, InvalidOid);
 
 	/*
 	 * Close source relation now, but keep lock so that no one deletes it
@@ -345,7 +368,7 @@ static void
 do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 			   List *va_cols, AcquireSampleRowsFunc acquirefunc,
 			   BlockNumber relpages, bool inh, bool in_outer_xact,
-			   int elevel)
+			   int elevel, Oid indexid)
 {
 	int			attr_cnt,
 				tcnt,
@@ -445,7 +468,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 		}
 		attr_cnt = tcnt;
 	}
-	else
+	else if (!OidIsValid(indexid))
 	{
 		attr_cnt = onerel->rd_att->natts;
 		vacattrstats = (VacAttrStats **)
@@ -459,6 +482,8 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 		}
 		attr_cnt = tcnt;
 	}
+	else
+		attr_cnt = 0;
 
 	/*
 	 * Open all indexes of the relation, and see if there are any analyzable
@@ -468,7 +493,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 	 * all.
 	 */
 	if (!inh)
-		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel, indexid);
 	else
 	{
 		Irel = NULL;
@@ -750,6 +775,7 @@ compute_index_stats(Relation onerel, double totalrows,
 	int			ind,
 				i;
 
+
 	ind_context = AllocSetContextCreate(anl_context,
 										"Analyze Index",
 										ALLOCSET_DEFAULT_SIZES);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d90cb9a..b21811d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1606,7 +1606,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
  */
 void
 vac_open_indexes(Relation relation, LOCKMODE lockmode,
-				 int *nindexes, Relation **Irel)
+				 int *nindexes, Relation **Irel, Oid idx)
 {
 	List	   *indexoidlist;
 	ListCell   *indexoidscan;
@@ -1614,7 +1614,10 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode,
 
 	Assert(lockmode != NoLock);
 
-	indexoidlist = RelationGetIndexList(relation);
+	if (OidIsValid(idx))
+		indexoidlist = list_make1_oid(idx);
+	else
+		indexoidlist = RelationGetIndexList(relation);
 
 	/* allocate enough memory for all indexes */
 	i = list_length(indexoidlist);
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 5649a70..40e0a31 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -258,7 +258,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	vacrelstats->lock_waiter_detected = false;
 
 	/* Open all indexes of the relation */
-	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
+	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel, InvalidOid);
 	vacrelstats->hasindex = (nindexes > 0);
 
 	/* Do the vacuuming */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 85d472f..00719d5 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -160,7 +160,7 @@ extern void ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel);
 extern void vacuum(int options, List *relations, VacuumParams *params,
 	   BufferAccessStrategy bstrategy, bool isTopLevel);
 extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
-				 int *nindexes, Relation **Irel);
+				 int *nindexes, Relation **Irel, Oid idx);
 extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
 extern double vac_estimate_reltuples(Relation relation,
 					   BlockNumber total_pages,
#2Alexander Korotkov
a.korotkov@postgrespro.ru
In reply to: Yugo Nagata (#1)
Re: Allow to specify a index name as ANALYZE parameter

Hi!

On Wed, Jul 11, 2018 at 12:04 PM Yugo Nagata <nagata@sraoss.co.jp> wrote:

When we specify column names for ANALYZE, only the statistics for those columns
are collected. Similarly, is it useful if we have a option to specify an index
for ANALYZE to collect only the statistics for expression in the specified index?

A usecase I suppose is that when a new expression index is created and that
we need only the statistics for the new index. Although ANALYZE of all the indexes
is usually fast because ANALYZE uses a random sampling of the table rows, ANALYZE
on the specific index may be still useful if there are other index whose "statistics
target" is large and/or whose expression takes time to compute, for example.

Attached is the WIP patch to allow to specify a index name as ANALYZE parameter.
Any documatation is not yet included. I would appreciate any feedback!

I think this makes sense. Once we can collect statistics individually
for regular columns, we should be able to do the same for indexed
expression. Please, register this patch on the next commitfest.

Regarding current implementation I found message "ANALYZE option must
be specified when a column list is provided" to be confusing.
Perhaps, you've missed some negation in this message, since in fact
you disallow analyze with column list.

However, since multicolumn index may contain multiple expression, I
think we should support specifying columns for ANALYZE index clause.
We could support specifying columns by their numbers in the same way
we did for "ALTER INDEX index ALTER COLUMN colnum SET STATISTICS
number".

------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#3Yugo Nagata
nagata@sraoss.co.jp
In reply to: Alexander Korotkov (#2)
Re: Allow to specify a index name as ANALYZE parameter

On Wed, 11 Jul 2018 14:26:03 +0300
Alexander Korotkov <a.korotkov@postgrespro.ru> wrote:

On Wed, Jul 11, 2018 at 12:04 PM Yugo Nagata <nagata@sraoss.co.jp> wrote:

When we specify column names for ANALYZE, only the statistics for those columns
are collected. Similarly, is it useful if we have a option to specify an index
for ANALYZE to collect only the statistics for expression in the specified index?

A usecase I suppose is that when a new expression index is created and that
we need only the statistics for the new index. Although ANALYZE of all the indexes
is usually fast because ANALYZE uses a random sampling of the table rows, ANALYZE
on the specific index may be still useful if there are other index whose "statistics
target" is large and/or whose expression takes time to compute, for example.

Attached is the WIP patch to allow to specify a index name as ANALYZE parameter.
Any documatation is not yet included. I would appreciate any feedback!

I think this makes sense. Once we can collect statistics individually
for regular columns, we should be able to do the same for indexed
expression. Please, register this patch on the next commitfest.

Thank you for your comment! I registered this to CF 2018-09.

Regarding current implementation I found message "ANALYZE option must
be specified when a column list is provided" to be confusing.
Perhaps, you've missed some negation in this message, since in fact
you disallow analyze with column list.

However, since multicolumn index may contain multiple expression, I
think we should support specifying columns for ANALYZE index clause.
We could support specifying columns by their numbers in the same way
we did for "ALTER INDEX index ALTER COLUMN colnum SET STATISTICS
number".

Make sense. I'll fix the patch to support specifying columns of index.

Thanks,

--
Yugo Nagata <nagata@sraoss.co.jp>

#4Vik Fearing
vik.fearing@2ndquadrant.com
In reply to: Yugo Nagata (#1)
Re: Allow to specify a index name as ANALYZE parameter

On 11/07/18 11:04, Yugo Nagata wrote:

A usecase I suppose is that when a new expression index is created and that
we need only the statistics for the new index.

I wonder if this shouldn't just be done automatically.
--
Vik Fearing +33 6 46 75 15 36
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support

#5John Naylor
jcnaylor@gmail.com
In reply to: Yugo Nagata (#3)
Re: Allow to specify a index name as ANALYZE parameter

On 7/12/18, Yugo Nagata <nagata@sraoss.co.jp> wrote:

On Wed, 11 Jul 2018 14:26:03 +0300
Alexander Korotkov <a.korotkov@postgrespro.ru> wrote:

However, since multicolumn index may contain multiple expression, I
think we should support specifying columns for ANALYZE index clause.
We could support specifying columns by their numbers in the same way
we did for "ALTER INDEX index ALTER COLUMN colnum SET STATISTICS
number".

Make sense. I'll fix the patch to support specifying columns of index.

Hi,
I'm interested in this feature, so I've signed up to help review.
Given the above, I thought it appropriate to mark the patch Waiting on
Author.

-John Naylor

Show quoted text

Thanks,

--
Yugo Nagata <nagata@sraoss.co.jp>

#6Michael Paquier
michael@paquier.xyz
In reply to: John Naylor (#5)
Re: Allow to specify a index name as ANALYZE parameter

On Sun, Sep 09, 2018 at 01:25:49PM +0700, John Naylor wrote:

I'm interested in this feature, so I've signed up to help review.
Given the above, I thought it appropriate to mark the patch Waiting on
Author.

I find this feature interesting as well. The patch has been waiting on
author input for a couple of weeks unfortunately, so I am marking as
returned with feedback. Could you update the patch?
--
Michael