WIP: cross column correlation, 2nd shot
hello,
we (zoli, ants and myself) have put some more effort into our struggle to make cross column estimates better and more reasonable.
this is supposed to fix problems related to columns which are not "independent" (such as "time of the year" and temperature and so on).
this patch is far from ready but does some nice and useful stuff already:
Here's the cross-col patch against today's master branch. Also, a data
generator that creates a few data distributions for testing. Just run
it on an empty db, it will create 3 tables, expdist, normdist and
weigheddist. A good example would be:
EXPLAIN ANALYZE SELECT * FROM weigheddist WHERE a = 'a' AND b = 3; --
estimates 1 rows, actual 0
DROP CROSS COLUMN STATISTICS ON TABLE weigheddist (a, b);
EXPLAIN ANALYZE SELECT * FROM weigheddist WHERE a = 'a' AND b = 3; --
estimates 161k rows
Some notes:
* Currently only exact equality operators on one table are supported.
* The operators = ANY (ie blah IN (1,2,3)), != and IS NULL would be
quite simple to estimate with the current design.
* Range ops (var BETWEEN c1 AND c2) would require decent
multi-dimensional histogram construction. The previous MCV based
bucket selection mechanism created pretty useless histograms for cases
where there are more values than MCV buckets (which should be most cases where
range ops are useful). I found some papers with algorithms for good
multidimensional histogram construction.
* I'll look if there are any decent algorithms for "functional dependency"
stats (given tuples t1 and t2, calculate P(t1.b = t2.b|t1.a = t2.a)),
seems like a very useful stat to estimate joins.
* The stats catalog design seems too hacky, probably needs a good
rethink. Maybe a separate catalog for multi attribute stats. I'd like
to work out a decent roadmap where this feature is going before
redesigning it, otherwise multi table stats will require another
redesign.
* A decent set of real-worldish use cases would be really good to
double check if I'm on the right track.
one more thing:
ants has come up with the idea of making the stats collecting engine for more complicated multi-table-cross correlation pluggable; we should ship a default collector but allow people to put in some other functionality if needed (could make sense for some not-so-easy sets of data and some nasty corner cases). any thoughts on that one?
many thanks,
hans
Attachments:
cross-col-stats.v1.patchapplication/octet-stream; name=cross-col-stats.v1.patchDownload
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 1f40b7c..5fe1710 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -339,8 +339,6 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
return false;
if (attr1->atttypid != attr2->atttypid)
return false;
- if (attr1->attstattarget != attr2->attstattarget)
- return false;
if (attr1->attlen != attr2->attlen)
return false;
if (attr1->attndims != attr2->attndims)
@@ -473,7 +471,6 @@ TupleDescInitEntry(TupleDesc desc,
else
MemSet(NameStr(att->attname), 0, NAMEDATALEN);
- att->attstattarget = -1;
att->attcacheoff = -1;
att->atttypmod = typmod;
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index fedde93..c5a9474 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -308,6 +308,30 @@ btoidvectorcmp(PG_FUNCTION_ARGS)
}
Datum
+btint2vectorcmp(PG_FUNCTION_ARGS)
+{
+    /*
+     * Three-way comparison of two int2vector arguments: returns a negative
+     * value, zero, or a positive value.
+     */
+    int2vector *lhs = (int2vector *) PG_GETARG_POINTER(0);
+    int2vector *rhs = (int2vector *) PG_GETARG_POINTER(1);
+    int         pos;
+
+    /* Vectors of different length are ordered by their length alone */
+    if (lhs->dim1 != rhs->dim1)
+        PG_RETURN_INT32(lhs->dim1 - rhs->dim1);
+
+    /* Same length: the first position that differs decides the outcome */
+    for (pos = 0; pos < lhs->dim1; pos++)
+    {
+        if (lhs->values[pos] > rhs->values[pos])
+            PG_RETURN_INT32(1);
+        if (lhs->values[pos] < rhs->values[pos])
+            PG_RETURN_INT32(-1);
+    }
+
+    /* No position differed, the vectors are equal */
+    PG_RETURN_INT32(0);
+}
+
+Datum
btcharcmp(PG_FUNCTION_ARGS)
{
char a = PG_GETARG_CHAR(0);
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index e3ae92d..bde0285 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -737,7 +737,6 @@ DefineAttr(char *name, char *type, int attnum)
attrtypes[attnum]->attndims = 0;
}
- attrtypes[attnum]->attstattarget = -1;
attrtypes[attnum]->attcacheoff = -1;
attrtypes[attnum]->atttypmod = -1;
attrtypes[attnum]->attislocal = true;
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index a8653cd..e3b17e8 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -126,37 +126,37 @@ static List *insert_ordered_unique_oid(List *list, Oid datum);
*/
static FormData_pg_attribute a1 = {
- 0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+ 0, {"ctid"}, TIDOID, sizeof(ItemPointerData),
SelfItemPointerAttributeNumber, 0, -1, -1,
false, 'p', 's', true, false, false, true, 0
};
static FormData_pg_attribute a2 = {
- 0, {"oid"}, OIDOID, 0, sizeof(Oid),
+ 0, {"oid"}, OIDOID, sizeof(Oid),
ObjectIdAttributeNumber, 0, -1, -1,
true, 'p', 'i', true, false, false, true, 0
};
static FormData_pg_attribute a3 = {
- 0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+ 0, {"xmin"}, XIDOID, sizeof(TransactionId),
MinTransactionIdAttributeNumber, 0, -1, -1,
true, 'p', 'i', true, false, false, true, 0
};
static FormData_pg_attribute a4 = {
- 0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+ 0, {"cmin"}, CIDOID, sizeof(CommandId),
MinCommandIdAttributeNumber, 0, -1, -1,
true, 'p', 'i', true, false, false, true, 0
};
static FormData_pg_attribute a5 = {
- 0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+ 0, {"xmax"}, XIDOID, sizeof(TransactionId),
MaxTransactionIdAttributeNumber, 0, -1, -1,
true, 'p', 'i', true, false, false, true, 0
};
static FormData_pg_attribute a6 = {
- 0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+ 0, {"cmax"}, CIDOID, sizeof(CommandId),
MaxCommandIdAttributeNumber, 0, -1, -1,
true, 'p', 'i', true, false, false, true, 0
};
@@ -168,7 +168,7 @@ static FormData_pg_attribute a6 = {
* used in SQL.
*/
static FormData_pg_attribute a7 = {
- 0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+ 0, {"tableoid"}, OIDOID, sizeof(Oid),
TableOidAttributeNumber, 0, -1, -1,
true, 'p', 'i', true, false, false, true, 0
};
@@ -595,7 +595,6 @@ InsertPgAttributeTuple(Relation pg_attribute_rel,
values[Anum_pg_attribute_attrelid - 1] = ObjectIdGetDatum(new_attribute->attrelid);
values[Anum_pg_attribute_attname - 1] = NameGetDatum(&new_attribute->attname);
values[Anum_pg_attribute_atttypid - 1] = ObjectIdGetDatum(new_attribute->atttypid);
- values[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(new_attribute->attstattarget);
values[Anum_pg_attribute_attlen - 1] = Int16GetDatum(new_attribute->attlen);
values[Anum_pg_attribute_attnum - 1] = Int16GetDatum(new_attribute->attnum);
values[Anum_pg_attribute_attndims - 1] = Int32GetDatum(new_attribute->attndims);
@@ -667,12 +666,13 @@ AddNewAttributeTuples(Oid new_rel_oid,
attr = tupdesc->attrs[i];
/* Fill in the correct relation OID */
attr->attrelid = new_rel_oid;
- /* Make sure these are OK, too */
- attr->attstattarget = -1;
+ /* Make sure this is OK, too */
attr->attcacheoff = -1;
InsertPgAttributeTuple(rel, attr, indstate);
+ AddStatistics(new_rel_oid, &attr->attnum, 1, (oidinhcount > 0), -1);
+
/* Add dependency info */
myself.classId = RelationRelationId;
myself.objectId = new_rel_oid;
@@ -1490,9 +1490,6 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
/* Remove any NOT NULL constraint the column may have */
attStruct->attnotnull = false;
- /* We don't want to keep stats for it anymore */
- attStruct->attstattarget = 0;
-
/*
* Change the column name to something that isn't likely to conflict
*/
@@ -1514,8 +1511,9 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
heap_close(attr_rel, RowExclusiveLock);
+ /* Only drop pg_statistic entries for non system columns. */
if (attnum > 0)
- RemoveStatistics(relid, attnum);
+ RemoveStatistics(relid, &attnum, 1);
relation_close(rel, NoLock);
}
@@ -1740,7 +1738,7 @@ heap_drop_with_catalog(Oid relid)
/*
* delete statistics
*/
- RemoveStatistics(relid, 0);
+ RemoveStatistics(relid, NULL, 0);
/*
* delete attribute tuples
@@ -2542,19 +2540,214 @@ cookConstraint(ParseState *pstate,
/*
- * RemoveStatistics --- remove entries in pg_statistic for a rel or column
+ * AddStatistics --- add a placeholder entry to pg_statistic
+ *
+ * relid - OID of the relation the entry belongs to
+ * attnums - an ordered array of AttrNumbers
+ * n_attnums - number of elements in the array
+ * inherited - value stored in the entry's stainherit column
+ * statistics_target - the sampling size for this statistics
+ *
+ * Entries in pg_statistic are used by the planner to compute selectivity values.
+ * This function is called when a new relation is created or a new column is added
+ * to a relation. It is therefore ensured that every column has an entry during the
+ * lifetime of the relation since its creation. There is one exception to this
+ * rule: this function is a no-op during bootstrapping to avoid a catch-22
+ * situation where a pg_statistic entry would be created when pg_statistic itself
+ * doesn't exist yet. pg_statistic entries for system tables will be created by
+ * ANALYZE as before. The entry is created as invalid (stavalid == false) and
+ * the histogram columns are NULLs. This will also be fixed by ANALYZE.
+ *
+ * It is an error if pg_statistic already holds an entry for the same
+ * relation and set of columns.
+ */
+void
+AddStatistics(Oid relid, AttrNumber *attnums, int n_attnums, bool inherited, int statistics_target)
+{
+    Relation    rel;
+    ScanKeyData scanKey[2];
+    SysScanDesc scan;
+    int2vector *attnumvector;
+    HeapTuple   tuple;
+    TupleDesc   tupDesc;
+    Datum       values[Natts_pg_statistic];
+    bool        nulls[Natts_pg_statistic];
+    int         i, j;
+
+    if (IsBootstrapProcessingMode())
+        return;
+
+    Assert(attnums != NULL);
+    Assert(n_attnums > 0);
+
+    attnumvector = buildint2vector(attnums, n_attnums);
+
+    rel = heap_open(StatisticRelationId, RowExclusiveLock);
+
+    /*
+     * Look for an existing entry with the same relation and column set.
+     * The column-set key uses the int2vector equality procedure, the same
+     * one RemoveStatistics() uses for this column.
+     */
+    ScanKeyInit(&scanKey[0],
+                Anum_pg_statistic_starelid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(relid));
+    ScanKeyInit(&scanKey[1],
+                Anum_pg_statistic_staattnums,
+                BTEqualStrategyNumber, F_INT2VECTOREQ,
+                PointerGetDatum(attnumvector));
+
+    scan = systable_beginscan(rel, StatisticRelidAttnumsInhIndexId, true,
+                              SnapshotNow, 2, scanKey);
+
+    tuple = systable_getnext(scan);
+    if (HeapTupleIsValid(tuple))
+    {
+        systable_endscan(scan);
+        elog(ERROR, "pg_statistic entry already exists for this table and set of columns");
+    }
+
+    systable_endscan(scan);
+
+    /* Every column starts out NULL; only the ones assigned below are set */
+    for (i = 0; i < Natts_pg_statistic; i++)
+        nulls[i] = true;
+
+    /* The columns are filled positionally, i walks along the tuple */
+    i = 0;
+    values[i] = ObjectIdGetDatum(relid); nulls[i++] = false; /* starelid */
+    values[i] = BoolGetDatum(inherited); nulls[i++] = false; /* stainherit */
+    values[i] = BoolGetDatum(false); nulls[i++] = false; /* stavalid */
+    values[i] = Int32GetDatum(statistics_target); nulls[i++] = false; /* statarget */
+    values[i] = Float4GetDatum(0); nulls[i++] = false; /* stanullfrac */
+    values[i] = Int32GetDatum(0); nulls[i++] = false; /* stawidth */
+    values[i] = Float4GetDatum(0); nulls[i++] = false; /* stadistinct */
+    for (j = 0; j < STATISTIC_NUM_SLOTS; j++)
+    {
+        values[i] = Int16GetDatum(0); nulls[i++] = false; /* stakindN */
+    }
+    for (j = 0; j < STATISTIC_NUM_SLOTS; j++)
+    {
+        values[i] = ObjectIdGetDatum(0); nulls[i++] = false; /* staopN */
+    }
+    values[i] = PointerGetDatum(attnumvector); nulls[i++] = false; /* staattnums */
+
+    tupDesc = RelationGetDescr(rel);
+
+    tuple = heap_form_tuple(tupDesc, values, nulls);
+
+    simple_heap_insert(rel, tuple);
+
+    CatalogUpdateIndexes(rel, tuple);
+
+    pfree(attnumvector);
+
+    /* Paired with the heap_open() above */
+    heap_close(rel, RowExclusiveLock);
+}
+
+typedef struct invalidate_stats { /* list node used by InvalidateStatistics() */
+ HeapTuple tuple; /* private copy of a pg_statistic tuple, made with heap_copytuple() */
+ struct invalidate_stats *next; /* following node; NULL terminates the list */
+} invalidate_stats;
+
+/*
+ * InvalidateStatistics --- mark as invalid every pg_statistic entry of a
+ * relation whose column list mentions the given attribute
+ *
+ * Matching tuples are copied into a private list while the system table
+ * scan is running and are only updated once the scan has ended, so that
+ * the updates do not conflict with the scan.  This is a no-op in bootstrap
+ * processing mode.
+ */
+void
+InvalidateStatistics(Oid relid, AttrNumber attnum)
+{
+    Relation    statrel;
+    TupleDesc   statdesc;
+    ScanKeyData relkey;
+    SysScanDesc statscan;
+    HeapTuple   stattup;
+    invalidate_stats *head = NULL;
+    invalidate_stats *tail = NULL;
+
+    if (IsBootstrapProcessingMode())
+        return;
+
+    statrel = heap_open(StatisticRelationId, RowExclusiveLock);
+    statdesc = RelationGetDescr(statrel);
+
+    /* Visit every pg_statistic entry that belongs to the relation */
+    ScanKeyInit(&relkey,
+                Anum_pg_statistic_starelid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(relid));
+
+    statscan = systable_beginscan(statrel, StatisticRelidAttnumsInhIndexId, true,
+                                  SnapshotNow, 1, &relkey);
+
+    while (HeapTupleIsValid(stattup = systable_getnext(statscan)))
+    {
+        bool        isnull;
+        bool        mentioned = false;
+        Datum       vecdatum;
+        int2vector *vec;
+        int         k;
+        invalidate_stats *node;
+
+        vecdatum = heap_getattr(stattup, Anum_pg_statistic_staattnums, statdesc, &isnull);
+
+        Assert(!isnull);
+
+        vec = (int2vector *) DatumGetPointer(vecdatum);
+
+        /* Stop looking as soon as the attribute shows up in the vector */
+        for (k = 0; k < vec->dim1 && !mentioned; k++)
+            mentioned = (vec->values[k] == attnum);
+
+        if (!mentioned)
+            continue;
+
+        /* Remember a copy of the tuple at the tail of the list */
+        node = palloc(sizeof(invalidate_stats));
+        node->tuple = heap_copytuple(stattup);
+        node->next = NULL;
+
+        if (head == NULL)
+            head = node;
+        else
+            tail->next = node;
+        tail = node;
+    }
+
+    systable_endscan(statscan);
+
+    /* The scan is over: update the collected tuples, freeing the list as we go */
+    while (head != NULL)
+    {
+        invalidate_stats *done = head;
+        Form_pg_statistic statform = (Form_pg_statistic) GETSTRUCT(done->tuple);
+
+        statform->stavalid = false;
+
+        simple_heap_update(statrel, &done->tuple->t_self, done->tuple);
+
+        CatalogUpdateIndexes(statrel, done->tuple);
+
+        head = done->next;
+
+        heap_freetuple(done->tuple);
+        pfree(done);
+    }
+
+    relation_close(statrel, RowExclusiveLock);
+}
+
+/*
+ * RemoveStatistics --- remove entries in pg_statistic for a rel's set of columns
*
- * If attnum is zero, remove all entries for rel; else remove only the one(s)
- * for that column.
+ * If attnums is NULL, remove all entries for rel; else remove only the one
+ * for that set of column(s).
*/
void
-RemoveStatistics(Oid relid, AttrNumber attnum)
+RemoveStatistics(Oid relid, AttrNumber *attnums, int n_attnums)
{
Relation pgstatistic;
SysScanDesc scan;
ScanKeyData key[2];
int nkeys;
HeapTuple tuple;
+ int2vector *attnumvector = NULL;
pgstatistic = heap_open(StatisticRelationId, RowExclusiveLock);
@@ -2563,18 +2756,19 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
- if (attnum == 0)
+ if (attnums == NULL)
nkeys = 1;
else
{
+ attnumvector = buildint2vector(attnums, n_attnums);
ScanKeyInit(&key[1],
- Anum_pg_statistic_staattnum,
- BTEqualStrategyNumber, F_INT2EQ,
- Int16GetDatum(attnum));
+ Anum_pg_statistic_staattnums,
+ BTEqualStrategyNumber, F_INT2VECTOREQ,
+ PointerGetDatum(attnumvector));
nkeys = 2;
}
- scan = systable_beginscan(pgstatistic, StatisticRelidAttnumInhIndexId, true,
+ scan = systable_beginscan(pgstatistic, StatisticRelidAttnumsInhIndexId, true,
SnapshotNow, nkeys, key);
/* we must loop even when attnum != 0, in case of inherited stats */
@@ -2583,6 +2777,9 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
systable_endscan(scan);
+ if (attnumvector)
+ pfree(attnumvector);
+
heap_close(pgstatistic, RowExclusiveLock);
}
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index bfbe642..74cfa58 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -342,7 +342,6 @@ ConstructTupleDescriptor(Relation heapRelation,
*/
to->attnum = i + 1;
- to->attstattarget = -1;
to->attcacheoff = -1;
to->attnotnull = false;
to->atthasdef = false;
@@ -380,7 +379,6 @@ ConstructTupleDescriptor(Relation heapRelation,
to->attbyval = typeTup->typbyval;
to->attstorage = typeTup->typstorage;
to->attalign = typeTup->typalign;
- to->attstattarget = -1;
to->attcacheoff = -1;
to->atttypmod = -1;
to->attislocal = true;
@@ -1353,7 +1351,7 @@ index_drop(Oid indexId)
* them.
*/
if (hasexprs)
- RemoveStatistics(indexId, 0);
+ RemoveStatistics(indexId, NULL, 0);
/*
* fix ATTRIBUTE relation
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ab594eb..62c4ce3 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -166,7 +166,7 @@ CREATE VIEW pg_stats AS
WHEN stakind5 = 5 THEN stanumbers5
END AS elem_count_histogram
FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
- JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
+ JOIN pg_attribute a ON (c.oid = attrelid AND array_length(s.staattnums, 1) = 1 AND attnum = s.staattnums[0])
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE NOT attisdropped AND has_column_privilege(c.oid, a.attnum, 'select');
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 9cd6e67..1ac9b5a 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -21,12 +21,15 @@
#include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
+#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
+#include "catalog/namespace.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "commands/dbcommands.h"
+#include "commands/defrem.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
@@ -42,7 +45,9 @@
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/attoptcache.h"
+#include "utils/builtins.h"
#include "utils/datum.h"
+#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -69,8 +74,10 @@ typedef struct AnlIndexData
{
IndexInfo *indexInfo; /* BuildIndexInfo result */
double tupleFract; /* fraction of rows for partial index */
- VacAttrStats **vacattrstats; /* index attrs to analyze */
- int attr_cnt;
+ VacStats **vacstats; /* index attrs to analyze */
+ int stats_cnt; /* this many stats we need to compute - single and cross-column stats included */
+ int attr_cnt; /* the relation has attr_cnt number of columns */
+ bool has_xcol_stats;
} AnlIndexData;
@@ -94,8 +101,8 @@ static void compute_index_stats(Relation onerel, double totalrows,
AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows,
MemoryContext col_context);
-static VacAttrStats *examine_attribute(Relation onerel, int attnum,
- Node *index_expr);
+static VacStats *examine_attribute(Relation onerel, int2vector *attnums, bool inh,
+ Node **index_expr);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows);
static double random_fract(void);
@@ -106,10 +113,16 @@ static int acquire_inherited_sample_rows(Relation onerel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows);
static void update_attstats(Oid relid, bool inh,
- int natts, VacAttrStats **vacattrstats);
-static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
-static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
+ int natts, VacStats **vacstats);
+static Datum std_fetch_func(VacStatsP stats, int rownum, AttrNumber tupattnum,
+ bool *isNull);
+static Datum ind_fetch_func(VacStatsP stats, int rownum, AttrNumber tupattnum,
+ bool *isNull);
+static int ordered_findval(VacStatsP stats, int rownum, int index, AnalyzeAttrFetchFunc fetchfunc, void *arg);
+static int unordered_findval(VacStatsP stats, int rownum, int index, AnalyzeAttrFetchFunc fetchfunc, void *arg);
+static void compute_cross_column_stats(VacStatsP stats, AnalyzeAttrFetchFunc fetchfunc,
+ int numrows);
/*
* analyze_rel() -- analyze one relation
@@ -252,20 +265,78 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
LWLockRelease(ProcArrayLock);
}
+/*
+ * get_extrastats_count --- count the multi-column pg_statistic entries of a
+ * relation
+ *
+ * Scans all pg_statistic entries of relid and counts those whose staattnums
+ * vector has more than one element.  If out_attnumvectors is not NULL, it
+ * is set to a palloc'd array holding a copy of each such vector, or to NULL
+ * when no such entry was found.
+ */
+static int
+get_extrastats_count(Oid relid, int2vector ***out_attnumvectors)
+{
+    Relation    pgstatistic;
+    TupleDesc   tupDesc;
+    SysScanDesc scan;
+    ScanKeyData key;
+    HeapTuple   tuple;
+    int         count = 0;
+    int2vector **attnumvectors = NULL;
+
+    /* pg_statistic is only read here, so a share lock is sufficient */
+    pgstatistic = heap_open(StatisticRelationId, AccessShareLock);
+    tupDesc = RelationGetDescr(pgstatistic);
+
+    ScanKeyInit(&key,
+                Anum_pg_statistic_starelid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(relid));
+
+    scan = systable_beginscan(pgstatistic, StatisticRelidAttnumsInhIndexId, true,
+                              SnapshotNow, 1, &key);
+
+    while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+    {
+        bool        isnull;
+        Datum       attnvec = heap_getattr(tuple, Anum_pg_statistic_staattnums, tupDesc, &isnull);
+        int2vector *attnumvector;
+
+        Assert(!isnull);
+
+        attnumvector = (int2vector *) DatumGetPointer(attnvec);
+
+        /* Entries with a single column are not counted */
+        if (attnumvector->dim1 <= 1)
+            continue;
+
+        count++;
+
+        if (out_attnumvectors == NULL)
+            continue;
+
+        /* Grow the result array by one slot and store a copy of the vector */
+        if (attnumvectors == NULL)
+            attnumvectors = palloc(count * sizeof(int2vector *));
+        else
+            attnumvectors = repalloc(attnumvectors, count * sizeof(int2vector *));
+        attnumvectors[count - 1] = (int2vector *) datumCopy(attnvec, false, -1);
+    }
+
+    systable_endscan(scan);
+
+    heap_close(pgstatistic, AccessShareLock);
+
+    if (out_attnumvectors)
+        *out_attnumvectors = attnumvectors;
+
+    return count;
+}
+
/*
* do_analyze_rel() -- analyze one relation, recursively or not
*/
static void
do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
{
- int attr_cnt,
+ int stats_cnt,
+ extrastats_cnt,
tcnt,
i,
ind;
Relation *Irel;
int nindexes;
bool hasindex;
- VacAttrStats **vacattrstats;
+ VacStats **vacstats;
+ int2vector **extrastats_attnums;
AnlIndexData *indexdata;
int targrows,
numrows;
@@ -328,38 +399,75 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
{
ListCell *le;
- vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
- sizeof(VacAttrStats *));
+ vacstats = (VacStats **) palloc(list_length(vacstmt->va_cols) *
+ sizeof(VacStats *));
tcnt = 0;
foreach(le, vacstmt->va_cols)
{
char *col = strVal(lfirst(le));
+ AttrNumber attnum;
+ int2vector *attnums;
- i = attnameAttNum(onerel, col, false);
- if (i == InvalidAttrNumber)
+ attnum = attnameAttNum(onerel, col, false);
+ if (attnum == InvalidAttrNumber)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel))));
- vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
- if (vacattrstats[tcnt] != NULL)
+ attnums = buildint2vector(&attnum, 1);
+ vacstats[tcnt] = examine_attribute(onerel, attnums, inh, NULL);
+ if (vacstats[tcnt] != NULL)
tcnt++;
+ else
+ pfree(attnums);
}
- attr_cnt = tcnt;
+ stats_cnt = tcnt;
+ extrastats_cnt = 0;
}
else
{
- attr_cnt = onerel->rd_att->natts;
- vacattrstats = (VacAttrStats **)
- palloc(attr_cnt * sizeof(VacAttrStats *));
+ /*
+ * Process all attributes of the relation unconditionally
+ * as system tables may not have pg_statistic entries at
+ * this point, they will be created in update_attstats().
+ */
+ stats_cnt = onerel->rd_att->natts;
+
+ extrastats_cnt = get_extrastats_count(onerel->rd_id, &extrastats_attnums);
+
+ vacstats = (VacStats **)
+ palloc((stats_cnt + extrastats_cnt) * sizeof(VacStats *));
tcnt = 0;
- for (i = 1; i <= attr_cnt; i++)
+ for (i = 1; i <= stats_cnt; i++)
{
- vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
- if (vacattrstats[tcnt] != NULL)
+ AttrNumber attnum = i;
+ int2vector *attnums;
+
+ attnums = buildint2vector(&attnum, 1);
+
+ vacstats[tcnt] = examine_attribute(onerel, attnums, inh, NULL);
+ if (vacstats[tcnt] != NULL)
tcnt++;
+ else
+ pfree(attnums);
}
- attr_cnt = tcnt;
+
+ /* Now process the extra statistics */
+ if (extrastats_attnums)
+ {
+ for (i = 0; extrastats_attnums && i < extrastats_cnt; i++)
+ {
+ vacstats[tcnt] = examine_attribute(onerel, extrastats_attnums[i], inh, NULL);
+ if (vacstats[tcnt] != NULL)
+ tcnt++;
+ else
+ pfree(extrastats_attnums[i]);
+ }
+ pfree(extrastats_attnums);
+ extrastats_attnums = NULL;
+ }
+
+ stats_cnt = tcnt;
}
/*
@@ -388,13 +496,20 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
thisdata->tupleFract = 1.0; /* fix later if partial */
+
+ tcnt = 0;
+ extrastats_cnt = get_extrastats_count(Irel[ind]->rd_id, &extrastats_attnums);
+
+ /*
+ * Process expression index single-attribute statistics.
+ */
if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL)
{
ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
- thisdata->vacattrstats = (VacAttrStats **)
- palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
- tcnt = 0;
+ thisdata->vacstats = (VacStats **)
+ palloc((indexInfo->ii_NumIndexAttrs + extrastats_cnt) * sizeof(VacStats *));
+
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
int keycol = indexInfo->ii_KeyAttrNumbers[i];
@@ -403,19 +518,74 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
{
/* Found an index expression */
Node *indexkey;
+ AttrNumber attnum;
+ int2vector *attnums;
if (indexpr_item == NULL) /* shouldn't happen */
elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
- thisdata->vacattrstats[tcnt] =
- examine_attribute(Irel[ind], i + 1, indexkey);
- if (thisdata->vacattrstats[tcnt] != NULL)
+
+ attnum = i + 1;
+ attnums = buildint2vector(&attnum, 1);
+
+ thisdata->vacstats[tcnt] =
+ examine_attribute(Irel[ind], attnums, inh, &indexkey);
+ if (thisdata->vacstats[tcnt] != NULL)
tcnt++;
+ else
+ pfree(attnums);
}
}
- thisdata->attr_cnt = tcnt;
+
}
+
+            /*
+             * Process cross-column statistics. There can be at most one
+             * multi-column statistics entry for an index, covering all of
+             * its attributes.
+             */
+            if (extrastats_attnums && vacstmt->va_cols == NIL)
+            {
+                Node *indexkeys[STATISTIC_NUM_SLOTS];
+
+                if (thisdata->vacstats == NULL)
+                    thisdata->vacstats = (VacStats **) palloc(sizeof(VacStats *));
+
+                /*
+                 * Don't process indexes with too many attributes.
+                 * CREATE EXTRA STATISTICS bails out for such a case
+                 * but better safe than sorry.
+                 */
+                if (indexInfo->ii_NumIndexAttrs <= STATISTIC_NUM_SLOTS)
+                {
+                    ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
+
+                    /* Collect the expression (or NULL for a plain column) of every index attribute */
+                    for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+                    {
+                        int keycol = indexInfo->ii_KeyAttrNumbers[i];
+                        if (keycol == 0)
+                        {
+                            indexkeys[i] = (Node *) lfirst(indexpr_item);
+                            indexpr_item = lnext(indexpr_item);
+                        }
+                        else
+                            indexkeys[i] = NULL;
+                    }
+
+                    thisdata->vacstats[tcnt] = examine_attribute(onerel, extrastats_attnums[0], inh, indexkeys);
+                    if (thisdata->vacstats[tcnt] != NULL)
+                        tcnt++;
+                    else
+                    {
+                        /*
+                         * Release the vector that was handed to
+                         * examine_attribute().  The loop counter i has run
+                         * past the index attributes by now and must not be
+                         * used as an index into extrastats_attnums.
+                         */
+                        pfree(extrastats_attnums[0]);
+                    }
+                }
+                pfree(extrastats_attnums);
+                extrastats_attnums = NULL;
+
+                thisdata->has_xcol_stats = true;
+            }
+
+ thisdata->stats_cnt = tcnt;
+ thisdata->attr_cnt = indexInfo->ii_NumIndexAttrs;
}
}
@@ -426,19 +596,19 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
* the target in the corner case where there are no analyzable columns.)
*/
targrows = 100;
- for (i = 0; i < attr_cnt; i++)
+ for (i = 0; i < stats_cnt; i++)
{
- if (targrows < vacattrstats[i]->minrows)
- targrows = vacattrstats[i]->minrows;
+ if (targrows < vacstats[i]->minrows)
+ targrows = vacstats[i]->minrows;
}
for (ind = 0; ind < nindexes; ind++)
{
AnlIndexData *thisdata = &indexdata[ind];
- for (i = 0; i < thisdata->attr_cnt; i++)
+ for (i = 0; i < thisdata->stats_cnt; i++)
{
- if (targrows < thisdata->vacattrstats[i]->minrows)
- targrows = thisdata->vacattrstats[i]->minrows;
+ if (targrows < thisdata->vacstats[i]->minrows)
+ targrows = thisdata->vacstats[i]->minrows;
}
}
@@ -456,7 +626,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
/*
* Compute the statistics. Temporary results during the calculations for
* each column are stored in a child context. The calc routines are
- * responsible to make sure that whatever they store into the VacAttrStats
+ * responsible to make sure that whatever they store into the VacStats
* structure is allocated in anl_context.
*/
if (numrows > 0)
@@ -471,33 +641,40 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
ALLOCSET_DEFAULT_MAXSIZE);
old_context = MemoryContextSwitchTo(col_context);
- for (i = 0; i < attr_cnt; i++)
+ for (i = 0; i < stats_cnt; i++)
{
- VacAttrStats *stats = vacattrstats[i];
+ VacStats *stats = vacstats[i];
AttributeOpts *aopt;
+ int j;
stats->rows = rows;
stats->tupDesc = onerel->rd_att;
- (*stats->compute_stats) (stats,
+
+ for (j = 0; j < stats->attnums->dim1; j++)
+ {
+ (*stats->statfuncs[j].compute_func_ptr) (stats,
+ j,
std_fetch_func,
numrows,
totalrows);
+ /*
+ * If the appropriate flavor of the n_distinct option is
+ * specified, override with the corresponding value.
+ */
+ aopt = get_attribute_options(onerel->rd_id, stats->attnums->values[j]);
+ if (aopt != NULL)
+ {
+ float8 n_distinct =
+ inh ? aopt->n_distinct_inherited : aopt->n_distinct;
- /*
- * If the appropriate flavor of the n_distinct option is
- * specified, override with the corresponding value.
- */
- aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
- if (aopt != NULL)
- {
- float8 n_distinct;
-
- n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
- if (n_distinct != 0.0)
- stats->stadistinct = n_distinct;
+ if (n_distinct != 0.0)
+ stats->stadistinct[j] = n_distinct;
+ }
}
MemoryContextResetAndDeleteChildren(col_context);
+
+ compute_cross_column_stats(stats, std_fetch_func, numrows);
}
if (hasindex)
@@ -515,14 +692,17 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
* pg_statistic for columns we didn't process, we leave them alone.)
*/
update_attstats(RelationGetRelid(onerel), inh,
- attr_cnt, vacattrstats);
+ stats_cnt, vacstats);
for (ind = 0; ind < nindexes; ind++)
{
AnlIndexData *thisdata = &indexdata[ind];
+ for (i = 0; i < thisdata->stats_cnt; i++)
+ compute_cross_column_stats(thisdata->vacstats[i], ind_fetch_func, numrows);
+
update_attstats(RelationGetRelid(Irel[ind]), false,
- thisdata->attr_cnt, thisdata->vacattrstats);
+ thisdata->stats_cnt, thisdata->vacstats);
}
}
@@ -632,6 +812,8 @@ compute_index_stats(Relation onerel, double totalrows,
old_context;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
+ bool typbyval[INDEX_MAX_KEYS];
+ int16 typlen[INDEX_MAX_KEYS];
int ind,
i;
@@ -646,7 +828,7 @@ compute_index_stats(Relation onerel, double totalrows,
{
AnlIndexData *thisdata = &indexdata[ind];
IndexInfo *indexInfo = thisdata->indexInfo;
- int attr_cnt = thisdata->attr_cnt;
+ int stats_cnt = thisdata->stats_cnt;
TupleTableSlot *slot;
EState *estate;
ExprContext *econtext;
@@ -657,11 +839,30 @@ compute_index_stats(Relation onerel, double totalrows,
tcnt,
rowno;
double totalindexrows;
+ ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
/* Ignore index if no columns to analyze and not partial */
- if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
+ if (stats_cnt == 0 && indexInfo->ii_Predicate == NIL)
continue;
+ for (i = 0; i < thisdata->attr_cnt; i++)
+ {
+ int keycol = indexInfo->ii_KeyAttrNumbers[i];
+
+ if (keycol == 0)
+ {
+ Node *indexkey = (Node *) lfirst(indexpr_item);
+
+ get_typlenbyval(exprType(indexkey), &typlen[i], &typbyval[i]);
+ indexpr_item = lnext(indexpr_item);
+ }
+ else
+ {
+ typbyval[i] = onerel->rd_att->attrs[keycol - 1]->attbyval;
+ typlen[i] = onerel->rd_att->attrs[keycol - 1]->attlen;
+ }
+ }
+
/*
* Need an EState for evaluation of index expressions and
* partial-index predicates. Create it in the per-index context to be
@@ -681,8 +882,8 @@ compute_index_stats(Relation onerel, double totalrows,
estate);
/* Compute and save index expression values */
- exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
- exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
+ exprvals = (Datum *) palloc(numrows * thisdata->attr_cnt * sizeof(Datum));
+ exprnulls = (bool *) palloc(numrows * thisdata->attr_cnt * sizeof(bool));
numindexrows = 0;
tcnt = 0;
for (rowno = 0; rowno < numrows; rowno++)
@@ -706,7 +907,7 @@ compute_index_stats(Relation onerel, double totalrows,
}
numindexrows++;
- if (attr_cnt > 0)
+ if (stats_cnt > 0)
{
/*
* Evaluate the index row to compute expression values. We
@@ -719,24 +920,18 @@ compute_index_stats(Relation onerel, double totalrows,
isnull);
/*
- * Save just the columns we care about. We copy the values
- * into ind_context from the estate's per-tuple context.
+ * We copy the values into ind_context from the estate's per-tuple context.
*/
- for (i = 0; i < attr_cnt; i++)
+ for (i = 0; i < thisdata->attr_cnt; i++)
{
- VacAttrStats *stats = thisdata->vacattrstats[i];
- int attnum = stats->attr->attnum;
-
- if (isnull[attnum - 1])
+ if (isnull[i])
{
exprvals[tcnt] = (Datum) 0;
exprnulls[tcnt] = true;
}
else
{
- exprvals[tcnt] = datumCopy(values[attnum - 1],
- stats->attrtype->typbyval,
- stats->attrtype->typlen);
+ exprvals[tcnt] = datumCopy(values[i], typbyval[i], typlen[i]);
exprnulls[tcnt] = false;
}
tcnt++;
@@ -757,28 +952,37 @@ compute_index_stats(Relation onerel, double totalrows,
if (numindexrows > 0)
{
MemoryContextSwitchTo(col_context);
- for (i = 0; i < attr_cnt; i++)
+ for (i = 0; i < stats_cnt; i++)
{
- VacAttrStats *stats = thisdata->vacattrstats[i];
- AttributeOpts *aopt =
- get_attribute_options(stats->attr->attrelid,
- stats->attr->attnum);
-
- stats->exprvals = exprvals + i;
- stats->exprnulls = exprnulls + i;
- stats->rowstride = attr_cnt;
- (*stats->compute_stats) (stats,
- ind_fetch_func,
- numindexrows,
- totalindexrows);
+ VacStats *stats = thisdata->vacstats[i];
+ AttributeOpts *aopt;
+ int j;
- /*
- * If the n_distinct option is specified, it overrides the
- * above computation. For indices, we always use just
- * n_distinct, not n_distinct_inherited.
- */
- if (aopt != NULL && aopt->n_distinct != 0.0)
- stats->stadistinct = aopt->n_distinct;
+ for (j = 0; j < stats->attnums->dim1; j++)
+ {
+ if (stats->attnums->dim1 == 1)
+ aopt = get_attribute_options(stats->attrs[j]->attrelid,
+ stats->attrs[j]->attnum);
+ else
+ aopt = NULL;
+
+ stats->exprvals = exprvals;
+ stats->exprnulls = exprnulls;
+ stats->rowstride = thisdata->attr_cnt;
+ (*stats->statfuncs[j].compute_func_ptr) (stats,
+ j,
+ ind_fetch_func,
+ numindexrows,
+ totalindexrows);
+
+ /*
+ * If the n_distinct option is specified, it overrides the
+ * above computation. For indices, we always use just
+ * n_distinct, not n_distinct_inherited.
+ */
+ if (aopt != NULL && aopt->n_distinct != 0.0)
+ stats->stadistinct[j] = aopt->n_distinct;
+ }
MemoryContextResetAndDeleteChildren(col_context);
}
@@ -797,38 +1001,77 @@ compute_index_stats(Relation onerel, double totalrows,
}
/*
- * examine_attribute -- pre-analysis of a single column
+ * statistics_target -- returns pg_statistic.statarget
+ */
+static int4
+statistics_target(Oid relid, int2vector *attnums, bool inherited)
+{
+	Form_pg_statistic statform;
+	int4		target;
+	HeapTuple	statup;
+
+	/* Look the row up by (relation, attribute-number vector, inherit flag) */
+	statup = SearchSysCache3(STATRELATTINH,
+							 ObjectIdGetDatum(relid),
+							 PointerGetDatum(attnums),
+							 BoolGetDatum(inherited));
+
+	/* No matching row: report -1, the "default" marker */
+	if (!HeapTupleIsValid(statup))
+		return -1;
+
+	/* Copy the field out before giving the cache entry back */
+	statform = (Form_pg_statistic) GETSTRUCT(statup);
+	target = statform->statarget;
+	ReleaseSysCache(statup);
+
+	return target;
+}
+
+
+/*
+ * examine_attribute -- pre-analysis of a single column or a set of columns
*
* Determine whether the column is analyzable; if so, create and initialize
- * a VacAttrStats struct for it. If not, return NULL.
+ * a VacStats struct for it. If not, return NULL.
*
* If index_expr isn't NULL, then we're trying to analyze an expression index,
* and index_expr is the expression tree representing the column's data.
*/
-static VacAttrStats *
-examine_attribute(Relation onerel, int attnum, Node *index_expr)
+static VacStats *
+examine_attribute(Relation onerel, int2vector *attnums, bool inh, Node **index_exprs)
{
- Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple;
- VacAttrStats *stats;
- int i;
+ int4 statarget;
+ VacStats *stats;
+ int i, j;
bool ok;
- /* Never analyze dropped columns */
- if (attr->attisdropped)
- return NULL;
+ for (i = 0; i < attnums->dim1; i++)
+ {
+ Form_pg_attribute attr = onerel->rd_att->attrs[attnums->values[i] - 1];
+
+ /* Never analyze dropped columns */
+ if (attr->attisdropped)
+ return NULL;
+ }
/* Don't analyze column if user has specified not to */
- if (attr->attstattarget == 0)
+ statarget = statistics_target(onerel->rd_id, attnums, inh);
+ if (statarget == 0)
return NULL;
/*
- * Create the VacAttrStats struct. Note that we only have a copy of the
+ * Create the VacStats struct. Note that we only have a copy of the
* fixed fields of the pg_attribute tuple.
*/
- stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
- stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
- memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
+ stats = (VacStats *) palloc0(sizeof(VacStats));
+ for (i = 0; i < attnums->dim1; i++)
+ {
+ Form_pg_attribute attr = onerel->rd_att->attrs[attnums->values[i] - 1];
+ stats->attrs[i] = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
+ memcpy(stats->attrs[i], attr, ATTRIBUTE_FIXED_PART_SIZE);
+ }
+
+ stats->attnums = attnums;
+ stats->statarget = statarget;
/*
* When analyzing an expression index, believe the expression tree's type
@@ -839,54 +1082,86 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
* not a problem.) It's not clear whether anyone will care about the
* typmod, but we store that too just in case.
*/
- if (index_expr)
- {
- stats->attrtypid = exprType(index_expr);
- stats->attrtypmod = exprTypmod(index_expr);
- }
- else
+
+ for (i = 0; i < attnums->dim1; i++)
{
- stats->attrtypid = attr->atttypid;
- stats->attrtypmod = attr->atttypmod;
+ if (index_exprs && index_exprs[i])
+ {
+ stats->attrtypids[i] = exprType(index_exprs[i]);
+ stats->attrtypmods[i] = exprTypmod(index_exprs[i]);
+ }
+ else
+ {
+ stats->attrtypids[i] = stats->attrs[i]->atttypid;
+ stats->attrtypmods[i] = stats->attrs[i]->atttypmod;
+ }
+
+ typtuple = SearchSysCacheCopy1(TYPEOID,
+ ObjectIdGetDatum(stats->attrtypids[i]));
+ if (!HeapTupleIsValid(typtuple))
+ elog(ERROR, "cache lookup failed for type %u", stats->attrtypids[i]);
+ stats->attrtypes[i] = (Form_pg_type) GETSTRUCT(typtuple);
}
- typtuple = SearchSysCacheCopy1(TYPEOID,
- ObjectIdGetDatum(stats->attrtypid));
- if (!HeapTupleIsValid(typtuple))
- elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
- stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
stats->anl_context = anl_context;
- stats->tupattnum = attnum;
/*
* The fields describing the stats->stavalues[n] element types default to
* the type of the data being analyzed, but the type-specific typanalyze
* function can change them if it wants to store something else.
*/
- for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+ for (i = 0; i < attnums->dim1; i++)
{
- stats->statypid[i] = stats->attrtypid;
- stats->statyplen[i] = stats->attrtype->typlen;
- stats->statypbyval[i] = stats->attrtype->typbyval;
- stats->statypalign[i] = stats->attrtype->typalign;
+ for (j = 0; j < STATISTIC_NUM_SLOTS; j++)
+ {
+ stats->statypid[i][j] = stats->attrtypids[i];
+ stats->statyplen[i][j] = stats->attrtypes[i]->typlen;
+ stats->statypbyval[i][j] = stats->attrtypes[i]->typbyval;
+ stats->statypalign[i][j] = stats->attrtypes[i]->typalign;
+ }
+
+ /*
+ * Call the type-specific typanalyze function. If none is specified, use
+ * std_typanalyze().
+ */
+ if (OidIsValid(stats->attrtypes[i]->typanalyze))
+ ok = DatumGetBool(OidFunctionCall2(stats->attrtypes[i]->typanalyze,
+ PointerGetDatum(stats),
+ Int32GetDatum(i)));
+ else
+ ok = std_typanalyze(stats, i);
+
+ if (!ok ||
+ stats->statfuncs[i].compute_func_ptr == NULL ||
+ /* intentionally don't check stats->statfuncs[i].findval_func_ptr == NULL */
+ stats->minrows <= 0)
+ {
+ for (; i >= 0; i--)
+ {
+ heap_freetuple((HeapTuple) stats->attrtypes[i]);
+ pfree(stats->attrs[i]);
+ }
+ pfree(stats);
+
+ return NULL;
+ }
}
/*
- * Call the type-specific typanalyze function. If none is specified, use
- * std_typanalyze().
+ * If we have a cross-column statistics, compute the cube size for
+ * the histogram so pg_statistic.statarget really means the number of
+ * cells in the multi-dimensional cube.
*/
- if (OidIsValid(stats->attrtype->typanalyze))
- ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
- PointerGetDatum(stats)));
- else
- ok = std_typanalyze(stats);
-
- if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+ if (stats->attnums->dim1 > 1)
{
- heap_freetuple(typtuple);
- pfree(stats->attr);
- pfree(stats);
- return NULL;
+ double x, y, n;
+
+ y = stats->statarget;
+ n = stats->attnums->dim1;
+ x = pow(M_E, (log(y) / n));
+
+ /* On some systems, ceil() returns surprising result. */
+ stats->statarget = trunc(x + 0.5);
}
return stats;
@@ -1560,7 +1835,7 @@ acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
* by taking a self-exclusive lock on the relation in analyze_rel().
*/
static void
-update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
+update_attstats(Oid relid, bool inh, int natts, VacStats **vacstats)
{
Relation sd;
int attno;
@@ -1572,7 +1847,7 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
for (attno = 0; attno < natts; attno++)
{
- VacAttrStats *stats = vacattrstats[attno];
+ VacStats *stats = vacstats[attno];
HeapTuple stup,
oldtup;
int i,
@@ -1595,26 +1870,35 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
replaces[i] = true;
}
+ /*
+ * We use statarget = -1 here, so if the pg_statistic entry is new,
+ * we use the default_statistics_target for it. If there's a pg_statistic
+ * row for this statistics, we will set the "replaces" flag to false.
+ */
+ values[Anum_pg_statistic_statarget - 1] = Int32GetDatum(-1);
values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
- values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
- values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
- values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
- values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
+ values[Anum_pg_statistic_stavalid - 1] = BoolGetDatum(true);
+ values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac[0]);
+ values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth[0]);
+ values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct[0]);
i = Anum_pg_statistic_stakind1 - 1;
for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
{
- values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+ values[i++] = Int16GetDatum(stats->stakind[0][k]); /* stakindN */
}
i = Anum_pg_statistic_staop1 - 1;
for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
{
- values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+ values[i++] = ObjectIdGetDatum(stats->staop[0][k]); /* staopN */
}
+
+ values[Anum_pg_statistic_staattnums - 1] = PointerGetDatum(stats->attnums); /* staattnums */
+
i = Anum_pg_statistic_stanumbers1 - 1;
for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
{
- int nnum = stats->numnumbers[k];
+ int nnum = stats->numnumbers[0][k];
if (nnum > 0)
{
@@ -1622,7 +1906,7 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
ArrayType *arry;
for (n = 0; n < nnum; n++)
- numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+ numdatums[n] = Float4GetDatum(stats->stanumbers[0][k][n]);
/* XXX knows more than it should about type float4: */
arry = construct_array(numdatums, nnum,
FLOAT4OID,
@@ -1638,16 +1922,29 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
i = Anum_pg_statistic_stavalues1 - 1;
for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
{
- if (stats->numvalues[k] > 0)
+ int idx1, idx2;
+
+ if (stats->attnums->dim1 > 1 && k < stats->attnums->dim1)
+ {
+ idx1 = k;
+ idx2 = 0;
+ }
+ else
+ {
+ idx1 = 0;
+ idx2 = k;
+ }
+
+ if (stats->numvalues[idx1][idx2] > 0)
{
ArrayType *arry;
- arry = construct_array(stats->stavalues[k],
- stats->numvalues[k],
- stats->statypid[k],
- stats->statyplen[k],
- stats->statypbyval[k],
- stats->statypalign[k]);
+ arry = construct_array(stats->stavalues[idx1][idx2],
+ stats->numvalues[idx1][idx2],
+ stats->statypid[idx1][idx2],
+ stats->statyplen[idx1][idx2],
+ stats->statypbyval[idx1][idx2],
+ stats->statypalign[idx1][idx2]);
values[i++] = PointerGetDatum(arry); /* stavaluesN */
}
else
@@ -1660,11 +1957,14 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
/* Is there already a pg_statistic tuple for this attribute? */
oldtup = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(relid),
- Int16GetDatum(stats->attr->attnum),
+ PointerGetDatum(stats->attnums),
BoolGetDatum(inh));
if (HeapTupleIsValid(oldtup))
{
+ /* Skip replacing the statarget value, so we remember its value. */
+ replaces[Anum_pg_statistic_statarget - 1] = false;
+
/* Yes, replace it */
stup = heap_modify_tuple(oldtup,
RelationGetDescr(sd),
@@ -1697,13 +1997,12 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
* and the actual storage of the sample data.
*/
static Datum
-std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+std_fetch_func(VacStatsP stats, int rownum, AttrNumber tupattnum, bool *isNull)
{
- int attnum = stats->tupattnum;
HeapTuple tuple = stats->rows[rownum];
TupleDesc tupDesc = stats->tupDesc;
- return heap_getattr(tuple, attnum, tupDesc, isNull);
+ return heap_getattr(tuple, tupattnum, tupDesc, isNull);
}
/*
@@ -1713,12 +2012,11 @@ std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
* just in Datum arrays.
*/
static Datum
-ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+ind_fetch_func(VacStatsP stats, int rownum, AttrNumber tupattnum, bool *isNull)
{
int i;
- /* exprvals and exprnulls are already offset for proper column */
- i = rownum * stats->rowstride;
+ i = rownum * stats->rowstride + tupattnum - 1;
*isNull = stats->exprnulls[i];
return stats->exprvals[i];
}
@@ -1776,12 +2074,19 @@ typedef struct
int *tupnoLink;
} CompareScalarsContext;
+typedef struct /* per-column comparison context used by the cross-column statistics code */
+{
+ SortSupport ssup; /* sort support handed to ApplySortComparator() */
+ FmgrInfo cmpFn; /* FIXME: this should be somewhere else? (invoked via FunctionCall2Coll() in unordered_findval()) */
+} CompareDatumsContext;
-static void compute_minimal_stats(VacAttrStatsP stats,
+static void compute_minimal_stats(VacStatsP stats,
+ int index,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
-static void compute_scalar_stats(VacAttrStatsP stats,
+static void compute_scalar_stats(VacStatsP stats,
+ int index,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
@@ -1793,20 +2098,19 @@ static int compare_mcvs(const void *a, const void *b);
* std_typanalyze -- the default type-specific typanalyze function
*/
bool
-std_typanalyze(VacAttrStats *stats)
+std_typanalyze(VacStats *stats, int index)
{
- Form_pg_attribute attr = stats->attr;
Oid ltopr;
Oid eqopr;
StdAnalyzeData *mystats;
- /* If the attstattarget column is negative, use the default value */
+ /* If the statarget column is negative, use the default value */
/* NB: it is okay to scribble on stats->attr since it's a copy */
- if (attr->attstattarget < 0)
- attr->attstattarget = default_statistics_target;
+ if (stats->statarget < 0)
+ stats->statarget = default_statistics_target;
/* Look for default "<" and "=" operators for column's type */
- get_sort_group_operators(stats->attrtypid,
+ get_sort_group_operators(stats->attrtypids[index],
false, false, false,
<opr, &eqopr, NULL,
NULL);
@@ -1820,7 +2124,7 @@ std_typanalyze(VacAttrStats *stats)
mystats->eqopr = eqopr;
mystats->eqfunc = get_opcode(eqopr);
mystats->ltopr = ltopr;
- stats->extra_data = mystats;
+ stats->extra_data[index] = mystats;
/*
* Determine which standard statistics algorithm to use
@@ -1828,7 +2132,8 @@ std_typanalyze(VacAttrStats *stats)
if (OidIsValid(ltopr))
{
/* Seems to be a scalar datatype */
- stats->compute_stats = compute_scalar_stats;
+ stats->statfuncs[index].compute_func_ptr = compute_scalar_stats;
+ stats->statfuncs[index].findval_func_ptr = ordered_findval;
/*--------------------
* The following choice of minrows is based on the paper
* "Random sampling for histogram construction: how much is enough?"
@@ -1848,14 +2153,15 @@ std_typanalyze(VacAttrStats *stats)
* know it at this point.
*--------------------
*/
- stats->minrows = 300 * attr->attstattarget;
+ stats->minrows = 300 * stats->statarget;
}
else
{
/* Can't do much but the minimal stuff */
- stats->compute_stats = compute_minimal_stats;
+ stats->statfuncs[index].compute_func_ptr = compute_minimal_stats;
+ stats->statfuncs[index].findval_func_ptr = unordered_findval;
/* Might as well use the same minrows as above */
- stats->minrows = 300 * attr->attstattarget;
+ stats->minrows = 300 * stats->statarget;
}
return true;
@@ -1877,7 +2183,8 @@ std_typanalyze(VacAttrStats *stats)
* depend mainly on the length of the list we are willing to keep.
*/
static void
-compute_minimal_stats(VacAttrStatsP stats,
+compute_minimal_stats(VacStatsP stats,
+ int index,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows)
@@ -1887,10 +2194,10 @@ compute_minimal_stats(VacAttrStatsP stats,
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
- bool is_varlena = (!stats->attrtype->typbyval &&
- stats->attrtype->typlen == -1);
- bool is_varwidth = (!stats->attrtype->typbyval &&
- stats->attrtype->typlen < 0);
+ bool is_varlena = (!stats->attrtypes[index]->typbyval &&
+ stats->attrtypes[index]->typlen == -1);
+ bool is_varwidth = (!stats->attrtypes[index]->typbyval &&
+ stats->attrtypes[index]->typlen < 0);
FmgrInfo f_cmpeq;
typedef struct
{
@@ -1900,8 +2207,8 @@ compute_minimal_stats(VacAttrStatsP stats,
TrackItem *track;
int track_cnt,
track_max;
- int num_mcv = stats->attr->attstattarget;
- StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
+ int num_mcv = stats->statarget;
+ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data[index];
/*
* We track up to 2*n values for an n-element MCV list; but at least 10
@@ -1924,7 +2231,7 @@ compute_minimal_stats(VacAttrStatsP stats,
vacuum_delay_point();
- value = fetchfunc(stats, i, &isnull);
+ value = fetchfunc(stats, i, stats->attnums->values[index], &isnull);
/* Check for null/nonnull */
if (isnull)
@@ -2021,11 +2328,11 @@ compute_minimal_stats(VacAttrStatsP stats,
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
- stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ stats->stanullfrac[index] = (double) null_cnt / (double) samplerows;
if (is_varwidth)
- stats->stawidth = total_width / (double) nonnull_cnt;
+ stats->stawidth[index] = total_width / (double) nonnull_cnt;
else
- stats->stawidth = stats->attrtype->typlen;
+ stats->stawidth[index] = stats->attrtypes[index]->typlen;
/* Count the number of values we found multiple times */
summultiple = 0;
@@ -2039,7 +2346,7 @@ compute_minimal_stats(VacAttrStatsP stats,
if (nmultiple == 0)
{
/* If we found no repeated values, assume it's a unique column */
- stats->stadistinct = -1.0;
+ stats->stadistinct[index] = -1.0;
}
else if (track_cnt < track_max && toowide_cnt == 0 &&
nmultiple == track_cnt)
@@ -2049,7 +2356,7 @@ compute_minimal_stats(VacAttrStatsP stats,
* value appeared more than once. Assume the column has just
* these values.
*/
- stats->stadistinct = track_cnt;
+ stats->stadistinct[index] = track_cnt;
}
else
{
@@ -2088,7 +2395,7 @@ compute_minimal_stats(VacAttrStatsP stats,
stadistinct = (double) d;
if (stadistinct > totalrows)
stadistinct = totalrows;
- stats->stadistinct = floor(stadistinct + 0.5);
+ stats->stadistinct[index] = floor(stadistinct + 0.5);
}
/*
@@ -2097,8 +2404,8 @@ compute_minimal_stats(VacAttrStatsP stats,
* stadistinct should scale with the row count rather than be a fixed
* value.
*/
- if (stats->stadistinct > 0.1 * totalrows)
- stats->stadistinct = -(stats->stadistinct / totalrows);
+ if (stats->stadistinct[index] > 0.1 * totalrows)
+ stats->stadistinct[index] = -(stats->stadistinct[index] / totalrows);
/*
* Decide how many values are worth storing as most-common values. If
@@ -2118,7 +2425,7 @@ compute_minimal_stats(VacAttrStatsP stats,
}
else
{
- double ndistinct = stats->stadistinct;
+ double ndistinct = stats->stadistinct[index];
double avgcount,
mincount;
@@ -2156,18 +2463,18 @@ compute_minimal_stats(VacAttrStatsP stats,
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(track[i].value,
- stats->attrtype->typbyval,
- stats->attrtype->typlen);
+ stats->attrtypes[index]->typbyval,
+ stats->attrtypes[index]->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
- stats->stakind[0] = STATISTIC_KIND_MCV;
- stats->staop[0] = mystats->eqopr;
- stats->stanumbers[0] = mcv_freqs;
- stats->numnumbers[0] = num_mcv;
- stats->stavalues[0] = mcv_values;
- stats->numvalues[0] = num_mcv;
+ stats->stakind[index][0] = STATISTIC_KIND_MCV;
+ stats->staop[index][0] = mystats->eqopr;
+ stats->stanumbers[index][0] = mcv_freqs;
+ stats->numnumbers[index][0] = num_mcv;
+ stats->stavalues[index][0] = mcv_values;
+ stats->numvalues[index][0] = num_mcv;
/*
* Accept the defaults for stats->statypid and others. They have
@@ -2179,12 +2486,12 @@ compute_minimal_stats(VacAttrStatsP stats,
{
/* We found only nulls; assume the column is entirely null */
stats->stats_valid = true;
- stats->stanullfrac = 1.0;
+ stats->stanullfrac[index] = 1.0;
if (is_varwidth)
- stats->stawidth = 0; /* "unknown" */
+ stats->stawidth[index] = 0; /* "unknown" */
else
- stats->stawidth = stats->attrtype->typlen;
- stats->stadistinct = 0.0; /* "unknown" */
+ stats->stawidth[index] = stats->attrtypes[index]->typlen;
+ stats->stadistinct[index] = 0.0; /* "unknown" */
}
/* We don't need to bother cleaning up any of our temporary palloc's */
@@ -2204,7 +2511,8 @@ compute_minimal_stats(VacAttrStatsP stats,
* data values into order.
*/
static void
-compute_scalar_stats(VacAttrStatsP stats,
+compute_scalar_stats(VacStatsP stats,
+ int index,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows)
@@ -2214,10 +2522,10 @@ compute_scalar_stats(VacAttrStatsP stats,
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
- bool is_varlena = (!stats->attrtype->typbyval &&
- stats->attrtype->typlen == -1);
- bool is_varwidth = (!stats->attrtype->typbyval &&
- stats->attrtype->typlen < 0);
+ bool is_varlena = (!stats->attrtypes[index]->typbyval &&
+ stats->attrtypes[index]->typlen == -1);
+ bool is_varwidth = (!stats->attrtypes[index]->typbyval &&
+ stats->attrtypes[index]->typlen < 0);
double corr_xysum;
SortSupportData ssup;
ScalarItem *values;
@@ -2225,9 +2533,9 @@ compute_scalar_stats(VacAttrStatsP stats,
int *tupnoLink;
ScalarMCVItem *track;
int track_cnt = 0;
- int num_mcv = stats->attr->attstattarget;
- int num_bins = stats->attr->attstattarget;
- StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
+ int num_mcv = stats->statarget;
+ int num_bins = stats->statarget;
+ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data[index];
values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
tupnoLink = (int *) palloc(samplerows * sizeof(int));
@@ -2249,7 +2557,7 @@ compute_scalar_stats(VacAttrStatsP stats,
vacuum_delay_point();
- value = fetchfunc(stats, i, &isnull);
+ value = fetchfunc(stats, i, stats->attnums->values[index], &isnull);
/* Check for null/nonnull */
if (isnull)
@@ -2378,16 +2686,16 @@ compute_scalar_stats(VacAttrStatsP stats,
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
- stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ stats->stanullfrac[index] = (double) null_cnt / (double) samplerows;
if (is_varwidth)
- stats->stawidth = total_width / (double) nonnull_cnt;
+ stats->stawidth[index] = total_width / (double) nonnull_cnt;
else
- stats->stawidth = stats->attrtype->typlen;
+ stats->stawidth[index] = stats->attrtypes[index]->typlen;
if (nmultiple == 0)
{
/* If we found no repeated values, assume it's a unique column */
- stats->stadistinct = -1.0;
+ stats->stadistinct[index] = -1.0;
}
else if (toowide_cnt == 0 && nmultiple == ndistinct)
{
@@ -2395,7 +2703,7 @@ compute_scalar_stats(VacAttrStatsP stats,
* Every value in the sample appeared more than once. Assume the
* column has just these values.
*/
- stats->stadistinct = ndistinct;
+ stats->stadistinct[index] = ndistinct;
}
else
{
@@ -2430,7 +2738,7 @@ compute_scalar_stats(VacAttrStatsP stats,
stadistinct = (double) d;
if (stadistinct > totalrows)
stadistinct = totalrows;
- stats->stadistinct = floor(stadistinct + 0.5);
+ stats->stadistinct[index] = floor(stadistinct + 0.5);
}
/*
@@ -2439,8 +2747,8 @@ compute_scalar_stats(VacAttrStatsP stats,
* stadistinct should scale with the row count rather than be a fixed
* value.
*/
- if (stats->stadistinct > 0.1 * totalrows)
- stats->stadistinct = -(stats->stadistinct / totalrows);
+ if (stats->stadistinct[index] > 0.1 * totalrows)
+ stats->stadistinct[index] = -(stats->stadistinct[index] / totalrows);
/*
* Decide how many values are worth storing as most-common values. If
@@ -2457,7 +2765,7 @@ compute_scalar_stats(VacAttrStatsP stats,
* but we prefer to treat such values as MCVs if at all possible.)
*/
if (track_cnt == ndistinct && toowide_cnt == 0 &&
- stats->stadistinct > 0 &&
+ stats->stadistinct[index] > 0 &&
track_cnt <= num_mcv)
{
/* Track list includes all values seen, and all will fit */
@@ -2465,7 +2773,7 @@ compute_scalar_stats(VacAttrStatsP stats,
}
else
{
- double ndistinct = stats->stadistinct;
+ double ndistinct = stats->stadistinct[index];
double avgcount,
mincount,
maxmincount;
@@ -2508,18 +2816,18 @@ compute_scalar_stats(VacAttrStatsP stats,
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(values[track[i].first].value,
- stats->attrtype->typbyval,
- stats->attrtype->typlen);
+ stats->attrtypes[index]->typbyval,
+ stats->attrtypes[index]->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
- stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
- stats->staop[slot_idx] = mystats->eqopr;
- stats->stanumbers[slot_idx] = mcv_freqs;
- stats->numnumbers[slot_idx] = num_mcv;
- stats->stavalues[slot_idx] = mcv_values;
- stats->numvalues[slot_idx] = num_mcv;
+ stats->stakind[index][slot_idx] = STATISTIC_KIND_MCV;
+ stats->staop[index][slot_idx] = mystats->eqopr;
+ stats->stanumbers[index][slot_idx] = mcv_freqs;
+ stats->numnumbers[index][slot_idx] = num_mcv;
+ stats->stavalues[index][slot_idx] = mcv_values;
+ stats->numvalues[index][slot_idx] = num_mcv;
/*
* Accept the defaults for stats->statypid and others. They have
@@ -2615,8 +2923,8 @@ compute_scalar_stats(VacAttrStatsP stats,
for (i = 0; i < num_hist; i++)
{
hist_values[i] = datumCopy(values[pos].value,
- stats->attrtype->typbyval,
- stats->attrtype->typlen);
+ stats->attrtypes[index]->typbyval,
+ stats->attrtypes[index]->typlen);
pos += delta;
posfrac += deltafrac;
if (posfrac >= (num_hist - 1))
@@ -2629,10 +2937,10 @@ compute_scalar_stats(VacAttrStatsP stats,
MemoryContextSwitchTo(old_context);
- stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
- stats->staop[slot_idx] = mystats->ltopr;
- stats->stavalues[slot_idx] = hist_values;
- stats->numvalues[slot_idx] = num_hist;
+ stats->stakind[index][slot_idx] = STATISTIC_KIND_HISTOGRAM;
+ stats->staop[index][slot_idx] = mystats->ltopr;
+ stats->stavalues[index][slot_idx] = hist_values;
+ stats->numvalues[index][slot_idx] = num_hist;
/*
* Accept the defaults for stats->statypid and others. They have
@@ -2672,10 +2980,10 @@ compute_scalar_stats(VacAttrStatsP stats,
corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
(values_cnt * corr_x2sum - corr_xsum * corr_xsum);
- stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
- stats->staop[slot_idx] = mystats->ltopr;
- stats->stanumbers[slot_idx] = corrs;
- stats->numnumbers[slot_idx] = 1;
+ stats->stakind[index][slot_idx] = STATISTIC_KIND_CORRELATION;
+ stats->staop[index][slot_idx] = mystats->ltopr;
+ stats->stanumbers[index][slot_idx] = corrs;
+ stats->numnumbers[index][slot_idx] = 1;
slot_idx++;
}
}
@@ -2683,12 +2991,12 @@ compute_scalar_stats(VacAttrStatsP stats,
{
/* We found only nulls; assume the column is entirely null */
stats->stats_valid = true;
- stats->stanullfrac = 1.0;
+ stats->stanullfrac[index] = 1.0;
if (is_varwidth)
- stats->stawidth = 0; /* "unknown" */
+ stats->stawidth[index] = 0; /* "unknown" */
else
- stats->stawidth = stats->attrtype->typlen;
- stats->stadistinct = 0.0; /* "unknown" */
+ stats->stawidth[index] = stats->attrtypes[index]->typlen;
+ stats->stadistinct[index] = 0.0; /* "unknown" */
}
/* We don't need to bother cleaning up any of our temporary palloc's */
@@ -2732,6 +3040,209 @@ compare_scalars(const void *a, const void *b, void *arg)
}
/*
+ * qsort_arg comparator for cross column statistics
+ * The dimension axes need to be properly ordered.
+ */
+static int
+compare_datums(const void *a, const void *b, void *arg)
+{
+	SortSupport	sortsup = ((CompareDatumsContext *) arg)->ssup;
+	Datum		lhs;
+	Datum		rhs;
+
+	/* qsort_arg hands us pointers to the array elements, not the Datums */
+	lhs = *((Datum *) a);
+	rhs = *((Datum *) b);
+
+	/* Both operands are passed to the comparator as non-NULL */
+	return ApplySortComparator(lhs, false, rhs, false, sortsup);
+}
+
+/*
+ * Find the index in stavalues for a Datum with ordering operators.
+ * Because it's ordered, we can use the ordering operator for binary searching.
+ *
+ * The value looked up is column stats->attnums->values[index] of sample row
+ * rownum, read through fetchfunc.  arg points to a CompareDatumsContext whose
+ * ssup is used to compare it against stats->stavalues[index][0]; that array
+ * must already be sorted with the same comparator.
+ *
+ * Returns: 0 for a NULL value, 1 for a value that is not in stavalues
+ * (this includes the case of an empty stavalues array), and array
+ * position + 2 (so >= 2) for a match.
+ */
+static int
+ordered_findval(VacStatsP stats, int rownum, int index, AnalyzeAttrFetchFunc fetchfunc, void *arg)
+{
+	Datum		value;
+	bool		isnull;
+	int			idx_lo,
+				idx_hi;
+	CompareDatumsContext *ctx = (CompareDatumsContext *) arg;
+
+	value = fetchfunc(stats, rownum, stats->attnums->values[index], &isnull);
+	if (isnull)
+		return 0;
+
+	idx_lo = 0;
+	idx_hi = stats->numvalues[index][0] - 1;
+
+	/*
+	 * Return straight from the loop on a match.  The comparison result is
+	 * never inspected after the loop, so an empty array (idx_hi == -1, loop
+	 * body never entered) cannot read an uninitialized variable.
+	 */
+	while (idx_lo <= idx_hi)
+	{
+		int			idx_mid = idx_lo + (idx_hi - idx_lo) / 2;
+		int32		compare;
+
+		compare = ApplySortComparator(value, false,
+									  stats->stavalues[index][0][idx_mid], false,
+									  ctx->ssup);
+		if (compare == 0)
+			return idx_mid + 2;
+
+		if (compare < 0)
+			idx_hi = idx_mid - 1;
+		else	/* compare > 0 */
+			idx_lo = idx_mid + 1;
+	}
+
+	/* Not found, return "other" special slot */
+	return 1;
+}
+
+/*
+ * Locate a sampled value in stavalues using only an equality function.
+ * Without an ordering there is nothing to bisect on, so bite the bullet
+ * and scan the stavalues array from the front.
+ *
+ * The value looked up is column stats->attnums->values[index] of sample row
+ * rownum, read through fetchfunc.  arg points to a CompareDatumsContext
+ * whose cmpFn is called to test for equality.
+ *
+ * Returns: 0 for NULL value, 1 for a Datum that isn't in stavalues
+ * ("other" value), and array position + 2 (so >= 2) for a match.
+ */
+static int
+unordered_findval(VacStatsP stats, int rownum, int index, AnalyzeAttrFetchFunc fetchfunc, void *arg)
+{
+	CompareDatumsContext *cmpctx = (CompareDatumsContext *) arg;
+	bool		valnull;
+	Datum		sample;
+	int			pos = 0;
+
+	/* FIXME: Assert cmpctx->cmpFn is correct */
+
+	sample = fetchfunc(stats, rownum, stats->attnums->values[index], &valnull);
+	if (valnull)
+		return 0;
+
+	while (pos < stats->numvalues[index][0])
+	{
+		Datum		iseq = FunctionCall2Coll(&(cmpctx->cmpFn),
+											 DEFAULT_COLLATION_OID,
+											 sample,
+											 stats->stavalues[index][0][pos]);
+
+		if (DatumGetBool(iseq))
+			return pos + 2;
+
+		pos++;
+	}
+
+	/* No entry matched: use the "other" slot */
+	return 1;
+}
+
+/*
+ * Compute a cross-column statistics.
+ *
+ * Builds a flattened N-dimensional frequency table over the columns listed
+ * in stats->attnums and stores it in stats->stanumbers[0][0], with its
+ * length in stats->numnumbers[0][0].  Dimension i has
+ * stats->numvalues[i][0] + 2 cells, addressed by the value returned from
+ * that column's findval function.
+ *
+ * stats: statistics descriptor; the function returns without doing anything
+ * if it covers a single column or if any column has no findval function.
+ * fetchfunc: accessor handed through to the findval functions to read the
+ * sample rows.
+ * numrows: number of sample rows to scan; also the divisor that turns the
+ * per-cell counts into fractions.
+ *
+ * Side effects: sorts stats->stavalues[i][0] in place for every column that
+ * has an ordering operator, and frees and clears all
+ * STATISTIC_NUM_SLOTS x STATISTIC_NUM_SLOTS stanumbers/numnumbers entries
+ * before storing the new table.
+ */
+static void
+compute_cross_column_stats(VacStatsP stats, AnalyzeAttrFetchFunc fetchfunc, int numrows)
+{
+ int i, j, numvalues;
+ int dimstride[STATISTIC_NUM_SLOTS];
+ MemoryContext old_context;
+ CompareDatumsContext ctx[STATISTIC_NUM_SLOTS];
+ float4 *hist;
+
+ /* Don't touch single column statistics. */
+ if (stats->attnums->dim1 == 1)
+ return;
+
+ /*
+ * Don't compute cross-column statistics if there is no way to find
+ * a Datum's index in the stavalues[] array
+ */
+ for (i = 0; i < stats->attnums->dim1; i++)
+ if (stats->statfuncs[i].findval_func_ptr == NULL)
+ return;
+
+ /* We need ordered stavalues arrays whenever possible (ordered_findval() binary-searches them). */
+ for (i = 0; i < stats->attnums->dim1; i++)
+ {
+ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data[i];
+
+ if (OidIsValid(mystats->ltopr))
+ {
+ Oid cmpFn;
+ SortSupportData *ssup;
+ /* FIXME: shouldn't be necessary in this case */
+ bool reverse, issupport;
+
+ if (!get_sort_function_for_ordering_op(mystats->ltopr, &cmpFn,
+ &issupport, &reverse)) {
+ elog(ERROR, "operator %u is not a valid ordering operator",
+ mystats->ltopr);
+ }
+ fmgr_info(cmpFn, &(ctx[i].cmpFn));
+
+ ssup = (SortSupportData*) palloc0(sizeof(SortSupportData));
+ ssup->ssup_cxt = CurrentMemoryContext;
+ /* We always use the default collation for statistics */
+ ssup->ssup_collation = DEFAULT_COLLATION_OID;
+ ssup->ssup_nulls_first = false;
+
+ PrepareSortSupportFromOrderingOp(mystats->ltopr, ssup);
+
+ ctx[i].ssup = ssup;
+
+ qsort_arg((void *) stats->stavalues[i][0], stats->numvalues[i][0], sizeof(Datum),
+ compare_datums, (void *) &ctx[i]);
+ }
+ else
+ fmgr_info(mystats->eqfunc, &(ctx[i].cmpFn)); /* no ordering operator: only the equality function is set up; ctx[i].ssup stays unset */
+ }
+
+ /* Free up previously computed stanumbers arrays; slot [0][0] is refilled with the joint table at the end. */
+ for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+ for (j = 0; j < STATISTIC_NUM_SLOTS; j++)
+ {
+ if (stats->stanumbers[i][j])
+ {
+ pfree(stats->stanumbers[i][j]);
+ stats->stanumbers[i][j] = NULL;
+ }
+ stats->numnumbers[i][j] = 0;
+ }
+
+ /*
+ * Compute the size of the N-dimension histogram and the dimension stride values.
+ * Each dimension gets two extra cells in front of the stavalues entries:
+ * cell 0 counts NULLs and cell 1 counts values that are not in stavalues
+ * ("other"), matching the findval return values, hence the + 2.
+ * The strides are assigned from the last column backwards, so the last
+ * column has stride 1.
+ */
+ for (i = 0, numvalues = 1; i < stats->attnums->dim1; i++)
+ {
+ dimstride[stats->attnums->dim1 - i - 1] = numvalues;
+ numvalues *= stats->numvalues[stats->attnums->dim1 - i - 1][0] + 2;
+ }
+
+ old_context = MemoryContextSwitchTo(stats->anl_context);
+
+ hist = (float *) palloc(numvalues * sizeof(float)); /* allocated while anl_context is current */
+ for (i = 0; i < numvalues; i++)
+ hist[i] = 0.0;
+
+ MemoryContextSwitchTo(old_context);
+
+ for (i = 0; i < numrows; i++)
+ {
+ int idx, idx_tmp;
+
+ idx = 0;
+ for (j = 0; j < stats->attnums->dim1; j++)
+ {
+ idx_tmp = stats->statfuncs[j].findval_func_ptr(stats, i, j, fetchfunc, &ctx[j]);
+ if (idx_tmp < 0) /* NOTE(review): ordered_findval()/unordered_findval() never return < 0, so this looks unreachable for them -- confirm for other findval implementations */
+ continue;
+
+ idx += idx_tmp * dimstride[j];
+ }
+ hist[idx] = hist[idx] + 1.0;
+ }
+
+ /* Store selectivity instead of actual counts. */
+ for (i = 0; i < numvalues; i ++)
+ hist[i] /= numrows; /* NOTE(review): numrows == 0 is not guarded here -- confirm callers never pass 0 */
+
+ stats->numnumbers[0][0] = numvalues;
+ stats->stanumbers[0][0] = hist;
+}
+
+/*
* qsort comparator for sorting ScalarMCVItems by position
*/
static int
@@ -2742,3 +3253,74 @@ compare_mcvs(const void *a, const void *b)
return da - db;
}
+
+/*
+ * ExtraStatistics
+ * Add or remove one cross-column statistics entry for a table.
+ *
+ * stmt->relation names the table and stmt->columns lists the columns
+ * (each item must already be a Var).  Between 2 and STATISTIC_NUM_SLOTS
+ * distinct columns are accepted.  The attribute numbers are sorted into
+ * ascending order before being handed to AddStatistics() (stmt->create)
+ * or RemoveStatistics(); a WARNING is emitted if that changed the order
+ * the user wrote.
+ */
+void ExtraStatistics(ExtraStatStmt *stmt)
+{
+ Oid relId;
+ int len, i, j;
+ bool differ = false;
+ AttrNumber *attnums;
+ AttrNumber *sorted_attnums;
+ ListCell *l;
+ /* resolve the table name, requesting AccessExclusiveLock */
+ relId = RangeVarGetRelid(stmt->relation, AccessExclusiveLock, false);
+ /* the column count must be between 2 and STATISTIC_NUM_SLOTS */
+ len = list_length(stmt->columns);
+ if (len < 2)
+ elog(ERROR, "cross column statistics need at least two columns");
+ if (len > STATISTIC_NUM_SLOTS)
+ elog(ERROR, "cross column statistics can cover at most %d columns", STATISTIC_NUM_SLOTS);
+ /* attnums keeps the order as written, sorted_attnums is sorted below */
+ attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ sorted_attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ /* collect the attribute number of every listed column */
+ i = 0;
+ foreach(l, stmt->columns)
+ {
+ Node *node = (Node *) lfirst(l);
+ Var *var;
+
+ if (!IsA(node, Var))
+ elog(ERROR, "not a column reference");
+
+ var = (Var *) node;
+
+ if (var->varattno == 0)
+ elog(ERROR, "row expansion via \"*\" is not supported here");
+
+ sorted_attnums[i] = attnums[i] = var->varattno;
+
+ i++;
+ }
+ /* sort sorted_attnums ascending with a simple exchange sort */
+ for (i = 0; i < len - 1; i++)
+ for (j = i+1; j < len; j++)
+ if (sorted_attnums[i] > sorted_attnums[j])
+ {
+ AttrNumber tmp = sorted_attnums[i];
+
+ sorted_attnums[i] = sorted_attnums[j];
+ sorted_attnums[j] = tmp;
+ }
+ /* note whether sorting changed the order, and reject duplicates */
+ for (i = 0; i < len; i++)
+ {
+ if (!differ && attnums[i] != sorted_attnums[i])
+ differ = true;
+
+ if ((i < len - 1) && sorted_attnums[i] == sorted_attnums[i+1])
+ elog(ERROR, "column list must contain every column exactly once");
+
+ }
+ if (differ)
+ elog(WARNING, "the column list was reordered in the order of table attributes");
+ /* the sorted column set is what identifies the statistics entry */
+ if (stmt->create)
+ AddStatistics(relId, sorted_attnums, len, false, stmt->statistics_target);
+ else
+ RemoveStatistics(relId, sorted_attnums, len);
+}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index cd4490a..4ab4b4c 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -35,6 +35,7 @@
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
+#include "catalog/pg_statistic.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
@@ -4342,7 +4343,6 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
attribute.attrelid = myrelid;
namestrcpy(&(attribute.attname), colDef->colname);
attribute.atttypid = typeOid;
- attribute.attstattarget = (newattnum > 0) ? -1 : 0;
attribute.attlen = tform->typlen;
attribute.attcacheoff = -1;
attribute.atttypmod = typmod;
@@ -4504,6 +4504,8 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
add_column_datatype_dependency(myrelid, newattnum, attribute.atttypid);
add_column_collation_dependency(myrelid, newattnum, attribute.attcollation);
+ AddStatistics(myrelid, &attribute.attnum, 1, attribute.attinhcount, (newattnum > 0) ? -1 : 0);
+
/*
* Propagate to children as appropriate. Unlike most other ALTER
* routines, we have to do this one level of recursion at a time; we can't
@@ -4885,8 +4887,14 @@ ATExecSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
{
int newtarget;
Relation attrelation;
+ Relation statsrelation;
+ Oid relid;
HeapTuple tuple;
Form_pg_attribute attrtuple;
+ AttrNumber attnum;
+ bool inherited;
+ int2vector *attnumvector;
+ Form_pg_statistic stattuple;
Assert(IsA(newValue, Integer));
newtarget = intVal(newValue);
@@ -4912,7 +4920,9 @@ ATExecSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
attrelation = heap_open(AttributeRelationId, RowExclusiveLock);
- tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+ relid = RelationGetRelid(rel);
+
+ tuple = SearchSysCacheAttName(relid, colName);
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
@@ -4927,16 +4937,41 @@ ATExecSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
errmsg("cannot alter system column \"%s\"",
colName)));
- attrtuple->attstattarget = newtarget;
+ attnum = attrtuple->attnum;
+ inherited = (attrtuple->attinhcount > 0);
- simple_heap_update(attrelation, &tuple->t_self, tuple);
+ ReleaseSysCache(tuple);
- /* keep system catalog indexes current */
- CatalogUpdateIndexes(attrelation, tuple);
+ heap_close(attrelation, RowExclusiveLock);
- heap_freetuple(tuple);
+ statsrelation = heap_open(StatisticRelationId, RowExclusiveLock);
- heap_close(attrelation, RowExclusiveLock);
+ attnumvector = buildint2vector(&attnum, 1);
+
+ tuple = SearchSysCacheCopy3(STATRELATTINH,
+ ObjectIdGetDatum(relid),
+ PointerGetDatum(attnumvector),
+ BoolGetDatum(inherited));
+
+ pfree(attnumvector);
+
+ if (!HeapTupleIsValid(tuple))
+ AddStatistics(relid, &attnum, 1, inherited, newtarget);
+ else
+ {
+ stattuple = (Form_pg_statistic) GETSTRUCT(tuple);
+
+ stattuple->statarget = newtarget;
+
+ simple_heap_update(statsrelation, &tuple->t_self, tuple);
+
+ /* keep system catalog indexes current */
+ CatalogUpdateIndexes(statsrelation, tuple);
+
+ heap_freetuple(tuple);
+ }
+
+ heap_close(statsrelation, RowExclusiveLock);
}
static void
@@ -7587,9 +7622,9 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);
/*
- * Drop any pg_statistic entry for the column, since it's now wrong type
+ * Invalidate any pg_statistic entry for the column, since it's now wrong type
*/
- RemoveStatistics(RelationGetRelid(rel), attnum);
+ InvalidateStatistics(RelationGetRelid(rel), attnum);
/*
* Update the default, if present, by brute force --- remove and re-add
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index cabe663..38b5f73 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -31,6 +31,7 @@
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
+#include "utils/builtins.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"
#include "utils/lsyscache.h"
@@ -1124,6 +1125,7 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
int nvalues;
float4 *numbers;
int nnumbers;
+ int2vector *attnumvector;
/* Do nothing if planner didn't identify the outer relation's join key */
if (!OidIsValid(node->skewTable))
@@ -1135,10 +1137,23 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
/*
* Try to find the MCV statistics for the outer relation's join key.
*/
+ attnumvector = buildint2vector(&(node->skewColumn), 1);
statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(node->skewTable),
- Int16GetDatum(node->skewColumn),
+ PointerGetDatum(attnumvector),
BoolGetDatum(node->skewInherit));
+ pfree(attnumvector);
+
+ /* check whether the stats entry is valid */
+ if (HeapTupleIsValid(statsTuple))
+ {
+ if (!((Form_pg_statistic)GETSTRUCT(statsTuple))->stavalid)
+ {
+ ReleaseSysCache(statsTuple);
+ statsTuple = NULL;
+ }
+ }
+
if (!HeapTupleIsValid(statsTuple))
return;
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 868fb71..66dc24a 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3422,6 +3422,19 @@ _copyCreateForeignTableStmt(const CreateForeignTableStmt *from)
return newnode;
}
+/*
+ * _copyExtraStatStmt
+ *		Copy a CREATE/DROP CROSS COLUMN STATISTICS statement node.
+ *
+ * All fields filled in by the grammar are copied: relkind, create,
+ * relation, columns and statistics_target.
+ */
+static ExtraStatStmt *
+_copyExtraStatStmt(const ExtraStatStmt *from)
+{
+	ExtraStatStmt *newnode = makeNode(ExtraStatStmt);
+
+	COPY_SCALAR_FIELD(relkind);
+	COPY_SCALAR_FIELD(create);
+	COPY_NODE_FIELD(relation);
+	COPY_NODE_FIELD(columns);
+	COPY_SCALAR_FIELD(statistics_target);
+
+	return newnode;
+}
+
static CreateTrigStmt *
_copyCreateTrigStmt(const CreateTrigStmt *from)
{
@@ -4290,6 +4303,9 @@ copyObject(const void *from)
case T_CreateForeignTableStmt:
retval = _copyCreateForeignTableStmt(from);
break;
+ case T_ExtraStatStmt:
+ retval = _copyExtraStatStmt(from);
+ break;
case T_CreateTrigStmt:
retval = _copyCreateTrigStmt(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index d2a79eb..a7bf2a6 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1758,6 +1758,18 @@ _equalCreateForeignTableStmt(const CreateForeignTableStmt *a, const CreateForeig
}
static bool
+_equalExtraStatStmt(const ExtraStatStmt *a, const ExtraStatStmt *b)
+{
+	/*
+	 * Two cross-column statistics statements are equal when every field
+	 * set by the grammar matches: relkind, create, relation, columns and
+	 * statistics_target.
+	 */
+	COMPARE_SCALAR_FIELD(relkind);
+	COMPARE_SCALAR_FIELD(create);
+	COMPARE_NODE_FIELD(relation);
+	COMPARE_NODE_FIELD(columns);
+	COMPARE_SCALAR_FIELD(statistics_target);
+
+	return true;
+}
+
+static bool
_equalCreateTrigStmt(const CreateTrigStmt *a, const CreateTrigStmt *b)
{
COMPARE_STRING_FIELD(trigname);
@@ -2850,6 +2862,9 @@ equal(const void *a, const void *b)
case T_CreateForeignTableStmt:
retval = _equalCreateForeignTableStmt(a, b);
break;
+ case T_ExtraStatStmt:
+ retval = _equalExtraStatStmt(a, b);
+ break;
case T_CreateTrigStmt:
retval = _equalCreateTrigStmt(a, b);
break;
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index bb148d7..cae5562 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -13,17 +13,29 @@
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "postgres_ext.h"
+#include "access/skey.h"
+#include "access/relscan.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
+#include "nodes/pg_list.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
+#include "optimizer/var.h"
+#include "utils/array.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
+#include "utils/rel.h"
#include "utils/selfuncs.h"
-
+#include "utils/syscache.h"
+#include "utils/tqual.h"
/*
* Data structure for accumulating info about possible range-query
@@ -33,6 +45,7 @@ typedef struct RangeQueryClause
{
struct RangeQueryClause *next; /* next in linked list */
Node *var; /* The common variable of the clauses */
+ AttrNumber varattno; /* for finding cross-column statistics */
bool have_lobound; /* found a low-bound clause yet? */
bool have_hibound; /* found a high-bound clause yet? */
Selectivity lobound; /* Selectivity of a var > something clause */
@@ -42,6 +55,24 @@ typedef struct RangeQueryClause
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
bool varonleft, bool isLTsel, Selectivity s2);
+typedef struct CrossColumnClause /* one equality clause kept for cross-column matching */
+{
+ struct CrossColumnClause *next; /* next in linked list (kept in varattno order by addXCClause) */
+ Node *var; /* the variable-side operand of the clause */
+ AttrNumber varattno; /* attribute number of var, or 0 if var is not a plain Var */
+ OpExpr *expr; /* the operator clause itself */
+ Selectivity sel; /* selectivity computed for this clause on its own */
+} CrossColumnClause;
+
+static void addXCClause(CrossColumnClause **xclist, OpExpr *clause,
+ bool varonleft, Selectivity s2);
+
+static bool crosscolumn_selectivity(Oid relId,
+ CrossColumnClause **xcnext, RangeQueryClause **rqlist,
+ Selectivity *result_sel);
+
+static Selectivity calculate_xcol_sel(PlannerInfo *root, CrossColumnClause **clauses, int2vector *attnums, HeapTuple statstuple, TupleDesc tupDesc);
+static Selectivity match_xcol_stats(PlannerInfo *root, Oid relId, CrossColumnClause **xclist);
/****************************************************************************
* ROUTINES TO COMPUTE SELECTIVITIES
@@ -98,8 +129,13 @@ clauselist_selectivity(PlannerInfo *root,
{
Selectivity s1 = 1.0;
RangeQueryClause *rqlist = NULL;
+ CrossColumnClause *xclist = NULL;
+ Oid relId = InvalidOid;
+ bool onerel = false;
ListCell *l;
+// elog(NOTICE, "clauselist_selectivity varRelid %d, list length %d", varRelid, list_length(clauses));
+
/*
* If there's exactly one clause, then no use in trying to match up pairs,
* so just go directly to clause_selectivity().
@@ -161,6 +197,22 @@ clauselist_selectivity(PlannerInfo *root,
(varonleft = false,
is_pseudo_constant_clause_relids(linitial(expr->args),
rinfo->left_relids)));
+ if (ok)
+ {
+ int relid;
+ Oid tmprelId;
+
+ relid = bms_singleton_member(rinfo->clause_relids);
+ tmprelId = root->simple_rte_array[relid]->relid;
+
+ if (!OidIsValid(relId))
+ {
+ onerel = true;
+ relId = tmprelId;
+ }
+ else if (relId != tmprelId)
+ onerel = false;
+ }
}
else
{
@@ -168,6 +220,25 @@ clauselist_selectivity(PlannerInfo *root,
(is_pseudo_constant_clause(lsecond(expr->args)) ||
(varonleft = false,
is_pseudo_constant_clause(linitial(expr->args))));
+ if (ok)
+ {
+ Relids relids;
+ int relid;
+ Oid tmprelId;
+
+ relids = pull_varnos(clause);
+ relid = bms_singleton_member(relids);
+ tmprelId = root->simple_rte_array[relid]->relid;
+ bms_free(relids);
+
+ if (!OidIsValid(relId))
+ {
+ onerel = true;
+ relId = tmprelId;
+ }
+ else if (relId != tmprelId)
+ onerel = false;
+ }
}
if (ok)
@@ -187,6 +258,10 @@ clauselist_selectivity(PlannerInfo *root,
addRangeClause(&rqlist, clause,
varonleft, false, s2);
break;
+ case F_EQSEL:
+ addXCClause(&xclist, expr,
+ varonleft, s2);
+ break;
default:
/* Just merge the selectivity in generically */
s1 = s1 * s2;
@@ -201,6 +276,34 @@ clauselist_selectivity(PlannerInfo *root,
}
/*
+ * Scan xclist and rqlist recursively and filter out
+ * all possible cross-column selectivities.
+ */
+ if (onerel)
+ {
+ // elog(NOTICE, "Old selectivity: %0.5f", s1);
+ s1 = s1 * match_xcol_stats(root, relId, &xclist);
+ /*crosscolumn_selectivity(relId, &xclist, &rqlist, &s1);*/
+ // elog(NOTICE, "New selectivity: %0.5f", s1);
+ }
+
+ /*
+ * Free the cross-column clauses
+ */
+ while (xclist != NULL)
+ {
+ CrossColumnClause *xcnext;
+
+ // elog(NOTICE, "Freeing att %d", xclist->varattno);
+
+ s1 = s1 * xclist->sel;
+
+ xcnext = xclist->next;
+ pfree(xclist);
+ xclist = xcnext;
+ }
+
+ /*
* Now scan the rangequery pair list.
*/
while (rqlist != NULL)
@@ -278,6 +381,22 @@ clauselist_selectivity(PlannerInfo *root,
return s1;
}
+/*
+ * var_get_attno
+ *		Return the attribute number of "clause" when it is a plain Var,
+ *		and 0 for any other kind of node.
+ */
+static AttrNumber
+var_get_attno(Node *clause)
+{
+	if (!IsA(clause, Var))
+		return 0;
+
+	return ((Var *) clause)->varattno;
+}
+
/*
* addRangeClause --- add a new range clause for clauselist_selectivity
*
@@ -357,6 +476,8 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
/* No matching var found, so make a new clause-pair data structure */
rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
rqelem->var = var;
+ rqelem->varattno = var_get_attno(var);
+
if (is_lobound)
{
rqelem->have_lobound = true;
@@ -374,6 +495,49 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
}
/*
+ * addXCClause - add a new clause to the list of clauses for cross-column stats inspection
+ *
+ * Clauses are added in varattno order.
+ */
+static void
+addXCClause(CrossColumnClause **xclist, OpExpr *clause,
+			bool varonleft, Selectivity s)
+{
+	CrossColumnClause *newelem;
+	CrossColumnClause **link;
+	Node	   *var;
+
+	/* pick whichever operand is the variable side of the clause */
+	var = varonleft ? get_leftop((Expr *) clause)
+		: get_rightop((Expr *) clause);
+
+	newelem = (CrossColumnClause *) palloc(sizeof(CrossColumnClause));
+	newelem->var = var;
+	newelem->varattno = var_get_attno(var);
+	newelem->expr = clause;
+	newelem->sel = s;
+
+	/*
+	 * Walk the chain of "next" links until we reach the end or the first
+	 * element whose varattno is not smaller than ours, then splice the new
+	 * element in front of it.
+	 */
+	for (link = xclist;
+		 *link != NULL && (*link)->varattno < newelem->varattno;
+		 link = &((*link)->next))
+		;
+
+	newelem->next = *link;
+	*link = newelem;
+}
+
+/*
* bms_is_subset_singleton
*
* Same result as bms_is_subset(s, bms_make_singleton(x)),
@@ -498,6 +662,8 @@ clause_selectivity(PlannerInfo *root,
{
rinfo = (RestrictInfo *) clause;
+// elog(NOTICE, "RestrictInfo, %s", nodeToString(rinfo->clause));
+
/*
* If the clause is marked pseudoconstant, then it will be used as a
* gating qual and should not affect selectivity estimates; hence
@@ -563,7 +729,7 @@ clause_selectivity(PlannerInfo *root,
/*
* We probably shouldn't ever see an uplevel Var here, but if we do,
- * return the default selectivity...
+ * return the default selectivity... (FIXME: wrong comment given code below)
*/
if (var->varlevelsup == 0 &&
(varRelid == 0 || varRelid == (int) var->varno))
@@ -778,3 +944,571 @@ clause_selectivity(PlannerInfo *root,
return s1;
}
+
+/*
+ * has_xcol_selectivity
+ *		Check whether pg_statistic has a row for exactly the given column
+ *		set of relation relId.
+ *
+ * attnums holds natts attribute numbers.  When a matching row exists,
+ * *result_sel is set and true is returned; otherwise *result_sel is left
+ * untouched and false is returned.  Note that the selectivity handed back
+ * is currently a fixed placeholder, not a value read from the statistics.
+ */
+static bool
+has_xcol_selectivity(Oid relId, int natts, AttrNumber *attnums, Selectivity *result_sel)
+{
+	Relation	rel;
+	Datum	   *datums = (Datum *) palloc(natts * sizeof(Datum));
+	ArrayType  *arr_attnums;
+	int			i;
+	int16		typlen;
+	bool		typbyval;
+	char		typalign;
+	ScanKeyData scanKey[2];
+	SysScanDesc scan;
+	HeapTuple	tuple;
+	bool		result;
+	Selectivity sel = 1e-5;		/* fixed selectivity for now */
+
+	/* build the int2 array used as the second scan key */
+	for (i = 0; i < natts; i++)
+		datums[i] = Int16GetDatum(attnums[i]);
+
+	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+	arr_attnums = construct_array(datums, natts,
+								  INT2OID, typlen, typbyval, typalign);
+
+	rel = heap_open(StatisticRelationId, AccessShareLock);
+
+	ScanKeyInit(&scanKey[0],
+				Anum_pg_statistic_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relId));
+	ScanKeyInit(&scanKey[1],
+				Anum_pg_statistic_staattnums,
+				BTEqualStrategyNumber, F_ARRAY_EQ,
+				PointerGetDatum(arr_attnums));
+
+	scan = systable_beginscan(rel, StatisticRelidAttnumsInhIndexId, true,
+							  SnapshotNow, 2, scanKey);
+
+	/* only the existence of a row matters here */
+	tuple = systable_getnext(scan);
+	result = HeapTupleIsValid(tuple);
+
+	systable_endscan(scan);
+
+	/* release the lock, as match_xcol_stats() does for the same catalog */
+	heap_close(rel, AccessShareLock);
+
+	/* the scan is finished, so the key array can go too */
+	pfree(arr_attnums);
+	pfree(datums);
+
+	if (result)
+		*result_sel = sel;
+
+	return result;
+}
+
+/*
+ * match_xcol_stats
+ *		Apply the cross-column statistics of relation relId to the equality
+ *		clauses collected in *xclist.
+ *
+ * *xclist is expected to be ordered by varattno, which is how addXCClause()
+ * builds it.  Every valid pg_statistic row of the relation is inspected; if
+ * each of its columns is matched by a clause in the list,
+ * calculate_xcol_sel() is asked for the joint selectivity.  On success the
+ * matched clauses are unlinked from *xclist and freed, and the joint
+ * selectivity is multiplied into the result.  Clauses that stay in *xclist
+ * are left for the caller to account for.
+ *
+ * Returns the product of all joint selectivities that were applied, which
+ * is 1.0 when nothing matched.
+ */
+static Selectivity
+match_xcol_stats(PlannerInfo *root, Oid relId, CrossColumnClause **xclist)
+{
+	Relation	pgstatistic;
+	TupleDesc	tupDesc;
+	Datum		datums[2];
+	ArrayType  *arr_attnums;
+	int16		typlen;
+	bool		typbyval;
+	char		typalign;
+	ScanKeyData scanKey[2];
+	SysScanDesc scan;
+	HeapTuple	tuple;
+	Selectivity s1 = 1.0;
+
+	/* lower bound for the staattnums scan key */
+	datums[0] = Int16GetDatum(0);
+	datums[1] = Int16GetDatum(0);
+	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+	arr_attnums = construct_array(datums, 2,
+								  INT2OID, typlen, typbyval, typalign);
+
+	pgstatistic = heap_open(StatisticRelationId, AccessShareLock);
+	tupDesc = RelationGetDescr(pgstatistic);
+
+	ScanKeyInit(&scanKey[0],
+				Anum_pg_statistic_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relId));
+	ScanKeyInit(&scanKey[1],
+				Anum_pg_statistic_staattnums,
+				BTGreaterEqualStrategyNumber, F_INT2VECTORGE,
+				PointerGetDatum(arr_attnums));
+
+	scan = systable_beginscan(pgstatistic, StatisticRelidAttnumsInhIndexId, true,
+							  SnapshotNow, 2, scanKey);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(tuple);
+		CrossColumnClause *clauses[STATISTIC_NUM_SLOTS];
+		CrossColumnClause *xc_tmp;
+		CrossColumnClause **prev_link;
+		Datum		sta_attnums_dat;
+		int2vector *sta_attnum_vec;
+		bool		isnull;
+		int			i;
+		Selectivity s2;
+
+		if (!stats->stavalid)
+			continue;
+
+		sta_attnums_dat = heap_getattr(tuple, Anum_pg_statistic_staattnums,
+									   tupDesc, &isnull);
+		Assert(!isnull && sta_attnums_dat);
+		sta_attnum_vec = (int2vector *) DatumGetPointer(sta_attnums_dat);
+
+		/*
+		 * Only genuine multi-column entries are of interest here, and the
+		 * clauses[] array cannot hold more than STATISTIC_NUM_SLOTS items.
+		 */
+		if (sta_attnum_vec->dim1 < 2 ||
+			sta_attnum_vec->dim1 > STATISTIC_NUM_SLOTS)
+			continue;
+
+		/*
+		 * Match up against the clauses.  Both the statistics columns and
+		 * the clause list are in attribute-number order, so one forward
+		 * pass over the list is enough.
+		 */
+		xc_tmp = *xclist;
+		for (i = 0; i < sta_attnum_vec->dim1; i++)
+		{
+			int			cur_attno = sta_attnum_vec->values[i];
+
+			while (xc_tmp && xc_tmp->varattno < cur_attno)
+				xc_tmp = xc_tmp->next;
+
+			if (!xc_tmp || xc_tmp->varattno > cur_attno)
+				break;
+
+			/* store picked clause */
+			clauses[i] = xc_tmp;
+		}
+		if (i != sta_attnum_vec->dim1)	/* No match */
+			continue;
+
+		s2 = calculate_xcol_sel(root, clauses, sta_attnum_vec, tuple, tupDesc);
+		if (s2 < 0.0)
+			continue;
+
+		/*
+		 * Remove the matched clauses from consideration.  clauses[] is in
+		 * list order, so they can be unlinked in a single pass.
+		 */
+		prev_link = xclist;
+		xc_tmp = *xclist;
+		i = 0;
+		while (xc_tmp && i < sta_attnum_vec->dim1)
+		{
+			if (xc_tmp == clauses[i])
+			{
+				CrossColumnClause *xc_removed = xc_tmp;
+
+				*prev_link = xc_tmp->next;
+				xc_tmp = xc_tmp->next;
+				pfree(xc_removed);
+				i++;
+			}
+			else
+			{
+				prev_link = &(xc_tmp->next);
+				xc_tmp = xc_tmp->next;
+			}
+		}
+
+		/* Merge in selectivity */
+		s1 = s1 * s2;
+	}
+	systable_endscan(scan);
+
+	heap_close(pgstatistic, AccessShareLock);
+
+	/* the scan key array is no longer referenced */
+	pfree(arr_attnums);
+
+	return s1;
+}
+
+/*
+ * calculate_xcol_sel
+ *		Look up the joint selectivity of a set of "var = const" clauses in
+ *		one cross-column pg_statistic row.
+ *
+ * clauses[i] is the clause matched to the i'th column of attnums.  For each
+ * column the comparison constant is searched in the values array stored in
+ * slot i of the row.  Within a dimension, index 0 stands for NULL, index 1
+ * for "a value that is not in the stored list", and index k + 2 for the
+ * k'th stored value.  The per-dimension indexes are combined, with the last
+ * column varying fastest, into a position in the stanumbers1 array, whose
+ * entry is multiplied into the result.
+ *
+ * When a constant is not among the stored values, the result is also
+ * divided by the estimated number of distinct values outside the list,
+ * i.e. those values are presumed to be uniformly distributed.
+ *
+ * Returns the selectivity, or a negative number if it cannot be calculated
+ * (the clause cannot be split into variable and comparison value, the
+ * comparison value is not a plain Const, or a needed array is missing from
+ * the row).  A malformed stanumbers array raises an error.
+ */
+static Selectivity
+calculate_xcol_sel(PlannerInfo *root, CrossColumnClause **clauses, int2vector *attnums, HeapTuple statstuple, TupleDesc tupDesc)
+{
+	int			dimindices[STATISTIC_NUM_SLOTS];
+	int			dimstrides[STATISTIC_NUM_SLOTS];
+	int			mcv_index;
+	int			i,
+				j;
+	bool		isnull;
+	Datum		numbers_dat;
+	ArrayType  *numarray;
+	Selectivity s1 = 1.0;
+
+	for (i = 0; i < attnums->dim1; i++)
+	{
+		CrossColumnClause *clause = clauses[i];
+		VariableStatData vardata;
+		Node	   *other;
+		bool		varonleft;
+		Datum		values_dat;
+		ArrayType  *statarray;
+		Oid			arrayelemtype;
+		HeapTuple	typeTuple;
+		Form_pg_type typeForm;
+		Datum	   *values;
+		int			nvalues;
+
+		/*
+		 * Split the clause into variable and comparison value.  The value
+		 * may be any pseudo-constant expression; we can only look up plain
+		 * constants, so give up on anything else.
+		 */
+		if (!get_restriction_variable(root, clause->expr->args, 0 /* FIXME: pass this in */,
+									  &vardata, &other, &varonleft))
+			return -1.0;
+		if (!IsA(other, Const))
+		{
+			ReleaseVariableStats(vardata);
+			return -1.0;
+		}
+
+		/*
+		 * FIXME: refactor multi-col stats storage to store operators per
+		 * column so we can check that the correct operator is used.
+		 */
+		values_dat = heap_getattr(statstuple, Anum_pg_statistic_stavalues1 + i,
+								  tupDesc, &isnull);
+		if (isnull)
+		{
+			ReleaseVariableStats(vardata);
+			return -1.0;
+		}
+
+		statarray = DatumGetArrayTypeP(values_dat);
+
+		arrayelemtype = ARR_ELEMTYPE(statarray);
+		typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(arrayelemtype));
+		if (!HeapTupleIsValid(typeTuple))
+			elog(ERROR, "cache lookup failed for type %u", arrayelemtype);
+
+		typeForm = (Form_pg_type) GETSTRUCT(typeTuple);
+		deconstruct_array(statarray,
+						  arrayelemtype,
+						  typeForm->typlen,
+						  typeForm->typbyval,
+						  typeForm->typalign,
+						  &values, NULL, &nvalues);
+		ReleaseSysCache(typeTuple);
+
+		/*
+		 * This dimension has nvalues + 2 cells (NULL, "other", and the
+		 * stored values), so every earlier dimension's stride grows by
+		 * that factor.
+		 */
+		dimstrides[i] = 1;
+		for (j = 0; j < i; j++)
+			dimstrides[j] *= (nvalues + 2);
+
+		if (((Const *) other)->constisnull)
+		{
+			/* Null is stored in the first item of a dimension */
+			dimindices[i] = 0;
+		}
+		else
+		{
+			Datum		constval = ((Const *) other)->constvalue;
+			FmgrInfo	eqproc;
+
+			fmgr_info(get_opcode(clause->expr->opno), &eqproc);
+
+			/* TODO: use binary search when possible */
+			for (j = 0; j < nvalues; j++)
+			{
+				bool		match;
+
+				/* keep the operands on the sides the operator expects */
+				if (varonleft)
+					match = DatumGetBool(FunctionCall2Coll(&eqproc,
+														   DEFAULT_COLLATION_OID,
+														   values[j],
+														   constval));
+				else
+					match = DatumGetBool(FunctionCall2Coll(&eqproc,
+														   DEFAULT_COLLATION_OID,
+														   constval,
+														   values[j]));
+				if (match)
+					break;
+			}
+
+			if (j < nvalues)
+			{
+				/* Found matching value */
+				dimindices[i] = j + 2;
+			}
+			else
+			{
+				/* Nothing matching was found among the stored values */
+				double		ndistinct;
+				bool		isdefault;
+
+				dimindices[i] = 1;
+
+				/*
+				 * We presume uniform distribution of values not present in
+				 * the list.  Predivide selectivity with the number of such
+				 * values for this column.
+				 */
+				ndistinct = get_variable_numdistinct(&vardata, &isdefault);
+				ndistinct -= nvalues;
+				if (ndistinct > 1.0)
+					s1 = s1 / ndistinct;
+			}
+		}
+
+		pfree(values);
+
+		/*
+		 * Free statarray if it's a detoasted copy.
+		 */
+		if ((Pointer) statarray != DatumGetPointer(values_dat))
+			pfree(statarray);
+
+		ReleaseVariableStats(vardata);
+	}
+
+	/* combine the per-dimension indexes into one array position */
+	mcv_index = 0;
+	for (i = 0; i < attnums->dim1; i++)
+		mcv_index += dimindices[i] * dimstrides[i];
+
+	numbers_dat = heap_getattr(statstuple, Anum_pg_statistic_stanumbers1,
+							   tupDesc, &isnull);
+	if (isnull)
+		return -1.0;
+
+	numarray = DatumGetArrayTypeP(numbers_dat);
+
+	/*
+	 * We expect the array to be a 1-D float4 array; verify that before
+	 * looking at its length.  We don't need to use deconstruct_array()
+	 * since the array data is just going to look like a C array of float4
+	 * values.
+	 */
+	if (ARR_NDIM(numarray) != 1 || ARR_DIMS(numarray)[0] <= 0 ||
+		ARR_HASNULL(numarray) ||
+		ARR_ELEMTYPE(numarray) != FLOAT4OID)
+		elog(ERROR, "stanumbers is not a 1-D float4 array");
+	if (ARR_DIMS(numarray)[0] <= mcv_index)
+		elog(ERROR, "stanumbers is too short");
+
+	s1 = s1 * ((float4 *) ARR_DATA_PTR(numarray))[mcv_index];
+
+	/*
+	 * Free the array if it's a detoasted copy.
+	 */
+	if ((Pointer) numarray != DatumGetPointer(numbers_dat))
+		pfree(numarray);
+
+	return s1;
+}
+
+typedef struct { /* one slot of a candidate clause combination; add_reclist reads xc if set, else rq */
+ CrossColumnClause *xc; /* equality clause occupying this slot, or NULL */
+ RangeQueryClause *rq; /* range clause occupying this slot, or NULL */
+} reclist;
+
+typedef struct { /* a candidate clause combination together with its attribute numbers */
+ int len; /* number of entries in rclist and attnums */
+ reclist *rclist; /* the clauses making up the combination */
+ AttrNumber *attnums; /* attribute numbers of those clauses, as built by add_reclist */
+} reclist2;
+
+
+/*
+ * add_reclist
+ *		Add the clause combination rclist[0 .. len-1] to *results unless a
+ *		combination over the same set of columns is already present.
+ *
+ * The attribute numbers of the clauses are collected and sorted into
+ * ascending order, so that two combinations over the same columns compare
+ * equal regardless of the order in which their clauses were picked.  A new
+ * list entry gets its own copy of the clause slots; the caller's rclist
+ * array is not retained.
+ */
+static void
+add_reclist(int len, reclist *rclist, List **results)
+{
+	ListCell   *lc;
+	int			i,
+				j;
+	reclist2   *rclist2;
+	AttrNumber *attnums = (AttrNumber *) palloc(len * sizeof(AttrNumber));
+
+	/* collect the varattnos from the clauses */
+	for (i = 0; i < len; i++)
+	{
+		if (rclist[i].xc)
+			attnums[i] = rclist[i].xc->varattno;
+		else
+			attnums[i] = rclist[i].rq->varattno;
+	}
+
+	/* sort them ascending; a simple exchange sort is fine for so few items */
+	for (i = 0; i < len - 1; i++)
+		for (j = i + 1; j < len; j++)
+		{
+			if (attnums[i] > attnums[j])
+			{
+				AttrNumber	tmp = attnums[i];
+
+				attnums[i] = attnums[j];
+				attnums[j] = tmp;
+			}
+		}
+
+	/* match this ordered attnum list against the current list of attnum arrays */
+	foreach(lc, *results)
+	{
+		reclist2   *rc2 = (reclist2 *) lfirst(lc);
+
+		if (len != rc2->len)
+			continue;
+
+		for (i = 0; i < len; i++)
+			if (attnums[i] != rc2->attnums[i])
+				break;
+		if (i < len)
+			continue;
+
+		/* found: nothing to add, so don't leak the scratch array */
+		pfree(attnums);
+		return;
+	}
+
+	/* not found, add it to the list */
+	rclist2 = (reclist2 *) palloc(sizeof(reclist2));
+	rclist2->len = len;
+	rclist2->rclist = (reclist *) palloc(len * sizeof(reclist));
+	for (i = 0; i < len; i++)
+	{
+		rclist2->rclist[i].xc = rclist[i].xc;
+		rclist2->rclist[i].rq = rclist[i].rq;
+	}
+	rclist2->attnums = attnums;
+
+	*results = lappend(*results, rclist2);
+}
+
+/*
+ * compare_reclist2
+ *		Three-way comparison of two clause combinations: the shorter one
+ *		sorts first; combinations of equal length are ordered by their
+ *		attnums arrays, element by element.
+ */
+static int
+compare_reclist2(reclist2 *a, reclist2 *b)
+{
+	int			pos;
+
+	if (a->len != b->len)
+		return (a->len < b->len) ? -1 : 1;
+
+	for (pos = 0; pos < a->len; pos++)
+	{
+		AttrNumber	lhs = a->attnums[pos];
+		AttrNumber	rhs = b->attnums[pos];
+
+		if (lhs != rhs)
+			return (lhs < rhs) ? -1 : 1;
+	}
+
+	return 0;
+}
+
+/*
+ * add_reclist2
+ *		Insert rclist2 into the array p_reclist2, which is kept free of
+ *		duplicates and in descending compare_reclist2() order.
+ *
+ * *len is the current number of entries; the array must have room for one
+ * more.  Returns true and increments *len if the entry was inserted, false
+ * if an equal entry was already present (the array is then unchanged and
+ * the caller keeps ownership of rclist2).
+ */
+static bool
+add_reclist2(int *len, reclist2 **p_reclist2, reclist2 *rclist2)
+{
+	int			curr_len = *len;
+	int			pos,
+				k;
+
+	/*
+	 * Find the first entry the new one sorts before.  If there is none,
+	 * which includes the empty-array case, pos ends up at curr_len and
+	 * the entry is appended.
+	 */
+	for (pos = 0; pos < curr_len; pos++)
+	{
+		int			cmp = compare_reclist2(rclist2, p_reclist2[pos]);
+
+		if (cmp == 0)
+			return false;		/* already present */
+		if (cmp > 0)
+			break;
+	}
+
+	/* open a gap at pos and drop the new entry in */
+	for (k = curr_len; k > pos; k--)
+		p_reclist2[k] = p_reclist2[k - 1];
+	p_reclist2[pos] = rclist2;
+
+	*len = curr_len + 1;
+	return true;
+}
+
+static void
+collect_xcol_lists(int curr_depth, CrossColumnClause *xclist, RangeQueryClause *rqlist, reclist *rclist, List **results)
+{
+ CrossColumnClause *xc_tmp;
+ RangeQueryClause *rq_tmp;
+ /*
+  * Recursively enumerate clause combinations.  Each clause with a nonzero
+  * varattno is placed in slot curr_depth of rclist in turn; we then
+  * recurse to fill the following slots from the clauses after it, and
+  * hand the combination built so far (curr_depth + 1 slots) to
+  * add_reclist().  The slot is cleared again before trying the next
+  * clause.  Equality clauses are tried first, range clauses second.
+  */
+ for (xc_tmp = xclist; xc_tmp; xc_tmp = xc_tmp->next)
+ {
+ if (xc_tmp->varattno == 0)
+ continue;
+
+ rclist[curr_depth].xc = xc_tmp;
+ collect_xcol_lists(curr_depth + 1, xc_tmp->next, rqlist, rclist, results);
+ add_reclist(curr_depth + 1, rclist, results);
+ rclist[curr_depth].xc = NULL;
+ }
+
+ for (rq_tmp = rqlist; rq_tmp; rq_tmp = rq_tmp->next)
+ {
+ if (rq_tmp->varattno == 0)
+ continue;
+
+ rclist[curr_depth].rq = rq_tmp;
+ collect_xcol_lists(curr_depth + 1, (xclist ? xclist->next : xclist), rq_tmp->next, rclist, results); /* NOTE(review): passes xclist->next, not xclist, so the first equality clause is skipped here - confirm this is intended */
+ add_reclist(curr_depth + 1, rclist, results);
+ rclist[curr_depth].rq = NULL;
+ }
+}
+
+static bool
+crosscolumn_selectivity(Oid relId, CrossColumnClause **xclist, RangeQueryClause **rqlist, Selectivity *result_sel)
+{
+ CrossColumnClause *xc;
+ RangeQueryClause *rq;
+ List *resultlist = NIL;
+ ListCell *lc;
+ reclist *rclist;
+ reclist2 **p_rclist2;
+ int max_len, i;
+ Selectivity sel = 1.0;
+ bool found_xc_sel = false;
+ /*
+  * Overview: enumerate combinations of the equality and range clauses,
+  * and for each combination for which has_xcol_selectivity() reports a
+  * statistics entry on relId, multiply the selectivity it returns into
+  * *result_sel.  Returns true if at least one such entry was found.
+  * Removing the clauses that were used from *xclist and *rqlist is not
+  * implemented yet (see the TODO markers below).
+  *
+  * First, count the clauses in both lists to size the scratch array.
+  */
+ max_len = 0;
+ for (rq = *rqlist; rq; max_len++, rq = rq->next)
+ ;
+ for (xc = *xclist; xc; max_len++, xc = xc->next)
+ ;
+
+// elog(NOTICE, "crosscolumn_selectivity max length of array %d", max_len);
+ /* scratch array holding the combination currently being built */
+ rclist = (reclist *) palloc(max_len * sizeof(reclist));
+ for (i = 0; i < max_len; i++)
+ {
+ rclist[i].xc = NULL;
+ rclist[i].rq = NULL;
+ }
+ /* collect the candidate combinations into resultlist */
+ collect_xcol_lists(0, *xclist, *rqlist, rclist, &resultlist);
+
+ pfree(rclist);
+
+ max_len = list_length(resultlist);
+// elog(NOTICE, "crosscolumn_selectivity list length of arrays %d", max_len);
+ p_rclist2 = (reclist2 **) palloc(max_len * sizeof(reclist2 *));
+ /* move the candidates into p_rclist2; max_len now counts the entries stored there */
+ max_len = 0;
+ foreach (lc, resultlist)
+ {
+ reclist2 *rclist2 = (reclist2 *) lfirst(lc);
+
+ if (!add_reclist2(&max_len, p_rclist2, rclist2)) /* entries that are not stored are freed here */
+ {
+ pfree(rclist2->rclist);
+ pfree(rclist2->attnums);
+ pfree(rclist2);
+ }
+ }
+// elog(NOTICE, "crosscolumn_selectivity length of ordered/unique array of previous list %d", max_len);
+
+ list_free(resultlist);
+ /* probe the statistics catalog for every stored candidate, freeing each one afterwards */
+ for (i = 0; i < max_len; i++)
+ {
+ if (p_rclist2[i] == NULL)
+ continue;
+
+ if (has_xcol_selectivity(relId, p_rclist2[i]->len, p_rclist2[i]->attnums, &sel))
+ {
+ int j;
+
+ /* remove the xclist and rqlist members found in p_rclist2[i] */
+ for (j = 0; j < p_rclist2[i]->len; j++)
+ {
+ /* TODO ... */
+ }
+
+ /* also, remove later elements in p_rclist2 that has any of the removed elements */
+ /* TODO ... */
+
+// elog(NOTICE, "crosscolumn_selectivity found xc selectivity %lf", sel);
+ found_xc_sel = true;
+ *result_sel *= sel;
+ }
+
+ pfree(p_rclist2[i]->rclist);
+ pfree(p_rclist2[i]->attnums);
+ pfree(p_rclist2[i]);
+ }
+ pfree(p_rclist2);
+
+ return found_xc_sel;
+}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9aea2cd..7e4f352 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -214,7 +214,7 @@ static void processCASbits(int cas_bits, int location, const char *constrType,
DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt
DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
- DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt
+ DropForeignServerStmt DropUserMappingStmt ExplainStmt ExtraStatStmt FetchStmt
GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
LockStmt NotifyStmt ExplainableStmt PreparableStmt
CreateFunctionStmt AlterFunctionStmt ReindexStmt RemoveAggrStmt
@@ -246,7 +246,7 @@ static void processCASbits(int cas_bits, int location, const char *constrType,
transaction_mode_item
create_extension_opt_item alter_extension_opt_item
-%type <ival> opt_lock lock_type cast_context
+%type <ival> opt_lock lock_type cast_context opt_stattarget
%type <ival> vacuum_option_list vacuum_option_elem
%type <boolean> opt_force opt_or_replace
opt_grant_grant_option opt_grant_admin_option
@@ -325,6 +325,8 @@ static void processCASbits(int cas_bits, int location, const char *constrType,
%type <list> opt_fdw_options fdw_options
%type <defelt> fdw_option
+%type <list> cc_column_list
+
%type <range> OptTempTableName
%type <into> into_clause create_as_target
@@ -755,6 +757,7 @@ stmt :
| DropdbStmt
| ExecuteStmt
| ExplainStmt
+ | ExtraStatStmt
| FetchStmt
| GrantStmt
| GrantRoleStmt
@@ -1199,6 +1202,74 @@ schema_stmt:
/*****************************************************************************
*
+ * Add / drop extra statistics
+ *
+ *****************************************************************************/
+
+/*
+ * CREATE / DROP CROSS COLUMN STATISTICS, either on a parenthesized column
+ * list of a table or on an index.  Every alternative builds an ExtraStatStmt
+ * node; "create" distinguishes CREATE from DROP and "relkind" distinguishes
+ * the table form ('r') from the index form ('i').
+ */
+ExtraStatStmt:
+ CREATE CROSS COLUMN STATISTICS ON TABLE qualified_name '(' cc_column_list ')' opt_stattarget
+ {
+ ExtraStatStmt *n = makeNode(ExtraStatStmt);
+
+ /* table form: $7 = qualified_name, $9 = cc_column_list, $11 = opt_stattarget */
+ n->relkind = 'r';
+ n->create = true;
+ n->relation = $7;
+ n->columns = $9;
+ n->statistics_target = $11;
+ $$ = (Node *)n;
+ }
+ | DROP CROSS COLUMN STATISTICS ON TABLE qualified_name '(' cc_column_list ')'
+ {
+ ExtraStatStmt *n = makeNode(ExtraStatStmt);
+
+ /* the DROP forms take no opt_stattarget, so statistics_target is not assigned */
+ n->relkind = 'r';
+ n->create = false;
+ n->relation = $7;
+ n->columns = $9;
+ $$ = (Node *)n;
+ }
+ | CREATE CROSS COLUMN STATISTICS ON INDEX qualified_name opt_stattarget
+ {
+ ExtraStatStmt *n = makeNode(ExtraStatStmt);
+
+ /* index form: no column list is parsed, so columns is NIL; $8 = opt_stattarget */
+ n->relkind = 'i';
+ n->create = true;
+ n->relation = $7;
+ n->columns = NIL;
+ n->statistics_target = $8;
+ $$ = (Node *)n;
+ }
+ | DROP CROSS COLUMN STATISTICS ON INDEX qualified_name
+ {
+ ExtraStatStmt *n = makeNode(ExtraStatStmt);
+
+ n->relkind = 'i';
+ n->create = false;
+ n->relation = $7;
+ n->columns = NIL;
+ $$ = (Node *)n;
+ }
+ ;
+
+/* Non-empty, comma-separated list of columnref nodes (left-recursive). */
+cc_column_list:
+			columnref							{ $$ = list_make1($1); }
+			| cc_column_list ',' columnref		{ $$ = lappend($1, $3); }
+		;
+
+/*
+ * Optional statistics target, written WITH ( integer ).
+ * Yields -1 when the clause is omitted.
+ */
+opt_stattarget:
+			WITH '(' Iconst ')'
+				{
+					$$ = $3;
+				}
+			| /* EMPTY */
+				{
+					$$ = -1;
+				}
+		;
+
+
+/*****************************************************************************
+ *
* Set PG internal variable
* SET name TO 'var_value'
* Include SQL92 syntax (thomas 1997-10-22):
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 43f5634..2378435 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2754,3 +2754,96 @@ setSchemaName(char *context_schema, char **stmt_schema_name)
"different from the one being created (%s)",
*stmt_schema_name, context_schema)));
}
+
+/*
+ * transformExtraStatistics
+ *		Parse analysis for CREATE/DROP CROSS COLUMN STATISTICS: build a new
+ *		ExtraStatStmt whose column list is resolved; stmt is not modified.
+ *
+ * Table ('r'): each column goes through transformExpr() against a range table
+ * holding only that table.  Index ('i'): one Var (varno 0) per index column.
+ */
+ExtraStatStmt *
+transformExtraStatistics(ExtraStatStmt *stmt, const char *queryString)
+{
+	ExtraStatStmt *newstmt;
+	List	   *columns = NIL;
+
+	switch (stmt->relkind)
+	{
+		case 'r':
+			{
+				ParseState *pstate = make_parsestate(NULL);
+				RangeTblEntry *rte;
+				ListCell   *cell;
+
+				pstate->p_sourcetext = queryString;
+				rte = addRangeTableEntry(pstate, stmt->relation, NULL, false, true);
+				addRTEtoQuery(pstate, rte, true, true, true);
+
+				foreach(cell, stmt->columns)
+					columns = lappend(columns, transformExpr(pstate, (Node *) lfirst(cell)));
+				free_parsestate(pstate);	/* results don't point into pstate */
+			}
+			break;
+
+		case 'i':
+			{
+				Oid			relId;
+				HeapTuple	tuple;
+				char		relkind;
+				int			natts;
+				AttrNumber	i;
+
+				relId = RangeVarGetRelid(stmt->relation, ShareLock, false);
+
+				/* a syscache miss returns NULL: check before GETSTRUCT() */
+				tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
+				if (!HeapTupleIsValid(tuple))
+					elog(ERROR, "cache lookup failed for relation %u", relId);
+				relkind = ((Form_pg_class) GETSTRUCT(tuple))->relkind;
+				ReleaseSysCache(tuple);
+
+				if (relkind != 'i')
+					elog(ERROR, "not an index");
+
+				tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relId));
+				if (!HeapTupleIsValid(tuple))
+					elog(ERROR, "cache lookup failed for index %u", relId);
+				natts = ((Form_pg_index) GETSTRUCT(tuple))->indnatts;
+				ReleaseSysCache(tuple);
+
+				if (natts < 2)
+					elog(ERROR, "cross column statistics are only usable on multi-column indexes");
+
+				for (i = 1; i <= natts; i++)
+				{
+					HeapTuple	attuple;
+					Form_pg_attribute attptr;
+
+					attuple = SearchSysCache2(ATTNUM, ObjectIdGetDatum(relId), Int16GetDatum(i));
+					if (!HeapTupleIsValid(attuple))
+						elog(ERROR, "pg_attribute row not found for index");
+					attptr = (Form_pg_attribute) GETSTRUCT(attuple);
+					columns = lappend(columns, makeVar(0, i,
+													   attptr->atttypid,
+													   attptr->atttypmod,
+													   InvalidOid, 0));
+					ReleaseSysCache(attuple);
+				}
+			}
+			break;
+
+		default:
+			elog(ERROR, "invalid relkind");
+	}
+
+	newstmt = makeNode(ExtraStatStmt);
+	newstmt->relkind = stmt->relkind;
+	newstmt->create = stmt->create;
+	newstmt->relation = copyObject(stmt->relation);
+	newstmt->columns = columns;
+	newstmt->statistics_target = stmt->statistics_target;
+
+	return newstmt;
+}
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 5b81c0b..6506142 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -230,6 +230,7 @@ check_xact_readonly(Node *parsetree)
case T_AlterTableSpaceOptionsStmt:
case T_CreateForeignTableStmt:
case T_SecLabelStmt:
+ case T_ExtraStatStmt:
PreventCommandIfReadOnly(CreateCommandTag(parsetree));
break;
default:
@@ -574,6 +575,14 @@ standard_ProcessUtility(Node *parsetree,
}
break;
+ case T_ExtraStatStmt:
+ {
+ ExtraStatStmt *newstmt = transformExtraStatistics((ExtraStatStmt *)parsetree, queryString);
+
+ ExtraStatistics(newstmt);
+ }
+ break;
+
case T_CreateTableSpaceStmt:
PreventTransactionChain(isTopLevel, "CREATE TABLESPACE");
CreateTableSpace((CreateTableSpaceStmt *) parsetree);
@@ -1638,6 +1647,17 @@ CreateCommandTag(Node *parsetree)
tag = "CREATE FOREIGN TABLE";
break;
+ case T_ExtraStatStmt:
+ {
+ ExtraStatStmt *stmt = (ExtraStatStmt *)parsetree;
+
+ if (stmt->create)
+ tag = "CREATE CROSS COLUMN STATISTICS";
+ else
+ tag = "DROP CROSS COLUMN STATISTICS";
+ }
+ break;
+
case T_DropStmt:
switch (((DropStmt *) parsetree)->removeType)
{
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 9771415..170d5ca 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -35,7 +35,8 @@ typedef struct
int delta; /* And this is 'delta'. */
} TrackItem;
-static void compute_tsvector_stats(VacAttrStats *stats,
+static void compute_tsvector_stats(VacStats *stats,
+ int index,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
@@ -53,17 +54,21 @@ static int trackitem_compare_lexemes(const void *e1, const void *e2);
Datum
ts_typanalyze(PG_FUNCTION_ARGS)
{
- VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
- Form_pg_attribute attr = stats->attr;
+ VacStats *stats = (VacStats *) PG_GETARG_POINTER(0);
+ int4 i = PG_GETARG_INT32(1);
- /* If the attstattarget column is negative, use the default value */
- /* NB: it is okay to scribble on stats->attr since it's a copy */
- if (attr->attstattarget < 0)
- attr->attstattarget = default_statistics_target;
+ /*
+  * The second argument selects which statfuncs[] entry of the VacStats is
+  * set up below; report failure (false) for an out-of-range value.
+  * NOTE(review): i indexes statfuncs[], yet it is bounded by
+  * STATISTIC_NUM_SLOTS -- confirm that is the intended limit for that array.
+  */
+ if (i < 0 || i >= STATISTIC_NUM_SLOTS)
+ PG_RETURN_BOOL(false);
- stats->compute_stats = compute_tsvector_stats;
+ /* If the statarget column is negative, use the default value */
+ /* NB: it is okay to scribble on stats->statarget since it's a copy */
+ if (stats->statarget < 0)
+ stats->statarget = default_statistics_target;
+
+ stats->statfuncs[i].compute_func_ptr = compute_tsvector_stats;
+ stats->statfuncs[i].findval_func_ptr = NULL; /* this prevents cross-column stats from being computed */
/* see comment about the choice of minrows in commands/analyze.c */
- stats->minrows = 300 * attr->attstattarget;
+ stats->minrows = 300 * stats->statarget;
PG_RETURN_BOOL(true);
}
@@ -136,7 +141,8 @@ ts_typanalyze(PG_FUNCTION_ARGS)
* want.
*/
static void
-compute_tsvector_stats(VacAttrStats *stats,
+compute_tsvector_stats(VacStats *stats,
+ int index,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows)
@@ -166,7 +172,7 @@ compute_tsvector_stats(VacAttrStats *stats,
* the number of individual lexeme values tracked in pg_statistic ought to
* be more than the number of values for a simple scalar column.
*/
- num_mcelem = stats->attr->attstattarget * 10;
+ num_mcelem = stats->statarget * 10;
/*
* We set bucket width equal to (num_mcelem + 10) / 0.007 as per the
@@ -206,7 +212,7 @@ compute_tsvector_stats(VacAttrStats *stats,
vacuum_delay_point();
- value = fetchfunc(stats, vector_no, &isnull);
+ value = fetchfunc(stats, vector_no, stats->attnums->values[index], &isnull);
/*
* Check for null/nonnull.
@@ -291,11 +297,11 @@ compute_tsvector_stats(VacAttrStats *stats,
stats->stats_valid = true;
/* Do the simple null-frac and average width stats */
- stats->stanullfrac = (double) null_cnt / (double) samplerows;
- stats->stawidth = total_width / (double) nonnull_cnt;
+ stats->stanullfrac[index] = (double) null_cnt / (double) samplerows;
+ stats->stawidth[index] = total_width / (double) nonnull_cnt;
/* Assume it's a unique column (see notes above) */
- stats->stadistinct = -1.0;
+ stats->stadistinct[index] = -1.0;
/*
* Construct an array of the interesting hashtable items, that is,
@@ -403,27 +409,27 @@ compute_tsvector_stats(VacAttrStats *stats,
mcelem_freqs[i] = (double) maxfreq / (double) nonnull_cnt;
MemoryContextSwitchTo(old_context);
- stats->stakind[0] = STATISTIC_KIND_MCELEM;
- stats->staop[0] = TextEqualOperator;
- stats->stanumbers[0] = mcelem_freqs;
+ stats->stakind[index][0] = STATISTIC_KIND_MCELEM;
+ stats->staop[index][0] = TextEqualOperator;
+ stats->stanumbers[index][0] = mcelem_freqs;
/* See above comment about two extra frequency fields */
- stats->numnumbers[0] = num_mcelem + 2;
- stats->stavalues[0] = mcelem_values;
- stats->numvalues[0] = num_mcelem;
+ stats->numnumbers[index][0] = num_mcelem + 2;
+ stats->stavalues[index][0] = mcelem_values;
+ stats->numvalues[index][0] = num_mcelem;
/* We are storing text values */
- stats->statypid[0] = TEXTOID;
- stats->statyplen[0] = -1; /* typlen, -1 for varlena */
- stats->statypbyval[0] = false;
- stats->statypalign[0] = 'i';
+ stats->statypid[index][0] = TEXTOID;
+ stats->statyplen[index][0] = -1; /* typlen, -1 for varlena */
+ stats->statypbyval[index][0] = false;
+ stats->statypalign[index][0] = 'i';
}
}
else
{
/* We found only nulls; assume the column is entirely null */
stats->stats_valid = true;
- stats->stanullfrac = 1.0;
- stats->stawidth = 0; /* "unknown" */
- stats->stadistinct = 0.0; /* "unknown" */
+ stats->stanullfrac[index] = 1.0;
+ stats->stawidth[index] = 0; /* "unknown" */
+ stats->stadistinct[index] = 0.0; /* "unknown" */
}
/*
diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c
index ba98739..4abc29e 100644
--- a/src/backend/utils/adt/array_typanalyze.c
+++ b/src/backend/utils/adt/array_typanalyze.c
@@ -50,7 +50,7 @@ typedef struct
FmgrInfo *hash;
/* Saved state from std_typanalyze() */
- AnalyzeAttrComputeStatsFunc std_compute_stats;
+ ComputeStatsFunc std_compute_stats;
void *std_extra_data;
} ArrayAnalyzeExtraData;
@@ -78,7 +78,7 @@ typedef struct
int frequency; /* Number of arrays seen with this count */
} DECountItem;
-static void compute_array_stats(VacAttrStats *stats,
+static void compute_array_stats(VacStats *stats, int index,
AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows);
static void prune_element_hashtable(HTAB *elements_tab, int b_current);
static uint32 element_hash(const void *key, Size keysize);
@@ -95,7 +95,8 @@ static int countitem_compare_count(const void *e1, const void *e2);
Datum
array_typanalyze(PG_FUNCTION_ARGS)
{
- VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+ VacStats *stats = (VacStats *) PG_GETARG_POINTER(0);
+ int4 index = PG_GETARG_INT32(1);
Oid element_typeid;
TypeCacheEntry *typentry;
ArrayAnalyzeExtraData *extra_data;
@@ -104,17 +105,17 @@ array_typanalyze(PG_FUNCTION_ARGS)
* Call the standard typanalyze function. It may fail to find needed
* operators, in which case we also can't do anything, so just fail.
*/
- if (!std_typanalyze(stats))
+ if (!std_typanalyze(stats, index))
PG_RETURN_BOOL(false);
/*
* Check attribute data type is a varlena array.
*/
- element_typeid = stats->attrtype->typelem;
+ element_typeid = stats->attrtypes[index]->typelem;
- if (!OidIsValid(element_typeid) || stats->attrtype->typlen != -1)
+ if (!OidIsValid(element_typeid) || stats->attrtypes[index]->typlen != -1)
elog(ERROR, "array_typanalyze was invoked for non-array type %u",
- stats->attrtypid);
+ stats->attrtypids[index]);
/*
* Gather information about the element type. If we fail to find
@@ -141,12 +142,12 @@ array_typanalyze(PG_FUNCTION_ARGS)
extra_data->hash = &typentry->hash_proc_finfo;
/* Save old compute_stats and extra_data for scalar statistics ... */
- extra_data->std_compute_stats = stats->compute_stats;
- extra_data->std_extra_data = stats->extra_data;
+ extra_data->std_compute_stats = stats->statfuncs[index].compute_func_ptr;
+ extra_data->std_extra_data = stats->extra_data[index];
/* ... and replace with our info */
- stats->compute_stats = compute_array_stats;
- stats->extra_data = extra_data;
+ stats->statfuncs[index].compute_func_ptr = compute_array_stats;
+ stats->extra_data[index] = extra_data;
/*
* Note we leave stats->minrows set as std_typanalyze set it. Should
@@ -211,7 +212,7 @@ array_typanalyze(PG_FUNCTION_ARGS)
* We divide the raw counts by nonnull_cnt to get those figures.
*/
static void
-compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+compute_array_stats(VacStats *stats, int index, AnalyzeAttrFetchFunc fetchfunc,
int samplerows, double totalrows)
{
ArrayAnalyzeExtraData *extra_data;
@@ -238,16 +239,16 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
HASHCTL count_hash_ctl;
DECountItem *count_item;
- extra_data = (ArrayAnalyzeExtraData *) stats->extra_data;
+ extra_data = (ArrayAnalyzeExtraData *) stats->extra_data[index];
/*
* Invoke analyze.c's standard analysis function to create scalar-style
* stats for the column. It will expect its own extra_data pointer,
* so temporarily install that.
*/
- stats->extra_data = extra_data->std_extra_data;
- (*extra_data->std_compute_stats) (stats, fetchfunc, samplerows, totalrows);
- stats->extra_data = extra_data;
+ stats->extra_data[index] = extra_data->std_extra_data;
+ (*extra_data->std_compute_stats) (stats, index, fetchfunc, samplerows, totalrows);
+ stats->extra_data[index] = extra_data;
/*
* Set up static pointer for use by subroutines. We wait till here in
@@ -262,7 +263,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
* the number of individual elements tracked in pg_statistic ought to be
* more than the number of values for a simple scalar column.
*/
- num_mcelem = stats->attr->attstattarget * 10;
+ num_mcelem = stats->statarget * 10;
/*
* We set bucket width equal to num_mcelem / 0.007 as per the comment
@@ -318,7 +319,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
vacuum_delay_point();
- value = fetchfunc(stats, array_no, &isnull);
+ value = fetchfunc(stats, array_no, stats->attnums->values[index], &isnull);
if (isnull)
{
/* array is null, just count that */
@@ -437,7 +438,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
/* Skip pg_statistic slots occupied by standard statistics */
slot_idx = 0;
- while (slot_idx < STATISTIC_NUM_SLOTS && stats->stakind[slot_idx] != 0)
+ while (slot_idx < STATISTIC_NUM_SLOTS && stats->stakind[index][slot_idx] != 0)
slot_idx++;
if (slot_idx > STATISTIC_NUM_SLOTS - 2)
elog(ERROR, "insufficient pg_statistic slots for array stats");
@@ -558,18 +559,18 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
MemoryContextSwitchTo(old_context);
- stats->stakind[slot_idx] = STATISTIC_KIND_MCELEM;
- stats->staop[slot_idx] = extra_data->eq_opr;
- stats->stanumbers[slot_idx] = mcelem_freqs;
+ stats->stakind[index][slot_idx] = STATISTIC_KIND_MCELEM;
+ stats->staop[index][slot_idx] = extra_data->eq_opr;
+ stats->stanumbers[index][slot_idx] = mcelem_freqs;
/* See above comment about extra stanumber entries */
- stats->numnumbers[slot_idx] = num_mcelem + 3;
- stats->stavalues[slot_idx] = mcelem_values;
- stats->numvalues[slot_idx] = num_mcelem;
+ stats->numnumbers[index][slot_idx] = num_mcelem + 3;
+ stats->stavalues[index][slot_idx] = mcelem_values;
+ stats->numvalues[index][slot_idx] = num_mcelem;
/* We are storing values of element type */
- stats->statypid[slot_idx] = extra_data->type_id;
- stats->statyplen[slot_idx] = extra_data->typlen;
- stats->statypbyval[slot_idx] = extra_data->typbyval;
- stats->statypalign[slot_idx] = extra_data->typalign;
+ stats->statypid[index][slot_idx] = extra_data->type_id;
+ stats->statyplen[index][slot_idx] = extra_data->typlen;
+ stats->statypbyval[index][slot_idx] = extra_data->typbyval;
+ stats->statypalign[index][slot_idx] = extra_data->typalign;
slot_idx++;
}
@@ -577,7 +578,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
count_items_count = hash_get_num_entries(count_tab);
if (count_items_count > 0)
{
- int num_hist = stats->attr->attstattarget;
+ int num_hist = stats->statarget;
DECountItem **sorted_count_items;
int j;
int delta;
@@ -659,10 +660,10 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
}
Assert(j == count_items_count - 1);
- stats->stakind[slot_idx] = STATISTIC_KIND_DECHIST;
- stats->staop[slot_idx] = extra_data->eq_opr;
- stats->stanumbers[slot_idx] = hist;
- stats->numnumbers[slot_idx] = num_hist + 1;
+ stats->stakind[index][slot_idx] = STATISTIC_KIND_DECHIST;
+ stats->staop[index][slot_idx] = extra_data->eq_opr;
+ stats->stanumbers[index][slot_idx] = hist;
+ stats->numnumbers[index][slot_idx] = num_hist + 1;
slot_idx++;
}
}
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index a339abf..cb10dbe 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -254,21 +254,53 @@ int2vectorsend(PG_FUNCTION_ARGS)
return array_send(fcinfo);
}
-/*
- * We don't have a complete set of int2vector support routines,
- * but we need int2vectoreq for catcache indexing.
- */
Datum
int2vectoreq(PG_FUNCTION_ARGS)
{
- int2vector *a = (int2vector *) PG_GETARG_POINTER(0);
- int2vector *b = (int2vector *) PG_GETARG_POINTER(1);
+ /*
+  * Equality is now derived from the comparison function: the call's own
+  * fcinfo is forwarded to btint2vectorcmp(), and the two arguments are
+  * equal iff it returns 0.
+  */
+ int32 cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
- if (a->dim1 != b->dim1)
- PG_RETURN_BOOL(false);
- PG_RETURN_BOOL(memcmp(a->values, b->values, a->dim1 * sizeof(int2)) == 0);
+ PG_RETURN_BOOL(cmp == 0);
+}
+
+Datum
+int2vectorne(PG_FUNCTION_ARGS)
+{
+	/* "<>": btint2vectorcmp() on the same call arguments is nonzero */
+	PG_RETURN_BOOL(DatumGetInt32(btint2vectorcmp(fcinfo)) != 0);
}
+Datum
+int2vectorlt(PG_FUNCTION_ARGS)
+{
+	/* "<": btint2vectorcmp() on the same call arguments is negative */
+	PG_RETURN_BOOL(DatumGetInt32(btint2vectorcmp(fcinfo)) < 0);
+}
+
+Datum
+int2vectorle(PG_FUNCTION_ARGS)
+{
+	/* "<=": btint2vectorcmp() on the same call arguments is not positive */
+	PG_RETURN_BOOL(DatumGetInt32(btint2vectorcmp(fcinfo)) <= 0);
+}
+
+Datum
+int2vectorge(PG_FUNCTION_ARGS)
+{
+	/* ">=": btint2vectorcmp() on the same call arguments is not negative */
+	PG_RETURN_BOOL(DatumGetInt32(btint2vectorcmp(fcinfo)) >= 0);
+}
+
+Datum
+int2vectorgt(PG_FUNCTION_ARGS)
+{
+	/* ">": btint2vectorcmp() on the same call arguments is positive */
+	PG_RETURN_BOOL(DatumGetInt32(btint2vectorcmp(fcinfo)) > 0);
+}
/*****************************************************************************
* PUBLIC ROUTINES *
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 382cd73..34db4a6 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -94,6 +94,7 @@
#include "access/gin.h"
#include "access/sysattr.h"
#include "catalog/index.h"
+#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
@@ -112,6 +113,7 @@
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
+#include "parser/parse_utilcmd.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
@@ -4140,6 +4142,34 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
}
/*
+ * validate_statistics -- sets vardata->statsTuple only if the statistics is valid
+ */
+void
+validate_statistics(VariableStatData *vardata,
+					Oid relid, AttrNumber *attnums, int n_attnums, bool inherited)
+{
+	int2vector *key;
+	HeapTuple	statrow;
+
+	/* the STATRELATTINH lookup takes the attribute numbers as an int2vector */
+	key = buildint2vector(attnums, n_attnums);
+	statrow = SearchSysCache3(STATRELATTINH,
+							  ObjectIdGetDatum(relid),
+							  PointerGetDatum(key),
+							  BoolGetDatum(inherited));
+	pfree(key);
+
+	/* no such row: leave vardata untouched */
+	if (!HeapTupleIsValid(statrow))
+		return;
+
+	/* row exists but is not flagged valid: drop our cache reference */
+	if (!((Form_pg_statistic) GETSTRUCT(statrow))->stavalid)
+	{
+		ReleaseSysCache(statrow);
+		return;
+	}
+
+	/* hand the tuple to the caller together with the function that frees it */
+	vardata->statsTuple = statrow;
+	vardata->freefunc = ReleaseSysCache;
+}
+
+/*
* examine_variable
* Try to look up statistical data about an expression.
* Fill in a VariableStatData struct to describe the expression.
@@ -4177,6 +4207,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
{
Node *basenode;
Relids varnos;
+ int onerelid = 0;
RelOptInfo *onerel;
/* Make sure we don't return dangling pointers in vardata */
@@ -4229,8 +4260,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
case BMS_SINGLETON:
if (varRelid == 0 || bms_is_member(varRelid, varnos))
{
- onerel = find_base_rel(root,
- (varRelid ? varRelid : bms_singleton_member(varnos)));
+ onerelid = (varRelid ? varRelid : bms_singleton_member(varnos));
+ onerel = find_base_rel(root, onerelid);
vardata->rel = onerel;
node = basenode; /* strip any relabeling */
}
@@ -4264,15 +4295,82 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
{
/*
* We have an expression in vars of a single relation. Try to match
- * it to expressional index columns, in hopes of finding some
- * statistics.
+ * it to expression statistics first then to expressional index columns,
+ * in hopes of finding some statistics.
*
* XXX it's conceivable that there are multiple matches with different
* index opfamilies; if so, we need to pick one that matches the
* operator we are estimating for. FIXME later.
*/
ListCell *ilist;
+ Node *expr = copyObject(node);
+ char *exprbin;
+ Datum exprbindatum;
+
+#if 0
+#define USE_SYSCACHE_FOR_SEARCH 0
+#if !USE_SYSCACHE_FOR_SEARCH
+ Relation rel;
+ ScanKeyData scanKey[2];
+ SysScanDesc scan;
+ HeapTuple tuple;
+#endif
+#endif
+
+ /* FIXME:
+ * query_or_expression_tree_walker(expr, set_location_unknown_walker, NULL, 0);*/
+ exprbin = nodeToString(expr);
+ exprbindatum = CStringGetTextDatum(exprbin);
+
+#if 0
+#if USE_SYSCACHE_FOR_SEARCH
+ vardata->statsTuple = SearchSysCache3(STAT3RELEXPRINH,
+ ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid),
+ exprbindatum,
+ BoolGetDatum(false));
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ vardata->stats_type = STAT_EXPRESSION;
+ vardata->freefunc = ReleaseSysCache;
+ return;
+ }
+#else
+
+ rel = heap_open(StatisticRelationId, RowShareLock);
+
+ ScanKeyInit(&scanKey[0],
+ Anum_pg_statistic3_sta3relid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid));
+ ScanKeyInit(&scanKey[1],
+ Anum_pg_statistic3_sta3expr,
+ BTEqualStrategyNumber, F_TEXTEQ,
+ exprbindatum);
+
+ scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true,
+ SnapshotNow, 2, scanKey);
+
+ tuple = systable_getnext(scan);
+ if (HeapTupleIsValid(tuple))
+ {
+// elog(NOTICE, "examine_variable expression found");
+ vardata->stats_type = STAT_EXPRESSION;
+ vardata->statsTuple = heap_copytuple(tuple);
+ vardata->freefunc = heap_freetuple;
+ }
+
+ systable_endscan(scan);
+
+ pfree(exprbin);
+ pfree(DatumGetPointer(exprbindatum));
+
+ relation_close(rel, RowShareLock);
+
+ if (vardata->statsTuple)
+ return;
+#endif
+#endif
foreach(ilist, onerel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
@@ -4330,12 +4428,9 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
}
else if (index->indpred == NIL)
{
- vardata->statsTuple =
- SearchSysCache3(STATRELATTINH,
- ObjectIdGetDatum(index->indexoid),
- Int16GetDatum(pos + 1),
- BoolGetDatum(false));
- vardata->freefunc = ReleaseSysCache;
+ int2 attnum = pos + 1;
+
+ validate_statistics(vardata, index->indexoid, &attnum, 1, false);
}
if (vardata->statsTuple)
break;
@@ -4383,11 +4478,7 @@ examine_simple_variable(PlannerInfo *root, Var *var,
* Plain table or parent of an inheritance appendrel, so look up the
* column in pg_statistic
*/
- vardata->statsTuple = SearchSysCache3(STATRELATTINH,
- ObjectIdGetDatum(rte->relid),
- Int16GetDatum(var->varattno),
- BoolGetDatum(rte->inh));
- vardata->freefunc = ReleaseSysCache;
+ validate_statistics(vardata, rte->relid, &(var->varattno), 1, rte->inh);
}
else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
{
@@ -6514,13 +6605,7 @@ btcostestimate(PG_FUNCTION_ARGS)
elog(ERROR, "no function provided to release variable stats with");
}
else
- {
- vardata.statsTuple = SearchSysCache3(STATRELATTINH,
- ObjectIdGetDatum(relid),
- Int16GetDatum(colnum),
- BoolGetDatum(rte->inh));
- vardata.freefunc = ReleaseSysCache;
- }
+ validate_statistics(&vardata, relid, &colnum, 1, rte->inh);
}
else
{
@@ -6540,13 +6625,7 @@ btcostestimate(PG_FUNCTION_ARGS)
elog(ERROR, "no function provided to release variable stats with");
}
else
- {
- vardata.statsTuple = SearchSysCache3(STATRELATTINH,
- ObjectIdGetDatum(relid),
- Int16GetDatum(colnum),
- BoolGetDatum(false));
- vardata.freefunc = ReleaseSysCache;
- }
+ validate_statistics(&vardata, relid, &colnum, 1, false);
}
if (HeapTupleIsValid(vardata.statsTuple))
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 44dab82..6391d92 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -2674,6 +2674,7 @@ get_attavgwidth(Oid relid, AttrNumber attnum)
{
HeapTuple tp;
int32 stawidth;
+ int2vector *attnumvector = NULL;
if (get_attavgwidth_hook)
{
@@ -2681,10 +2682,12 @@ get_attavgwidth(Oid relid, AttrNumber attnum)
if (stawidth > 0)
return stawidth;
}
+ attnumvector = buildint2vector(&attnum, 1);
tp = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(relid),
- Int16GetDatum(attnum),
+ PointerGetDatum(attnumvector),
BoolGetDatum(false));
+ pfree(attnumvector);
if (HeapTupleIsValid(tp))
{
stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth;
@@ -2735,7 +2738,7 @@ get_attstatsslot(HeapTuple statstuple,
Datum **values, int *nvalues,
float4 **numbers, int *nnumbers)
{
- Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+ Form_pg_statistic stats;
int i,
j;
Datum val;
@@ -2746,12 +2749,15 @@ get_attstatsslot(HeapTuple statstuple,
HeapTuple typeTuple;
Form_pg_type typeForm;
+ stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+
for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
{
if ((&stats->stakind1)[i] == reqkind &&
(reqop == InvalidOid || (&stats->staop1)[i] == reqop))
break;
}
+
if (i >= STATISTIC_NUM_SLOTS)
return false; /* not there */
@@ -2763,8 +2769,9 @@ get_attstatsslot(HeapTuple statstuple,
val = SysCacheGetAttr(STATRELATTINH, statstuple,
Anum_pg_statistic_stavalues1 + i,
&isnull);
+ /* invalid stats record, i.e. analyze hasn't yet run for this column */
if (isnull)
- elog(ERROR, "stavalues is null");
+ return false;
statarray = DatumGetArrayTypeP(val);
/*
@@ -2817,8 +2824,9 @@ get_attstatsslot(HeapTuple statstuple,
val = SysCacheGetAttr(STATRELATTINH, statstuple,
Anum_pg_statistic_stanumbers1 + i,
&isnull);
+ /* invalid stats record, i.e. analyze hasn't yet run for this column */
if (isnull)
- elog(ERROR, "stanumbers is null");
+ return false;
statarray = DatumGetArrayTypeP(val);
/*
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index a59950e..9da6ed8 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -4333,6 +4333,7 @@ write_relcache_init_file(bool shared)
write_item(relform, CLASS_TUPLE_SIZE, fp);
/* next, do all the attribute tuple form data entries */
+ elog(LOG, "write_relcache_init_file relname '%s' relnatts %d", NameStr(relform->relname), relform->relnatts);
for (i = 0; i < relform->relnatts; i++)
{
write_item(rel->rd_att->attrs[i], ATTRIBUTE_FIXED_PART_SIZE, fp);
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 78ce0b8..f7c1ba0 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -600,11 +600,11 @@ static const struct cachedesc cacheinfo[] = {
1024
},
{StatisticRelationId, /* STATRELATTINH */
- StatisticRelidAttnumInhIndexId,
+ StatisticRelidAttnumsInhIndexId,
3,
{
Anum_pg_statistic_starelid,
- Anum_pg_statistic_staattnum,
+ Anum_pg_statistic_staattnums,
Anum_pg_statistic_stainherit,
0
},
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 99cf5b4..bb95a6d 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -119,6 +119,12 @@ static bool noclean = false;
static bool show_setting = false;
static char *xlog_dir = "";
+/*
+ * Override DEVNULL so that every command in this file that redirects backend
+ * output to DEVNULL writes to a log file instead of discarding it.
+ * NOTE(review): this looks like a debugging aid -- the path is hard-coded and
+ * Unix-only; confirm it is not meant to be committed.
+ */
+#ifdef DEVNULL
+#undef DEVNULL
+#endif
+#define DEVNULL "/tmp/initdb.log"
+
+/*
+ * Separate stdin source for the test backends in test_config_settings(),
+ * needed because DEVNULL above now names an output log file.
+ * NOTE(review): /dev/zero never yields EOF, unlike the null device it
+ * replaces as stdin -- confirm the probe backends still terminate cleanly.
+ */
+#define DEVNULLINPUT "/dev/zero"
/* internal vars */
static const char *progname;
@@ -165,7 +171,7 @@ static char *authwarning = NULL;
* (no quoting to worry about).
*/
static const char *boot_options = "-F";
-static const char *backend_options = "--single -F -O -c search_path=pg_catalog -c exit_on_error=true";
+static const char *backend_options = "--single -F -O -c search_path=pg_catalog -c exit_on_error=true -c log_min_messages=warning -c log_min_error_statement=info ";
/* path to 'initdb' binary directory */
@@ -924,7 +930,7 @@ test_config_settings(void)
"< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE,
backend_exec, boot_options,
test_conns, test_buffs,
- DEVNULL, DEVNULL);
+ DEVNULLINPUT, DEVNULL);
status = system(cmd);
if (status == 0)
{
@@ -958,7 +964,7 @@ test_config_settings(void)
"< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE,
backend_exec, boot_options,
n_connections, test_buffs,
- DEVNULL, DEVNULL);
+ DEVNULLINPUT, DEVNULL);
status = system(cmd);
if (status == 0)
break;
@@ -1273,7 +1279,7 @@ setup_auth(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1351,7 +1357,7 @@ get_set_pwd(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1451,7 +1457,7 @@ setup_depend(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1484,7 +1490,7 @@ setup_sysviews(void)
* We use -j here to avoid backslashing stuff in system_views.sql
*/
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s -j template1 >%s",
+ "\"%s\" %s -j template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1515,7 +1521,7 @@ setup_description(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1622,7 +1628,7 @@ setup_collation(void)
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1761,7 +1767,7 @@ setup_conversion(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1799,7 +1805,7 @@ setup_dictionary(void)
* We use -j here to avoid backslashing stuff
*/
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s -j template1 >%s",
+ "\"%s\" %s -j template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1850,7 +1856,7 @@ setup_privileges(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1913,7 +1919,7 @@ setup_schema(void)
* We use -j here to avoid backslashing stuff in information_schema.sql
*/
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s -j template1 >%s",
+ "\"%s\" %s -j template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1930,7 +1936,7 @@ setup_schema(void)
PG_CMD_CLOSE;
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1964,7 +1970,7 @@ load_plpgsql(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -1989,7 +1995,7 @@ vacuum_db(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -2045,7 +2051,7 @@ make_template0(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
@@ -2077,7 +2083,7 @@ make_postgres(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s template1 >%s",
+ "\"%s\" %s template1 >>%s 2>&1",
backend_exec, backend_options,
DEVNULL);
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 4eee4be..42153de 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -1300,7 +1300,8 @@ describeOneTableDetails(const char *schemaname,
if (verbose)
{
appendPQExpBuffer(&buf, ",\n a.attstorage");
- appendPQExpBuffer(&buf, ",\n CASE WHEN a.attstattarget=-1 THEN NULL ELSE a.attstattarget END AS attstattarget");
+ /* FIXME: attstattarget moved out of pg_attribute; report pg_statistic.statarget here instead of NULL */
+ appendPQExpBuffer(&buf, ",\n NULL AS attstattarget");
/*
* In 9.0+, we have column comments for: relations, views, composite
* types, and foreign tables (c.f. CommentObject() in comment.c).
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 2055382..b0a3009 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -109,7 +109,12 @@ extern void RemoveAttributeById(Oid relid, AttrNumber attnum);
extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
DropBehavior behavior, bool complain, bool internal);
extern void RemoveAttrDefaultById(Oid attrdefId);
-extern void RemoveStatistics(Oid relid, AttrNumber attnum);
+extern void AddStatistics(Oid relid, AttrNumber *attnums,
+ int n_attnums,
+ bool inherited,
+ int statistics_target);
+extern void InvalidateStatistics(Oid relid, AttrNumber attnum);
+extern void RemoveStatistics(Oid relid, AttrNumber *attnums, int n_attnums);
extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
bool relhasoids);
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 450ec25..90a5c7b 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -218,8 +218,8 @@ DECLARE_INDEX(pg_shdepend_depender_index, 1232, on pg_shdepend using btree(dbid
DECLARE_INDEX(pg_shdepend_reference_index, 1233, on pg_shdepend using btree(refclassid oid_ops, refobjid oid_ops));
#define SharedDependReferenceIndexId 1233
-DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_inh_index, 2696, on pg_statistic using btree(starelid oid_ops, staattnum int2_ops, stainherit bool_ops));
-#define StatisticRelidAttnumInhIndexId 2696
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_inh_index, 2696, on pg_statistic using btree(starelid oid_ops, staattnums int2vector_ops, stainherit bool_ops));
+#define StatisticRelidAttnumsInhIndexId 2696
DECLARE_UNIQUE_INDEX(pg_tablespace_oid_index, 2697, on pg_tablespace using btree(oid oid_ops));
#define TablespaceOidIndexId 2697
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 82391b0..b6d9cd6 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -185,6 +185,16 @@ DATA(insert ( 1991 30 30 4 s 648 403 0 ));
DATA(insert ( 1991 30 30 5 s 646 403 0 ));
/*
+ * btree int2vector_ops
+ */
+
+DATA(insert ( 3171 22 22 1 s 3166 403 0 ));
+DATA(insert ( 3171 22 22 2 s 3169 403 0 ));
+DATA(insert ( 3171 22 22 3 s 386 403 0 ));
+DATA(insert ( 3171 22 22 4 s 3170 403 0 ));
+DATA(insert ( 3171 22 22 5 s 3167 403 0 ));
+
+/*
* btree float_ops
*/
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 72307de..191270c 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -133,6 +133,7 @@ DATA(insert ( 2233 703 703 1 380 ));
DATA(insert ( 2234 704 704 1 381 ));
DATA(insert ( 2789 27 27 1 2794 ));
DATA(insert ( 2968 2950 2950 1 2960 ));
+DATA(insert ( 3171 22 22 1 3168 ));
DATA(insert ( 3522 3500 3500 1 3514 ));
diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h
index 45e38e4..083629c 100644
--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -48,15 +48,6 @@ CATALOG(pg_attribute,1249) BKI_BOOTSTRAP BKI_WITHOUT_OIDS BKI_ROWTYPE_OID(75) BK
Oid atttypid;
/*
- * attstattarget is the target number of statistics datapoints to collect
- * during VACUUM ANALYZE of this column. A zero here indicates that we do
- * not wish to collect any stats about this column. A "-1" here indicates
- * that no value has been explicitly set for this column, so ANALYZE
- * should use the default setting.
- */
- int4 attstattarget;
-
- /*
* attlen is a copy of the typlen field from pg_type for this attribute.
* See atttypid comments above.
*/
@@ -180,28 +171,27 @@ typedef FormData_pg_attribute *Form_pg_attribute;
* ----------------
*/
-#define Natts_pg_attribute 21
+#define Natts_pg_attribute 20
#define Anum_pg_attribute_attrelid 1
#define Anum_pg_attribute_attname 2
#define Anum_pg_attribute_atttypid 3
-#define Anum_pg_attribute_attstattarget 4
-#define Anum_pg_attribute_attlen 5
-#define Anum_pg_attribute_attnum 6
-#define Anum_pg_attribute_attndims 7
-#define Anum_pg_attribute_attcacheoff 8
-#define Anum_pg_attribute_atttypmod 9
-#define Anum_pg_attribute_attbyval 10
-#define Anum_pg_attribute_attstorage 11
-#define Anum_pg_attribute_attalign 12
-#define Anum_pg_attribute_attnotnull 13
-#define Anum_pg_attribute_atthasdef 14
-#define Anum_pg_attribute_attisdropped 15
-#define Anum_pg_attribute_attislocal 16
-#define Anum_pg_attribute_attinhcount 17
-#define Anum_pg_attribute_attcollation 18
-#define Anum_pg_attribute_attacl 19
-#define Anum_pg_attribute_attoptions 20
-#define Anum_pg_attribute_attfdwoptions 21
+#define Anum_pg_attribute_attlen 4
+#define Anum_pg_attribute_attnum 5
+#define Anum_pg_attribute_attndims 6
+#define Anum_pg_attribute_attcacheoff 7
+#define Anum_pg_attribute_atttypmod 8
+#define Anum_pg_attribute_attbyval 9
+#define Anum_pg_attribute_attstorage 10
+#define Anum_pg_attribute_attalign 11
+#define Anum_pg_attribute_attnotnull 12
+#define Anum_pg_attribute_atthasdef 13
+#define Anum_pg_attribute_attisdropped 14
+#define Anum_pg_attribute_attislocal 15
+#define Anum_pg_attribute_attinhcount 16
+#define Anum_pg_attribute_attcollation 17
+#define Anum_pg_attribute_attacl 18
+#define Anum_pg_attribute_attoptions 19
+#define Anum_pg_attribute_attfdwoptions 20
/* ----------------
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 1567206..4df03b4 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -132,7 +132,7 @@ typedef FormData_pg_class *Form_pg_class;
/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 _null_ _null_ ));
DESCR("");
DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
DESCR("");
diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h
index 96eaa60..94b74ce 100644
--- a/src/include/catalog/pg_opclass.h
+++ b/src/include/catalog/pg_opclass.h
@@ -165,6 +165,7 @@ DATA(insert ( 403 bpchar_pattern_ops PGNSP PGUID 2097 1042 f 0 ));
DATA(insert ( 403 money_ops PGNSP PGUID 2099 790 t 0 ));
DATA(insert ( 405 bool_ops PGNSP PGUID 2222 16 t 0 ));
DATA(insert ( 405 bytea_ops PGNSP PGUID 2223 17 t 0 ));
+DATA(insert ( 403 int2vector_ops PGNSP PGUID 3171 22 t 0 ));
DATA(insert ( 405 int2vector_ops PGNSP PGUID 2224 22 t 0 ));
DATA(insert ( 403 tid_ops PGNSP PGUID 2789 27 t 0 ));
DATA(insert ( 405 xid_ops PGNSP PGUID 2225 28 t 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index 48ddd16..c0b19b2 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,19 @@ DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel e
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 386 ( "=" PGNSP PGUID b t t 22 22 16 386 3165 int2vectoreq eqsel eqjoinsel ));
+DESCR("equal");
+DATA(insert OID = 3165 ( "<>" PGNSP PGUID b f f 22 22 16 3165 386 int2vectorne neqsel neqjoinsel ));
+DESCR("not equal");
+DATA(insert OID = 3166 ( "<" PGNSP PGUID b f f 22 22 16 3167 3170 int2vectorlt scalarltsel scalarltjoinsel ));
+DESCR("less than");
+DATA(insert OID = 3167 ( ">" PGNSP PGUID b f f 22 22 16 3166 3169 int2vectorgt scalargtsel scalargtjoinsel ));
+DESCR("greater than");
+DATA(insert OID = 3169 ( "<=" PGNSP PGUID b f f 22 22 16 3170 3167 int2vectorle scalarltsel scalarltjoinsel ));
+DESCR("less than or equal");
+DATA(insert OID = 3170 ( ">=" PGNSP PGUID b f f 22 22 16 3169 3166 int2vectorge scalargtsel scalargtjoinsel ));
+DESCR("greater than or equal");
+
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
DATA(insert OID = 374 ( "||" PGNSP PGUID b f f 2283 2277 2277 0 0 array_prepend - - ));
@@ -151,8 +164,6 @@ DATA(insert OID = 389 ( "!!" PGNSP PGUID l f f 0 20 1700 0 0 numeric_fac
DESCR("deprecated, use ! instead");
DATA(insert OID = 385 ( "=" PGNSP PGUID b f t 29 29 16 385 0 cideq eqsel eqjoinsel ));
DESCR("equal");
-DATA(insert OID = 386 ( "=" PGNSP PGUID b f t 22 22 16 386 0 int2vectoreq eqsel eqjoinsel ));
-DESCR("equal");
DATA(insert OID = 387 ( "=" PGNSP PGUID b t f 27 27 16 387 402 tideq eqsel eqjoinsel ));
DESCR("equal");
diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h
index 41ebccc..69b67ec 100644
--- a/src/include/catalog/pg_opfamily.h
+++ b/src/include/catalog/pg_opfamily.h
@@ -114,6 +114,7 @@ DATA(insert OID = 2099 ( 403 money_ops PGNSP PGUID ));
DATA(insert OID = 2222 ( 405 bool_ops PGNSP PGUID ));
#define BOOL_HASH_FAM_OID 2222
DATA(insert OID = 2223 ( 405 bytea_ops PGNSP PGUID ));
+DATA(insert OID = 3171 ( 403 int2vector_ops PGNSP PGUID ));
DATA(insert OID = 2224 ( 405 int2vector_ops PGNSP PGUID ));
DATA(insert OID = 2789 ( 403 tid_ops PGNSP PGUID ));
DATA(insert OID = 2225 ( 405 xid_ops PGNSP PGUID ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 2db8489..541f487 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -220,6 +220,13 @@ DESCR("length");
DATA(insert OID = 1258 ( textcat PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 25 "25 25" _null_ _null_ _null_ _null_ textcat _null_ _null_ _null_ ));
DATA(insert OID = 84 ( boolne PGNSP PGUID 12 1 0 0 0 f f f t t f i 2 0 16 "16 16" _null_ _null_ _null_ _null_ boolne _null_ _null_ _null_ ));
+
+DATA(insert OID = 86 ( int2vectorne PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorne _null_ _null_ _null_ ));
+DATA(insert OID = 87 ( int2vectorlt PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorlt _null_ _null_ _null_ ));
+DATA(insert OID = 88 ( int2vectorle PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorle _null_ _null_ _null_ ));
+DATA(insert OID = 90 ( int2vectorge PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorge _null_ _null_ _null_ ));
+DATA(insert OID = 3172 ( int2vectorgt PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorgt _null_ _null_ _null_ ));
+
DATA(insert OID = 89 ( version PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 25 "" _null_ _null_ _null_ _null_ pgsql_version _null_ _null_ _null_ ));
DESCR("PostgreSQL version string");
@@ -574,6 +581,8 @@ DATA(insert OID = 350 ( btint2cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0
DESCR("less-equal-greater");
DATA(insert OID = 3129 ( btint2sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ btint2sortsupport _null_ _null_ _null_ ));
DESCR("sort support");
+DATA(insert OID = 3168 ( btint2vectorcmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "22 22" _null_ _null_ _null_ _null_ btint2vectorcmp _null_ _null_ _null_ ));
+DESCR("less-equal-greater");
DATA(insert OID = 351 ( btint4cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "23 23" _null_ _null_ _null_ _null_ btint4cmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
DATA(insert OID = 3130 ( btint4sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ btint4sortsupport _null_ _null_ _null_ ));
@@ -2936,7 +2945,7 @@ DESCR("xlog filename and byte offset, given an xlog location");
DATA(insert OID = 2851 ( pg_xlogfile_name PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ pg_xlogfile_name _null_ _null_ _null_ ));
DESCR("xlog filename, given an xlog location");
-DATA(insert OID = 3165 ( pg_xlog_location_diff PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1700 "25 25" _null_ _null_ _null_ _null_ pg_xlog_location_diff _null_ _null_ _null_ ));
+DATA(insert OID = 3160 ( pg_xlog_location_diff PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1700 "25 25" _null_ _null_ _null_ _null_ pg_xlog_location_diff _null_ _null_ _null_ ));
DESCR("difference in bytes, given two xlog locations");
DATA(insert OID = 3809 ( pg_export_snapshot PGNSP PGUID 12 1 0 0 0 f f f f t f v 0 0 25 "" _null_ _null_ _null_ _null_ pg_export_snapshot _null_ _null_ _null_ ));
@@ -4246,7 +4255,7 @@ DATA(insert OID = 3686 ( tsmatchsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 4 0 7
DESCR("restriction selectivity of tsvector @@ tsquery");
DATA(insert OID = 3687 ( tsmatchjoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ tsmatchjoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of tsvector @@ tsquery");
-DATA(insert OID = 3688 ( ts_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ ts_typanalyze _null_ _null_ _null_ ));
+DATA(insert OID = 3688 ( ts_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 16 "2281 23" _null_ _null_ _null_ _null_ ts_typanalyze _null_ _null_ _null_ ));
DESCR("tsvector typanalyze");
DATA(insert OID = 3689 ( ts_stat PGNSP PGUID 12 10 10000 0 0 f f f f t t v 1 0 2249 "25" "{25,25,23,23}" "{i,o,o,o}" "{query,word,ndoc,nentry}" _null_ ts_stat1 _null_ _null_ _null_ ));
@@ -4515,7 +4524,7 @@ DATA(insert OID = 3881 ( range_gist_same PGNSP PGUID 12 1 0 0 0 f f f f t f i
DESCR("GiST support");
DATA(insert OID = 3902 ( hash_range PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 23 "3831" _null_ _null_ _null_ _null_ hash_range _null_ _null_ _null_ ));
DESCR("hash a range");
-DATA(insert OID = 3916 ( range_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ range_typanalyze _null_ _null_ _null_ ));
+DATA(insert OID = 3916 ( range_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 16 "2281 23" _null_ _null_ _null_ _null_ range_typanalyze _null_ _null_ _null_ ));
DESCR("range typanalyze");
DATA(insert OID = 3914 ( int4range_canonical PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 3904 "3904" _null_ _null_ _null_ _null_ int4range_canonical _null_ _null_ _null_ ));
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h
index 383cc01..c35198a 100644
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -30,11 +30,24 @@
CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS
{
- /* These fields form the unique key for the entry: */
+ /*
+ * These fields (together with the staattnums field below in the variable fields)
+ * form the unique key for the entry:
+ */
Oid starelid; /* relation containing attribute */
- int2 staattnum; /* attribute (column) stats are for */
bool stainherit; /* true if inheritance children are included */
+ /* this entry is valid */
+ bool stavalid;
+
+ /*
+ * statarget is the target number of statistics datapoints to collect during
+ * VACUUM ANALYZE of the column(s) in staattnums. A zero here indicates that
+ * we do not wish to collect any stats for them. A "-1" here indicates that no
+ * value has been explicitly set, so ANALYZE should use the default setting.
+ */
+ int4 statarget;
+
/* the fraction of the column's entries that are NULL: */
float4 stanullfrac;
@@ -96,7 +109,15 @@ CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS
Oid staop4;
Oid staop5;
-#ifdef CATALOG_VARLEN /* variable-length fields start here */
+#ifdef CATALOG_VARLEN /* variable-length fields start here */
+ /*
+ * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ * (NULL). They cannot be accessed as C struct entries; you have to use
+ * the full field access machinery (heap_getattr) for them. We declare
+ * them here for the catalog machinery.
+ */
+ int2vector staattnums; /* attributes (columns) stats are for */
+
float4 stanumbers1[1];
float4 stanumbers2[1];
float4 stanumbers3[1];
@@ -131,33 +152,35 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* compiler constants for pg_statistic
* ----------------
*/
-#define Natts_pg_statistic 26
+#define Natts_pg_statistic 28
#define Anum_pg_statistic_starelid 1
-#define Anum_pg_statistic_staattnum 2
-#define Anum_pg_statistic_stainherit 3
-#define Anum_pg_statistic_stanullfrac 4
-#define Anum_pg_statistic_stawidth 5
-#define Anum_pg_statistic_stadistinct 6
-#define Anum_pg_statistic_stakind1 7
-#define Anum_pg_statistic_stakind2 8
-#define Anum_pg_statistic_stakind3 9
-#define Anum_pg_statistic_stakind4 10
-#define Anum_pg_statistic_stakind5 11
-#define Anum_pg_statistic_staop1 12
-#define Anum_pg_statistic_staop2 13
-#define Anum_pg_statistic_staop3 14
-#define Anum_pg_statistic_staop4 15
-#define Anum_pg_statistic_staop5 16
-#define Anum_pg_statistic_stanumbers1 17
-#define Anum_pg_statistic_stanumbers2 18
-#define Anum_pg_statistic_stanumbers3 19
-#define Anum_pg_statistic_stanumbers4 20
-#define Anum_pg_statistic_stanumbers5 21
-#define Anum_pg_statistic_stavalues1 22
-#define Anum_pg_statistic_stavalues2 23
-#define Anum_pg_statistic_stavalues3 24
-#define Anum_pg_statistic_stavalues4 25
-#define Anum_pg_statistic_stavalues5 26
+#define Anum_pg_statistic_stainherit 2
+#define Anum_pg_statistic_stavalid 3
+#define Anum_pg_statistic_statarget 4
+#define Anum_pg_statistic_stanullfrac 5
+#define Anum_pg_statistic_stawidth 6
+#define Anum_pg_statistic_stadistinct 7
+#define Anum_pg_statistic_stakind1 8
+#define Anum_pg_statistic_stakind2 9
+#define Anum_pg_statistic_stakind3 10
+#define Anum_pg_statistic_stakind4 11
+#define Anum_pg_statistic_stakind5 12
+#define Anum_pg_statistic_staop1 13
+#define Anum_pg_statistic_staop2 14
+#define Anum_pg_statistic_staop3 15
+#define Anum_pg_statistic_staop4 16
+#define Anum_pg_statistic_staop5 17
+#define Anum_pg_statistic_staattnums 18
+#define Anum_pg_statistic_stanumbers1 19
+#define Anum_pg_statistic_stanumbers2 20
+#define Anum_pg_statistic_stanumbers3 21
+#define Anum_pg_statistic_stanumbers4 22
+#define Anum_pg_statistic_stanumbers5 23
+#define Anum_pg_statistic_stavalues1 24
+#define Anum_pg_statistic_stavalues2 25
+#define Anum_pg_statistic_stavalues3 26
+#define Anum_pg_statistic_stavalues4 27
+#define Anum_pg_statistic_stavalues5 28
/*
* Currently, five statistical slot "kinds" are defined by core PostgreSQL,
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 163b2ea..f57be6a 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -91,6 +91,9 @@ extern void DefineAggregate(List *name, List *args, bool oldstyle,
extern void RenameAggregate(List *name, List *args, const char *newname);
extern void AlterAggregateOwner(List *name, List *args, Oid newOwnerId);
+/* commands/analyze.c */
+extern void ExtraStatistics(ExtraStatStmt *stmt);
+
/* commands/opclasscmds.c */
extern void DefineOpClass(CreateOpClassStmt *stmt);
extern void DefineOpFamily(CreateOpFamilyStmt *stmt);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 3deee66..45bb99e 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -56,17 +56,29 @@
* This might change in some future release.
*----------
*/
-typedef struct VacAttrStats *VacAttrStatsP;
+typedef struct VacStats *VacStatsP;
-typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
+typedef Datum (*AnalyzeAttrFetchFunc) (VacStatsP stats, int rownum, AttrNumber tupattnum,
bool *isNull);
-typedef void (*AnalyzeAttrComputeStatsFunc) (VacAttrStatsP stats,
- AnalyzeAttrFetchFunc fetchfunc,
- int samplerows,
- double totalrows);
+typedef void (*ComputeStatsFunc) (VacStatsP stats,
+ int index,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
-typedef struct VacAttrStats
+typedef int (*FindValueIndex) (VacStatsP stats,
+ int rownum,
+ int index,
+ AnalyzeAttrFetchFunc fetchfunc,
+ void *arg);
+
+typedef struct StatsFuncStruct { /* pair of callbacks; VacStats.statfuncs[] holds an array of these in place of the old single compute_stats pointer */
+ ComputeStatsFunc compute_func_ptr; /* NOTE(review): presumably the routine that fills in the statistics fields -- confirm in analyze.c */
+ FindValueIndex findval_func_ptr; /* NOTE(review): returns an int for a given (rownum, index); exact meaning not visible here -- confirm */
+} StatsFuncStruct;
+
+typedef struct VacStats
{
/*
* These fields are set up by the main ANALYZE code before invoking the
@@ -78,34 +90,36 @@ typedef struct VacAttrStats
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
* information about the datatype being fed to the typanalyze function.
*/
- Form_pg_attribute attr; /* copy of pg_attribute row for column */
- Oid attrtypid; /* type of data being analyzed */
- int32 attrtypmod; /* typmod of data being analyzed */
- Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
+ Form_pg_attribute attrs[STATISTIC_NUM_SLOTS]; /* copy of pg_attribute row for column */
+ int2vector *attnums; /* array of attributes this statistics is for */
+ int4 statarget; /* effective statistics target */
+ Oid attrtypids[STATISTIC_NUM_SLOTS]; /* type of data being analyzed */
+ int32 attrtypmods[STATISTIC_NUM_SLOTS]; /* typmod of data being analyzed */
+ Form_pg_type attrtypes[STATISTIC_NUM_SLOTS]; /* copy of pg_type row for attrtypid */
MemoryContext anl_context; /* where to save long-lived data */
/*
* These fields must be filled in by the typanalyze routine, unless it
* returns FALSE.
*/
- AnalyzeAttrComputeStatsFunc compute_stats; /* function pointer */
+ StatsFuncStruct statfuncs[STATISTIC_NUM_SLOTS];
int minrows; /* Minimum # of rows wanted for stats */
- void *extra_data; /* for extra type-specific data */
+ void *extra_data[STATISTIC_NUM_SLOTS]; /* for extra type-specific data */
/*
* These fields are to be filled in by the compute_stats routine. (They
* are initialized to zero when the struct is created.)
*/
bool stats_valid;
- float4 stanullfrac; /* fraction of entries that are NULL */
- int4 stawidth; /* average width of column values */
- float4 stadistinct; /* # distinct values */
- int2 stakind[STATISTIC_NUM_SLOTS];
- Oid staop[STATISTIC_NUM_SLOTS];
- int numnumbers[STATISTIC_NUM_SLOTS];
- float4 *stanumbers[STATISTIC_NUM_SLOTS];
- int numvalues[STATISTIC_NUM_SLOTS];
- Datum *stavalues[STATISTIC_NUM_SLOTS];
+ float4 stanullfrac[STATISTIC_NUM_SLOTS]; /* fraction of entries that are NULL */
+ int4 stawidth[STATISTIC_NUM_SLOTS]; /* average width of column values */
+ float4 stadistinct[STATISTIC_NUM_SLOTS]; /* # distinct values */
+ int2 stakind[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ Oid staop[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ int numnumbers[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ float4 *stanumbers[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ int numvalues[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ Datum *stavalues[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
/*
* These fields describe the stavalues[n] element types. They will be
@@ -113,22 +127,21 @@ typedef struct VacAttrStats
* want to store an array of something other than the analyzed column's
* elements. It should then overwrite these fields.
*/
- Oid statypid[STATISTIC_NUM_SLOTS];
- int2 statyplen[STATISTIC_NUM_SLOTS];
- bool statypbyval[STATISTIC_NUM_SLOTS];
- char statypalign[STATISTIC_NUM_SLOTS];
+ Oid statypid[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ int2 statyplen[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ bool statypbyval[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
+ char statypalign[STATISTIC_NUM_SLOTS][STATISTIC_NUM_SLOTS];
/*
* These fields are private to the main ANALYZE code and should not be
* looked at by type-specific functions.
*/
- int tupattnum; /* attribute number within tuples */
HeapTuple *rows; /* access info for std fetch function */
TupleDesc tupDesc;
Datum *exprvals; /* access info for index fetch function */
bool *exprnulls;
int rowstride;
-} VacAttrStats;
+} VacStats;
/* GUC parameters */
@@ -169,6 +182,6 @@ extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
/* in commands/analyze.c */
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy);
-extern bool std_typanalyze(VacAttrStats *stats);
+extern bool std_typanalyze(VacStats *stats, int index);
#endif /* VACUUM_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 905458f..3f1ff9c 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -355,6 +355,7 @@ typedef enum NodeTag
T_CreateExtensionStmt,
T_AlterExtensionStmt,
T_AlterExtensionContentsStmt,
+ T_ExtraStatStmt,
/*
* TAGS FOR PARSE TREE NODES (parsenodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index ab55639..8c4340c 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1162,6 +1162,20 @@ typedef enum DropBehavior
} DropBehavior;
/* ----------------------
+ * Create Cross Column Statistics
+ * ----------------------
+ */
+typedef struct ExtraStatStmt /* parse node (T_ExtraStatStmt) for the cross-column statistics statement; consumed by ExtraStatistics() */
+{
+ NodeTag type;
+ char relkind; /* NOTE(review): meaning not visible in this header; presumably the kind of target relation -- confirm */
+ bool create; /* NOTE(review): presumably true = create the statistics, false = drop them -- confirm against the grammar */
+ RangeVar *relation; /* relation the statement refers to */
+ List *columns; /* list of columns named in the statement */
+ int statistics_target; /* NOTE(review): presumably passed on as AddStatistics()'s statistics_target -- confirm */
+} ExtraStatStmt;
+
+/* ----------------------
* Alter Table
* ----------------------
*/
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index 4ad793a..541d8d1 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -25,4 +25,7 @@ extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
List **actions, Node **whereClause);
extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
+extern ExtraStatStmt *transformExtraStatistics(ExtraStatStmt *stmt,
+ const char *queryString);
+
#endif /* PARSE_UTILCMD_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 9fda7ad..0a34097 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -180,6 +180,11 @@ extern Datum int2vectorout(PG_FUNCTION_ARGS);
extern Datum int2vectorrecv(PG_FUNCTION_ARGS);
extern Datum int2vectorsend(PG_FUNCTION_ARGS);
extern Datum int2vectoreq(PG_FUNCTION_ARGS);
+extern Datum int2vectorne(PG_FUNCTION_ARGS);
+extern Datum int2vectorlt(PG_FUNCTION_ARGS);
+extern Datum int2vectorle(PG_FUNCTION_ARGS);
+extern Datum int2vectorgt(PG_FUNCTION_ARGS);
+extern Datum int2vectorge(PG_FUNCTION_ARGS);
extern Datum int4in(PG_FUNCTION_ARGS);
extern Datum int4out(PG_FUNCTION_ARGS);
extern Datum int4recv(PG_FUNCTION_ARGS);
@@ -289,6 +294,7 @@ extern void pg_lltoa(int64 ll, char *a);
*/
extern Datum btboolcmp(PG_FUNCTION_ARGS);
extern Datum btint2cmp(PG_FUNCTION_ARGS);
+extern Datum btint2vectorcmp(PG_FUNCTION_ARGS);
extern Datum btint4cmp(PG_FUNCTION_ARGS);
extern Datum btint8cmp(PG_FUNCTION_ARGS);
extern Datum btfloat4cmp(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 4529f27..0b90312 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -107,6 +107,10 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root,
VariableStatData *vardata);
extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
+extern void validate_statistics(VariableStatData *vardata,
+ Oid relid,
+ AttrNumber *attnums, int n_attnums,
+ bool inherited);
/* Functions in selfuncs.c */
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index aaf0cca..92a141e 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1317,7 +1317,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
pg_statio_user_indexes | SELECT pg_statio_all_indexes.relid, pg_statio_all_indexes.indexrelid, pg_statio_all_indexes.schemaname, pg_statio_all_indexes.relname, pg_statio_all_indexes.indexrelname, pg_statio_all_indexes.idx_blks_read, pg_statio_all_indexes.idx_blks_hit FROM pg_statio_all_indexes WHERE ((pg_statio_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_indexes.schemaname !~ '^pg_toast'::text));
pg_statio_user_sequences | SELECT pg_statio_all_sequences.relid, pg_statio_all_sequences.schemaname, pg_statio_all_sequences.relname, pg_statio_all_sequences.blks_read, pg_statio_all_sequences.blks_hit FROM pg_statio_all_sequences WHERE ((pg_statio_all_sequences.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_sequences.schemaname !~ '^pg_toast'::text));
pg_statio_user_tables | SELECT pg_statio_all_tables.relid, pg_statio_all_tables.schemaname, pg_statio_all_tables.relname, pg_statio_all_tables.heap_blks_read, pg_statio_all_tables.heap_blks_hit, pg_statio_all_tables.idx_blks_read, pg_statio_all_tables.idx_blks_hit, pg_statio_all_tables.toast_blks_read, pg_statio_all_tables.toast_blks_hit, pg_statio_all_tables.tidx_blks_read, pg_statio_all_tables.tidx_blks_hit FROM pg_statio_all_tables WHERE ((pg_statio_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_tables.schemaname !~ '^pg_toast'::text));
- pg_stats | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = 1) THEN s.stavalues1 WHEN (s.stakind2 = 1) THEN s.stavalues2 WHEN (s.stakind3 = 1) THEN s.stavalues3 WHEN (s.stakind4 = 1) THEN s.stavalues4 WHEN (s.stakind5 = 1) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = 1) THEN s.stanumbers1 WHEN (s.stakind2 = 1) THEN s.stanumbers2 WHEN (s.stakind3 = 1) THEN s.stanumbers3 WHEN (s.stakind4 = 1) THEN s.stanumbers4 WHEN (s.stakind5 = 1) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 WHEN (s.stakind5 = 2) THEN s.stavalues5 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] WHEN (s.stakind5 = 3) THEN s.stanumbers5[1] ELSE NULL::real END AS correlation, CASE WHEN (s.stakind1 = 4) THEN s.stavalues1 WHEN (s.stakind2 = 4) THEN s.stavalues2 WHEN (s.stakind3 = 4) THEN s.stavalues3 WHEN (s.stakind4 = 4) THEN s.stavalues4 WHEN (s.stakind5 = 4) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_elems, CASE WHEN (s.stakind1 = 4) THEN s.stanumbers1 WHEN (s.stakind2 = 4) THEN s.stanumbers2 WHEN (s.stakind3 = 4) THEN s.stanumbers3 WHEN (s.stakind4 = 4) THEN s.stanumbers4 WHEN (s.stakind5 = 4) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_elem_freqs, CASE WHEN (s.stakind1 = 5) THEN s.stanumbers1 WHEN (s.stakind2 = 5) THEN s.stanumbers2 WHEN (s.stakind3 = 5) THEN s.stanumbers3 WHEN (s.stakind4 = 5) THEN s.stanumbers4 WHEN (s.stakind5 = 5) THEN s.stanumbers5 ELSE NULL::real[] END AS elem_count_histogram FROM (((pg_statistic s JOIN 
pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text));
+ pg_stats | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = 1) THEN s.stavalues1 WHEN (s.stakind2 = 1) THEN s.stavalues2 WHEN (s.stakind3 = 1) THEN s.stavalues3 WHEN (s.stakind4 = 1) THEN s.stavalues4 WHEN (s.stakind5 = 1) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = 1) THEN s.stanumbers1 WHEN (s.stakind2 = 1) THEN s.stanumbers2 WHEN (s.stakind3 = 1) THEN s.stanumbers3 WHEN (s.stakind4 = 1) THEN s.stanumbers4 WHEN (s.stakind5 = 1) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 WHEN (s.stakind5 = 2) THEN s.stavalues5 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] WHEN (s.stakind5 = 3) THEN s.stanumbers5[1] ELSE NULL::real END AS correlation, CASE WHEN (s.stakind1 = 4) THEN s.stavalues1 WHEN (s.stakind2 = 4) THEN s.stavalues2 WHEN (s.stakind3 = 4) THEN s.stavalues3 WHEN (s.stakind4 = 4) THEN s.stavalues4 WHEN (s.stakind5 = 4) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_elems, CASE WHEN (s.stakind1 = 4) THEN s.stanumbers1 WHEN (s.stakind2 = 4) THEN s.stanumbers2 WHEN (s.stakind3 = 4) THEN s.stanumbers3 WHEN (s.stakind4 = 4) THEN s.stanumbers4 WHEN (s.stakind5 = 4) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_elem_freqs, CASE WHEN (s.stakind1 = 5) THEN s.stanumbers1 WHEN (s.stakind2 = 5) THEN s.stanumbers2 WHEN (s.stakind3 = 5) THEN s.stanumbers3 WHEN (s.stakind4 = 5) THEN s.stanumbers4 WHEN (s.stakind5 = 5) THEN s.stanumbers5 ELSE NULL::real[] END AS elem_count_histogram FROM (((pg_statistic s JOIN 
pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON ((((c.oid = a.attrelid) AND (array_length(s.staattnums, 1) = 1)) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text));
pg_tables | SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, t.spcname AS tablespace, c.relhasindex AS hasindexes, c.relhasrules AS hasrules, c.relhastriggers AS hastriggers FROM ((pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace))) WHERE (c.relkind = 'r'::"char");
pg_timezone_abbrevs | SELECT pg_timezone_abbrevs.abbrev, pg_timezone_abbrevs.utc_offset, pg_timezone_abbrevs.is_dst FROM pg_timezone_abbrevs() pg_timezone_abbrevs(abbrev, utc_offset, is_dst);
pg_timezone_names | SELECT pg_timezone_names.name, pg_timezone_names.abbrev, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst);
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index 70eab92..c983a11 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -368,8 +368,9 @@ WHERE p1.typarray = p2.oid AND
SELECT p1.oid, p1.typname, p2.oid, p2.proname
FROM pg_type AS p1, pg_proc AS p2
WHERE p1.typanalyze = p2.oid AND NOT
- (p2.pronargs = 1 AND
+ (p2.pronargs = 2 AND
p2.proargtypes[0] = 'internal'::regtype AND
+ p2.proargtypes[1] = 'int4'::regtype AND
p2.prorettype = 'bool'::regtype AND NOT p2.proretset);
oid | typname | oid | proname
-----+---------+-----+---------
diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql
index 413b220..a8d2a8a 100644
--- a/src/test/regress/sql/type_sanity.sql
+++ b/src/test/regress/sql/type_sanity.sql
@@ -268,8 +268,9 @@ WHERE p1.typarray = p2.oid AND
SELECT p1.oid, p1.typname, p2.oid, p2.proname
FROM pg_type AS p1, pg_proc AS p2
WHERE p1.typanalyze = p2.oid AND NOT
- (p2.pronargs = 1 AND
+ (p2.pronargs = 2 AND
p2.proargtypes[0] = 'internal'::regtype AND
+ p2.proargtypes[1] = 'int4'::regtype AND
p2.prorettype = 'bool'::regtype AND NOT p2.proretset);
-- domains inherit their base type's typanalyze
On Tue, Mar 13, 2012 at 9:56 AM, Hans-Jürgen Schönig
<postgres@cybertec.at> wrote:
Here's the cross-col patch against today's master branch.
Please add your patch here, so it doesn't get forgotten:
https://commitfest.postgresql.org/action/commitfest_view/open
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company