diff -durpN postgresql.orig/src/backend/catalog/Makefile postgresql/src/backend/catalog/Makefile --- postgresql.orig/src/backend/catalog/Makefile 2011-01-04 15:13:15.852565371 +0100 +++ postgresql/src/backend/catalog/Makefile 2011-01-27 10:32:41.796567782 +0100 @@ -31,7 +31,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_sr pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \ pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \ pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \ - pg_statistic.h pg_rewrite.h pg_trigger.h pg_description.h \ + pg_statistic.h pg_statistic2.h pg_rewrite.h pg_trigger.h pg_description.h \ pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \ pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \ diff -durpN postgresql.orig/src/backend/commands/indexcmds.c postgresql/src/backend/commands/indexcmds.c --- postgresql.orig/src/backend/commands/indexcmds.c 2011-01-26 14:17:23.911759868 +0100 +++ postgresql/src/backend/commands/indexcmds.c 2011-02-22 13:32:08.034596118 +0100 @@ -26,7 +26,9 @@ #include "catalog/indexing.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_statistic2.h" #include "catalog/pg_tablespace.h" +#include "catalog/pg_type.h" #include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/tablecmds.h" @@ -1639,3 +1641,142 @@ ReindexDatabase(const char *databaseName MemoryContextDelete(private_context); } + +/* + * CrossColStat + * Add or remove one in pg_statistic2 + */ +void CrossColStat(CrossColStatStmt *stmt) +{ + Oid relId; + Relation rel; + ListCell *l; + int len, i, j; + bool differ = false; + AttrNumber *attnums; + AttrNumber *sorted_attnums; + int16 typlen; + bool typbyval; + char typalign; + Datum *datum_attnums; + ArrayType *arr_attnums; + ScanKeyData scanKey[2]; + SysScanDesc scan; + HeapTuple tuple; + TupleDesc tupDesc; + Datum values[Natts_pg_statistic2]; + bool nulls[Natts_pg_statistic2]; + + relId = RangeVarGetRelid(stmt->relation, false); + + len = list_length(stmt->columns); + if (len < 2) + elog(ERROR, "cross column statistics need at least two columns"); + + attnums = (int2 *)palloc(len * sizeof(AttrNumber)); + sorted_attnums = (int2 *)palloc(len * sizeof(AttrNumber)); + datum_attnums = (Datum *)palloc(len * sizeof(Datum)); + + i = 0; + foreach(l, stmt->columns) + { + CrossColStatColumn *col = (CrossColStatColumn *) lfirst(l); + + attnums[i++] = get_attnum(relId, col->colname); + } + + for (i = 0; i < len; i++) + sorted_attnums[i] = attnums[i]; + for (i = 0; i < len - 1; i++) + for (j = i+1; j < len; j++) + if (sorted_attnums[i] > sorted_attnums[j]) + { + AttrNumber tmp = sorted_attnums[i]; + + sorted_attnums[i] = sorted_attnums[j]; + sorted_attnums[j] = tmp; + } + + for (i = 0; i < len; i++) + { + if (!differ && attnums[i] != sorted_attnums[i]) + differ = true; + + if ((i < len - 1) && sorted_attnums[i] == sorted_attnums[i+1]) + elog(ERROR, "column list must contain every column exactly once"); + + datum_attnums[i] = Int16GetDatum(sorted_attnums[i]); + } + + if (differ) + elog(WARNING, "the column list was reordered in the order of table attributes"); + + get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign); + arr_attnums = construct_array(datum_attnums, len, + INT2OID, typlen, typbyval, typalign); + + rel = heap_open(Statistic2RelationId, RowExclusiveLock); + + /* + * There's no syscache for pg_statistic2, + * arrays aren't supported there as search keys. + * We need to do the hard way. + */ + ScanKeyInit(&scanKey[0], + Anum_pg_statistic2_sta2relid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relId)); + ScanKeyInit(&scanKey[1], + Anum_pg_statistic2_sta2attnums, + BTEqualStrategyNumber, F_ARRAY_EQ, + PointerGetDatum(arr_attnums)); + + scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true, + SnapshotNow, 2, scanKey); + + tuple = systable_getnext(scan); + + if (stmt->create) + { + if (HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + elog(ERROR, "pg_statistic2 entry already exists for this table and set of columns"); + } + systable_endscan(scan); + + for (i = 0; i < Natts_pg_statistic2; i++) + nulls[i] = TRUE; + + values[Anum_pg_statistic2_sta2relid - 1] = ObjectIdGetDatum(relId); + nulls[Anum_pg_statistic2_sta2relid - 1] = FALSE; + + values[Anum_pg_statistic2_sta2attnums - 1] = PointerGetDatum(arr_attnums); + nulls[Anum_pg_statistic2_sta2attnums - 1] = FALSE; + + values[Anum_pg_statistic2_sta2inherit - 1] = BoolGetDatum(FALSE); + nulls[Anum_pg_statistic2_sta2inherit - 1] = FALSE; + + tupDesc = RelationGetDescr(rel); + + tuple = heap_form_tuple(tupDesc, values, nulls); + + simple_heap_insert(rel, tuple); + + CatalogUpdateIndexes(rel, tuple); + } + else + { + if (!HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + elog(ERROR, "pg_statistic2 entry doesn't exist for this table and set of columns"); + } + + simple_heap_delete(rel, &tuple->t_self); + + systable_endscan(scan); + } + + relation_close(rel, NoLock); +} diff -durpN postgresql.orig/src/backend/nodes/copyfuncs.c postgresql/src/backend/nodes/copyfuncs.c --- postgresql.orig/src/backend/nodes/copyfuncs.c 2011-01-26 14:17:23.922759114 +0100 +++ postgresql/src/backend/nodes/copyfuncs.c 2011-02-22 08:24:27.628837801 +0100 @@ -3321,6 +3321,28 @@ _copyCreateForeignTableStmt(CreateForeig return newnode; } +static CrossColStatStmt * +_copyCrossColStatStmt(CrossColStatStmt *from) +{ + CrossColStatStmt *newnode = makeNode(CrossColStatStmt); + + COPY_SCALAR_FIELD(create); + newnode->relation = _copyRangeVar(from->relation); + COPY_NODE_FIELD(columns); + + return newnode; +} + +static CrossColStatColumn * +_copyCrossColStatColumn(CrossColStatColumn *from) +{ + CrossColStatColumn *newnode = makeNode(CrossColStatColumn); + + COPY_STRING_FIELD(colname); + + return newnode; +} + static CreateTrigStmt * _copyCreateTrigStmt(CreateTrigStmt *from) { @@ -4225,6 +4247,12 @@ copyObject(void *from) case T_CreateForeignTableStmt: retval = _copyCreateForeignTableStmt(from); break; + case T_CrossColStatStmt: + retval = _copyCrossColStatStmt(from); + break; + case T_CrossColStatColumn: + retval = _copyCrossColStatColumn(from); + break; case T_CreateTrigStmt: retval = _copyCreateTrigStmt(from); break; diff -durpN postgresql.orig/src/backend/nodes/equalfuncs.c postgresql/src/backend/nodes/equalfuncs.c --- postgresql.orig/src/backend/nodes/equalfuncs.c 2011-01-26 14:17:23.924758978 +0100 +++ postgresql/src/backend/nodes/equalfuncs.c 2011-02-22 06:11:54.259890037 +0100 @@ -1732,6 +1732,25 @@ _equalCreateForeignTableStmt(CreateForei } static bool +_equalCrossColStatStmt(CrossColStatStmt *a, CrossColStatStmt *b) +{ + COMPARE_SCALAR_FIELD(create); + if (!_equalRangeVar(a->relation, b->relation)) + return FALSE; + COMPARE_NODE_FIELD(columns); + + return true; +} + +static bool +_equalCrossColStatColumn(CrossColStatColumn *a, CrossColStatColumn *b) +{ + COMPARE_STRING_FIELD(colname); + + return true; +} + +static bool _equalCreateTrigStmt(CreateTrigStmt *a, CreateTrigStmt *b) { COMPARE_STRING_FIELD(trigname); @@ -2838,6 +2857,12 @@ equal(void *a, void *b) case T_CreateForeignTableStmt: retval = _equalCreateForeignTableStmt(a, b); break; + case T_CrossColStatStmt: + retval = _equalCrossColStatStmt(a, b); + break; + case T_CrossColStatColumn: + retval = _equalCrossColStatColumn(a, b); + break; case T_CreateTrigStmt: retval = _equalCreateTrigStmt(a, b); break; diff -durpN postgresql.orig/src/backend/optimizer/path/clausesel.c postgresql/src/backend/optimizer/path/clausesel.c --- postgresql.orig/src/backend/optimizer/path/clausesel.c 2011-01-04 15:13:15.940560845 +0100 +++ postgresql/src/backend/optimizer/path/clausesel.c 2011-02-22 17:22:43.761445055 +0100 @@ -13,17 +13,27 @@ *------------------------------------------------------------------------- */ #include "postgres.h" +#include "postgres_ext.h" +#include "access/skey.h" +#include "access/relscan.h" +#include "catalog/indexing.h" #include "catalog/pg_operator.h" +#include "catalog/pg_statistic2.h" +#include "catalog/pg_type.h" #include "nodes/makefuncs.h" +#include "nodes/pg_list.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/plancat.h" +#include "optimizer/var.h" #include "parser/parsetree.h" +#include "utils/array.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" +#include "utils/tqual.h" /* @@ -34,6 +44,7 @@ typedef struct RangeQueryClause { struct RangeQueryClause *next; /* next in linked list */ Node *var; /* The common variable of the clauses */ + AttrNumber varattno; /* for finding cross-column statistics */ bool have_lobound; /* found a low-bound clause yet? */ bool have_hibound; /* found a high-bound clause yet? */ Selectivity lobound; /* Selectivity of a var > something clause */ @@ -43,6 +54,21 @@ typedef struct RangeQueryClause static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2); +typedef struct CrossColumnClause +{ + struct CrossColumnClause *next; + Node *var; + AttrNumber varattno; + Node *expr; + Selectivity sel; +} CrossColumnClause; + +static void addXCClause(CrossColumnClause **xclist, Node *clause, + bool varonleft, Selectivity s2); + +static bool crosscolumn_selectivity(Oid relId, + CrossColumnClause **xcnext, RangeQueryClause **rqlist, + Selectivity *result_sel); /**************************************************************************** * ROUTINES TO COMPUTE SELECTIVITIES @@ -99,8 +125,12 @@ clauselist_selectivity(PlannerInfo *root { Selectivity s1 = 1.0; RangeQueryClause *rqlist = NULL; + CrossColumnClause *xclist = NULL; + Oid relId = InvalidOid; ListCell *l; + elog(NOTICE, "clauselist_selectivity varRelid %d, list length %d", varRelid, list_length(clauses)); + /* * If there's exactly one clause, then no use in trying to match up pairs, * so just go directly to clause_selectivity(). @@ -173,6 +203,15 @@ clauselist_selectivity(PlannerInfo *root if (ok) { + + if (!OidIsValid(relId)) + { + int relid = bms_singleton_member(rinfo->clause_relids); + + relId = root->simple_rte_array[relid]->relid; + elog(NOTICE, "clauselist_selectivity: oprrest %d, relid %d (%d)", get_oprrest(expr->opno), + relid, relId); + } /* * If it's not a "<" or ">" operator, just merge the * selectivity in generically. But if it's the right oprrest, @@ -188,6 +227,10 @@ clauselist_selectivity(PlannerInfo *root addRangeClause(&rqlist, clause, varonleft, false, s2); break; + case F_EQSEL: + addXCClause(&xclist, clause, + varonleft, s2); + break; default: /* Just merge the selectivity in generically */ s1 = s1 * s2; @@ -202,11 +245,32 @@ clauselist_selectivity(PlannerInfo *root } /* + * Scan xclist and rqlist recursively and filter out + * all possible cross-column selectivities. + */ + crosscolumn_selectivity(relId, &xclist, &rqlist, &s1); + + while (xclist != NULL) + { + CrossColumnClause *xcnext; + + s1 = s1 * xclist->sel; + + xcnext = xclist->next; + pfree(xclist); + xclist = xcnext; + } + + /* * Now scan the rangequery pair list. */ while (rqlist != NULL) { RangeQueryClause *rqnext; + char *nodetostr = nodeToString(rqlist->var); + + elog(NOTICE, "%s", nodetostr); + pfree(nodetostr); if (rqlist->have_lobound && rqlist->have_hibound) { @@ -279,6 +343,22 @@ clauselist_selectivity(PlannerInfo *root return s1; } +static AttrNumber +var_get_attno(Node *clause) +{ + Var *var; + + if (IsA(clause, Var)) + { + var = (Var *)clause; + elog(NOTICE, "var_get_attno varattno %d", var->varattno); + return var->varattno; + } + + elog(NOTICE, "var_get_attno default 0"); + return 0; +} + /* * addRangeClause --- add a new range clause for clauselist_selectivity * @@ -358,6 +438,8 @@ addRangeClause(RangeQueryClause **rqlist /* No matching var found, so make a new clause-pair data structure */ rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); rqelem->var = var; + rqelem->varattno = var_get_attno(var); + if (is_lobound) { rqelem->have_lobound = true; @@ -375,6 +457,38 @@ addRangeClause(RangeQueryClause **rqlist } /* + * addXCClause - add a new clause to the list of clauses for cross-column stats inspection + * + */ +static void +addXCClause(CrossColumnClause **xclist, Node *clause, + bool varonleft, Selectivity s) +{ + CrossColumnClause *xcelem; + Node *var; + Node *expr; + + if (varonleft) + { + var = get_leftop((Expr *) clause); + expr = get_rightop((Expr *) clause); + } + else + { + var = get_rightop((Expr *) clause); + expr = get_leftop((Expr *) clause); + } + + xcelem = (CrossColumnClause *) palloc(sizeof(CrossColumnClause)); + xcelem->var = var; + xcelem->varattno = var_get_attno(var); + xcelem->expr = expr; + xcelem->sel = s; + xcelem->next = *xclist; + *xclist = xcelem; +} + +/* * bms_is_subset_singleton * * Same result as bms_is_subset(s, bms_make_singleton(x)), @@ -499,6 +613,8 @@ clause_selectivity(PlannerInfo *root, { rinfo = (RestrictInfo *) clause; +// elog(NOTICE, "RestrictInfo, %s", nodeToString(rinfo->clause)); + /* * If the clause is marked pseudoconstant, then it will be used as a * gating qual and should not affect selectivity estimates; hence @@ -779,3 +895,288 @@ clause_selectivity(PlannerInfo *root, return s1; } + +static bool +has_xcol_selectivity(Oid relId, int natts, AttrNumber *attnums, Selectivity *result_sel) +{ + Relation rel; + Datum *datums = (Datum *)palloc(natts * sizeof(Datum)); + ArrayType *arr_attnums; + int i; + int16 typlen; + bool typbyval; + char typalign; + ScanKeyData scanKey[2]; + SysScanDesc scan; + HeapTuple tuple; + bool result; + Selectivity sel = 1e-5; /* fixed selectivity for now */ + + for (i = 0; i < natts; i++) + datums[i] = Int16GetDatum(attnums[i]); + + get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign); + arr_attnums = construct_array(datums, natts, + INT2OID, typlen, typbyval, typalign); + + rel = heap_open(Statistic2RelationId, AccessShareLock); + + ScanKeyInit(&scanKey[0], + Anum_pg_statistic2_sta2relid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relId)); + ScanKeyInit(&scanKey[1], + Anum_pg_statistic2_sta2attnums, + BTEqualStrategyNumber, F_ARRAY_EQ, + PointerGetDatum(arr_attnums)); + + scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true, + SnapshotNow, 2, scanKey); + + tuple = systable_getnext(scan); + + result = HeapTupleIsValid(tuple); + + systable_endscan(scan); + + heap_close(rel, NoLock); + + pfree(datums); + + if (result) + *result_sel = sel; + + return result; +} + +typedef struct { + CrossColumnClause *xc; + RangeQueryClause *rq; +} reclist; + +typedef struct { + int len; + reclist *rclist; + AttrNumber *attnums; +} reclist2; + + +/* add rclist to our list, so the ordered attnums arrays are unique */ +static void +add_reclist(int len, reclist *rclist, List **results) +{ + ListCell *lc; + int i, j; + reclist2 *rclist2; + AttrNumber *attnums = (AttrNumber *) palloc(len * sizeof(AttrNumber)); + + /* collect the ordered varattnos from the Vars */ + for (i = 0; i < len; i++) + { + if (rclist[i].xc) + attnums[i] = rclist[i].xc->varattno; + else + attnums[i] = rclist[i].rq->varattno; + } + for (i = 0; i < len - 1; i++) + for (j = i + 1; j < len; j++) + { + AttrNumber tmp = attnums[i]; + attnums[i] = attnums[j]; + attnums[j] = tmp; + } + + /* match this ordered attnum list against the current list of attnum arrays */ + foreach(lc, *results) + { + reclist2 *rc2 = (reclist2 *) lfirst(lc); + + if (len != rc2->len) + continue; + + for (i = 0; i < len; i++) + if (attnums[i] != rc2->attnums[i]) + break; + if (i < len) + continue; + + /* found */ + return; + } + + /* not found, add it to the list */ + rclist2 = (reclist2 *) palloc(sizeof(reclist2)); + rclist2->len = len; + rclist2->rclist = (reclist *) palloc(len * sizeof(reclist)); + for (i = 0; i < len; i++) + { + rclist2->rclist[i].xc = rclist[i].xc; + rclist2->rclist[i].rq = rclist[i].rq; + } + rclist2->attnums = attnums; + + *results = lappend(*results, rclist2); +} + +static int +compare_reclist2(reclist2 *a, reclist2 *b) +{ + int i; + + if (a->len < b->len) + return -1; + else if (a->len > b->len) + return 1; + + for (i = 0; i < a->len; i++) + { + if (a->attnums[i] < b->attnums[i]) + return -1; + else if (a->attnums[i] > b->attnums[i]) + return 1; + } + + return 0; +} + +static bool +add_reclist2(int *len, reclist2 **p_reclist2, reclist2 *rclist2) +{ + int curr_len = *len; + int i, j; + + if (curr_len == 0) + { + p_reclist2[i] = rclist2; + curr_len++; + *len = curr_len; + return true; + } + + for (i = 0; i < curr_len; i++) + { + if (compare_reclist2(rclist2, p_reclist2[i]) > 0) + { + for (j = curr_len; j > i; j--) + p_reclist2[j] = p_reclist2[j - 1]; + p_reclist2[i] = rclist2; + curr_len++; + *len = curr_len; + return true; + } + } + + return false; +} + +static void +collect_xcol_lists(int curr_depth, CrossColumnClause *xclist, RangeQueryClause *rqlist, reclist *rclist, List **results) +{ + CrossColumnClause *xc_tmp; + RangeQueryClause *rq_tmp; + + for (xc_tmp = xclist; xc_tmp; xc_tmp = xc_tmp->next) + { + if (xc_tmp->varattno == 0) + continue; + + rclist[curr_depth].xc = xc_tmp; + collect_xcol_lists(curr_depth + 1, xc_tmp->next, rqlist, rclist, results); + add_reclist(curr_depth + 1, rclist, results); + rclist[curr_depth].xc = NULL; + } + + for (rq_tmp = rqlist; rq_tmp; rq_tmp = rq_tmp->next) + { + if (rq_tmp->varattno == 0) + continue; + + rclist[curr_depth].rq = rq_tmp; + collect_xcol_lists(curr_depth + 1, (xclist ? xclist->next : xclist), rq_tmp->next, rclist, results); + add_reclist(curr_depth + 1, rclist, results); + rclist[curr_depth].rq = NULL; + } +} + +static bool +crosscolumn_selectivity(Oid relId, CrossColumnClause **xclist, RangeQueryClause **rqlist, Selectivity *result_sel) +{ + CrossColumnClause *xc; + RangeQueryClause *rq; + List *resultlist = NIL; + ListCell *lc; + reclist *rclist; + reclist2 **p_rclist2; + int max_len, i; + Selectivity sel = 1.0; + bool found_xc_sel = false; + + max_len = 0; + for (rq = *rqlist; rq; max_len++, rq = rq->next) + ; + for (xc = *xclist; xc; max_len++, xc = xc->next) + ; + + elog(NOTICE, "crosscolumn_selectivity max length of array %d", max_len); + + rclist = (reclist *) palloc(max_len * sizeof(reclist)); + for (i = 0; i < max_len; i++) + { + rclist[i].xc = NULL; + rclist[i].rq = NULL; + } + + collect_xcol_lists(0, *xclist, *rqlist, rclist, &resultlist); + + pfree(rclist); + + max_len = list_length(resultlist); + elog(NOTICE, "crosscolumn_selectivity list length of arrays %d", max_len); + p_rclist2 = (reclist2 **) palloc(max_len * sizeof(reclist2 *)); + + max_len = 0; + foreach (lc, resultlist) + { + reclist2 *rclist2 = (reclist2 *) lfirst(lc); + + if (!add_reclist2(&max_len, p_rclist2, rclist2)) + { + pfree(rclist2->rclist); + pfree(rclist2->attnums); + pfree(rclist2); + } + } + elog(NOTICE, "crosscolumn_selectivity length of ordered/unique array of previous list %d", max_len); + + list_free(resultlist); + + for (i = 0; i < max_len; i++) + { + if (p_rclist2[i] == NULL) + continue; + + if (has_xcol_selectivity(relId, p_rclist2[i]->len, p_rclist2[i]->attnums, &sel)) + { + int j; + + /* remove the xclist and rqlist members found in p_rclist2[i] */ + for (j = 0; j < p_rclist2[i]->len; j++) + { + /* TODO ... */ + } + + /* also, remove later elements in p_rclist2 that has any of the removed elements */ + /* TODO ... */ + + elog(NOTICE, "crosscolumn_selectivity found xc selectivity %lf", sel); + found_xc_sel = true; + *result_sel *= sel; + } + + pfree(p_rclist2[i]->rclist); + pfree(p_rclist2[i]->attnums); + pfree(p_rclist2[i]); + } + + return found_xc_sel; +} diff -durpN postgresql.orig/src/backend/parser/gram.y postgresql/src/backend/parser/gram.y --- postgresql.orig/src/backend/parser/gram.y 2011-01-26 14:17:23.934758294 +0100 +++ postgresql/src/backend/parser/gram.y 2011-02-22 08:34:41.549462096 +0100 @@ -196,10 +196,10 @@ static RangeVar *makeRangeVarFromAnyName CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateAssertStmt CreateTrigStmt - CreateUserStmt CreateUserMappingStmt CreateRoleStmt - CreatedbStmt DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt + CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatedbStmt CreateCCStmt + DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt - DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt + DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropCCStmt DropRoleStmt DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt @@ -306,6 +306,8 @@ static RangeVar *makeRangeVarFromAnyName create_generic_options alter_generic_options relation_expr_list dostmt_opt_list +%type cc_column_list + %type OptTempTableName %type into_clause create_as_target @@ -689,6 +691,7 @@ stmt : | CreateAssertStmt | CreateCastStmt | CreateConversionStmt + | CreateCCStmt | CreateDomainStmt | CreateFdwStmt | CreateForeignServerStmt @@ -716,6 +719,7 @@ stmt : | DoStmt | DropAssertStmt | DropCastStmt + | DropCCStmt | DropFdwStmt | DropForeignServerStmt | DropGroupStmt @@ -1179,6 +1183,49 @@ schema_stmt: /***************************************************************************** * + * Create cross column statistics + * + *****************************************************************************/ + +CreateCCStmt: + CREATE CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')' + { + CrossColStatStmt *n = makeNode(CrossColStatStmt); + n->create = TRUE; + n->relation = $6; + n->columns = $8; + $$ = (Node *)n; + } + ; + +DropCCStmt: DROP CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')' + { + CrossColStatStmt *n = makeNode(CrossColStatStmt); + n->create = FALSE; + n->relation = $6; + n->columns = $8; + $$ = (Node *)n; + } + ; + +cc_column_list: + ColId + { + CrossColStatColumn *n = makeNode(CrossColStatColumn); + n->colname = $1; + $$ = list_make1((Node *) n); + } + | cc_column_list ',' ColId + { + CrossColStatColumn *n = makeNode(CrossColStatColumn); + n->colname = $3; + $$ = lappend($1, n); + } + ; + + +/***************************************************************************** + * * Set PG internal variable * SET name TO 'var_value' * Include SQL92 syntax (thomas 1997-10-22): diff -durpN postgresql.orig/src/backend/tcop/utility.c postgresql/src/backend/tcop/utility.c --- postgresql.orig/src/backend/tcop/utility.c 2011-01-04 15:13:16.009557296 +0100 +++ postgresql/src/backend/tcop/utility.c 2011-02-22 08:30:01.524693402 +0100 @@ -222,6 +222,7 @@ check_xact_readonly(Node *parsetree) case T_AlterTableSpaceOptionsStmt: case T_CreateForeignTableStmt: case T_SecLabelStmt: + case T_CrossColStatStmt: PreventCommandIfReadOnly(CreateCommandTag(parsetree)); break; default: @@ -576,6 +577,10 @@ standard_ProcessUtility(Node *parsetree, } break; + case T_CrossColStatStmt: + CrossColStat((CrossColStatStmt *)parsetree); + break; + case T_CreateTableSpaceStmt: PreventTransactionChain(isTopLevel, "CREATE TABLESPACE"); CreateTableSpace((CreateTableSpaceStmt *) parsetree); @@ -1580,6 +1585,13 @@ CreateCommandTag(Node *parsetree) tag = "CREATE FOREIGN TABLE"; break; + case T_CrossColStatStmt: + if (((CrossColStatStmt *)parsetree)->create) + tag = "CREATE CROSS COLUMN STATISTICS"; + else + tag = "DROP CROSS COLUMN STATISTICS"; + break; + case T_DropStmt: switch (((DropStmt *) parsetree)->removeType) { diff -durpN postgresql.orig/src/backend/utils/cache/syscache.c postgresql/src/backend/utils/cache/syscache.c --- postgresql.orig/src/backend/utils/cache/syscache.c 2011-01-04 15:13:16.056554877 +0100 +++ postgresql/src/backend/utils/cache/syscache.c 2011-02-22 10:09:25.215145794 +0100 @@ -44,6 +44,7 @@ #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic2.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_config_map.h" diff -durpN postgresql.orig/src/include/catalog/indexing.h postgresql/src/include/catalog/indexing.h --- postgresql.orig/src/include/catalog/indexing.h 2011-01-04 15:13:16.118551689 +0100 +++ postgresql/src/include/catalog/indexing.h 2011-02-22 09:27:45.371295981 +0100 @@ -289,6 +289,10 @@ DECLARE_UNIQUE_INDEX(pg_db_role_setting_ DECLARE_UNIQUE_INDEX(pg_seclabel_object_index, 3597, on pg_seclabel using btree(objoid oid_ops, classoid oid_ops, objsubid int4_ops, provider text_ops)); #define SecLabelObjectIndexId 3597 +DECLARE_UNIQUE_INDEX(pg_statistic2_relid_att_inh_index, 3072, on pg_statistic2 using btree(sta2relid oid_ops, sta2attnums array_ops, sta2inherit bool_ops)); +#define Statistic2RelidAttnumsInhIndexId 3072 + + /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff -durpN postgresql.orig/src/include/catalog/pg_statistic2.h postgresql/src/include/catalog/pg_statistic2.h --- postgresql.orig/src/include/catalog/pg_statistic2.h 1970-01-01 01:00:00.000000000 +0100 +++ postgresql/src/include/catalog/pg_statistic2.h 2011-02-22 09:35:58.817165678 +0100 @@ -0,0 +1,265 @@ +/*------------------------------------------------------------------------- + * + * pg_statistic2.h + * definition of the system "cross-column statistic" relation (pg_statistic2) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_statistic2.h + * + * NOTES + * the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_STATISTIC2_H +#define PG_STATISTIC2_H + +#include "catalog/genbki.h" + +/* + * The CATALOG definition has to refer to the type of stavaluesN as + * "anyarray" so that bootstrap mode recognizes it. There is no real + * typedef for that, however. Since the fields are potentially-null and + * therefore can't be accessed directly from C code, there is no particular + * need for the C struct definition to show a valid field type --- instead + * we just make it int. + */ +#define anyarray int + +/* ---------------- + * pg_statistic definition. cpp turns this into + * typedef struct FormData_pg_statistic + * ---------------- + */ +#define Statistic2RelationId 3071 + +CATALOG(pg_statistic2,3071) BKI_WITHOUT_OIDS +{ + /* These fields form the unique key for the entry: */ + Oid sta2relid; /* relation containing attribute */ + int2 sta2attnums[1]; /* attribute (column) stats are for */ + bool sta2inherit; /* true if inheritance children are included */ + + /* the fraction of the column's entries that are NULL: */ + float4 sta2nullfrac; + + /* + * stawidth is the average width in bytes of non-null entries. For + * fixed-width datatypes this is of course the same as the typlen, but for + * var-width types it is more useful. Note that this is the average width + * of the data as actually stored, post-TOASTing (eg, for a + * moved-out-of-line value, only the size of the pointer object is + * counted). This is the appropriate definition for the primary use of + * the statistic, which is to estimate sizes of in-memory hash tables of + * tuples. + */ + int4 sta2width; + + /* ---------------- + * stadistinct indicates the (approximate) number of distinct non-null + * data values in the column. The interpretation is: + * 0 unknown or not computed + * > 0 actual number of distinct values + * < 0 negative of multiplier for number of rows + * The special negative case allows us to cope with columns that are + * unique (stadistinct = -1) or nearly so (for example, a column in + * which values appear about twice on the average could be represented + * by stadistinct = -0.5). Because the number-of-rows statistic in + * pg_class may be updated more frequently than pg_statistic is, it's + * important to be able to describe such situations as a multiple of + * the number of rows, rather than a fixed number of distinct values. + * But in other cases a fixed number is correct (eg, a boolean column). + * ---------------- + */ + float4 sta2distinct; + + /* ---------------- + * To allow keeping statistics on different kinds of datatypes, + * we do not hard-wire any particular meaning for the remaining + * statistical fields. Instead, we provide several "slots" in which + * statistical data can be placed. Each slot includes: + * kind integer code identifying kind of data + * op OID of associated operator, if needed + * numbers float4 array (for statistical values) + * values anyarray (for representations of data values) + * The ID and operator fields are never NULL; they are zeroes in an + * unused slot. The numbers and values fields are NULL in an unused + * slot, and might also be NULL in a used slot if the slot kind has + * no need for one or the other. + * ---------------- + */ + + int2 sta2kind1; + int2 sta2kind2; + int2 sta2kind3; + int2 sta2kind4; + + Oid sta2op1; + Oid sta2op2; + Oid sta2op3; + Oid sta2op4; + + /* + * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent + * (NULL). They cannot be accessed as C struct entries; you have to use + * the full field access machinery (heap_getattr) for them. We declare + * them here for the catalog machinery. + */ + + float4 sta2numbers1[1]; + float4 sta2numbers2[1]; + float4 sta2numbers3[1]; + float4 sta2numbers4[1]; + + /* + * Values in these arrays are values of the column's data type. We + * presently have to cheat quite a bit to allow polymorphic arrays of this + * kind, but perhaps someday it'll be a less bogus facility. + */ + anyarray sta2values1; + anyarray sta2values2; + anyarray sta2values3; + anyarray sta2values4; +} FormData_pg_statistic2; + +#define STATISTIC_NUM_SLOTS 4 + +#undef anyarray + + +/* ---------------- + * Form_pg_statistic corresponds to a pointer to a tuple with + * the format of pg_statistic relation. + * ---------------- + */ +typedef FormData_pg_statistic2 *Form_pg_statistic2; + +/* ---------------- + * compiler constants for pg_statistic + * ---------------- + */ +#define Natts_pg_statistic2 22 +#define Anum_pg_statistic2_sta2relid 1 +#define Anum_pg_statistic2_sta2attnums 2 +#define Anum_pg_statistic2_sta2inherit 3 +#define Anum_pg_statistic2_sta2nullfrac 4 +#define Anum_pg_statistic2_sta2width 5 +#define Anum_pg_statistic2_sta2distinct 6 +#define Anum_pg_statistic2_sta2kind1 7 +#define Anum_pg_statistic2_sta2kind2 8 +#define Anum_pg_statistic2_sta2kind3 9 +#define Anum_pg_statistic2_sta2kind4 10 +#define Anum_pg_statistic2_sta2op1 11 +#define Anum_pg_statistic2_sta2op2 12 +#define Anum_pg_statistic2_sta2op3 13 +#define Anum_pg_statistic2_sta2op4 14 +#define Anum_pg_statistic2_sta2numbers1 15 +#define Anum_pg_statistic2_sta2numbers2 16 +#define Anum_pg_statistic2_sta2numbers3 17 +#define Anum_pg_statistic2_sta2numbers4 18 +#define Anum_pg_statistic2_sta2values1 19 +#define Anum_pg_statistic2_sta2values2 20 +#define Anum_pg_statistic2_sta2values3 21 +#define Anum_pg_statistic2_sta2values4 22 + +#if 0 + +/* + * Currently, three statistical slot "kinds" are defined: most common values, + * histogram, and correlation. Additional "kinds" will probably appear in + * future to help cope with non-scalar datatypes. Also, custom data types + * can define their own "kind" codes by mutual agreement between a custom + * typanalyze routine and the selectivity estimation functions of the type's + * operators. + * + * Code reading the pg_statistic relation should not assume that a particular + * data "kind" will appear in any particular slot. Instead, search the + * stakind fields to see if the desired data is available. (The standard + * function get_attstatsslot() may be used for this.) + */ + +/* + * The present allocation of "kind" codes is: + * + * 1-99: reserved for assignment by the core PostgreSQL project + * (values in this range will be documented in this file) + * 100-199: reserved for assignment by the PostGIS project + * (values to be documented in PostGIS documentation) + * 200-299: reserved for assignment by the ESRI ST_Geometry project + * (values to be documented in ESRI ST_Geometry documentation) + * 300-9999: reserved for future public assignments + * + * For private use you may choose a "kind" code at random in the range + * 10000-30000. However, for code that is to be widely disseminated it is + * better to obtain a publicly defined "kind" code by request from the + * PostgreSQL Global Development Group. + */ + +/* + * In a "most common values" slot, staop is the OID of the "=" operator + * used to decide whether values are the same or not. stavalues contains + * the K most common non-null values appearing in the column, and stanumbers + * contains their frequencies (fractions of total row count). The values + * shall be ordered in decreasing frequency. Note that since the arrays are + * variable-size, K may be chosen by the statistics collector. Values should + * not appear in MCV unless they have been observed to occur more than once; + * a unique column will have no MCV slot. + */ +#define STATISTIC_KIND_MCV 1 + +/* + * A "histogram" slot describes the distribution of scalar data. staop is + * the OID of the "<" operator that describes the sort ordering. (In theory, + * more than one histogram could appear, if a datatype has more than one + * useful sort operator.) stavalues contains M (>=2) non-null values that + * divide the non-null column data values into M-1 bins of approximately equal + * population. The first stavalues item is the MIN and the last is the MAX. + * stanumbers is not used and should be NULL. IMPORTANT POINT: if an MCV + * slot is also provided, then the histogram describes the data distribution + * *after removing the values listed in MCV* (thus, it's a "compressed + * histogram" in the technical parlance). This allows a more accurate + * representation of the distribution of a column with some very-common + * values. In a column with only a few distinct values, it's possible that + * the MCV list describes the entire data population; in this case the + * histogram reduces to empty and should be omitted. + */ +#define STATISTIC_KIND_HISTOGRAM 2 + +/* + * A "correlation" slot describes the correlation between the physical order + * of table tuples and the ordering of data values of this column, as seen + * by the "<" operator identified by staop. (As with the histogram, more + * than one entry could theoretically appear.) stavalues is not used and + * should be NULL. stanumbers contains a single entry, the correlation + * coefficient between the sequence of data values and the sequence of + * their actual tuple positions. The coefficient ranges from +1 to -1. + */ +#define STATISTIC_KIND_CORRELATION 3 + +/* + * A "most common elements" slot is similar to a "most common values" slot, + * except that it stores the most common non-null *elements* of the column + * values. This is useful when the column datatype is an array or some other + * type with identifiable elements (for instance, tsvector). staop contains + * the equality operator appropriate to the element type. stavalues contains + * the most common element values, and stanumbers their frequencies. Unlike + * MCV slots, the values are sorted into order (to support binary search + * for a particular value). Since this puts the minimum and maximum + * frequencies at unpredictable spots in stanumbers, there are two extra + * members of stanumbers, holding copies of the minimum and maximum + * frequencies. + * + * Note: in current usage for tsvector columns, the stavalues elements are of + * type text, even though their representation within tsvector is not + * exactly text. + */ +#define STATISTIC_KIND_MCELEM 4 + +#endif + +#endif /* PG_STATISTIC2_H */ diff -durpN postgresql.orig/src/include/commands/defrem.h postgresql/src/include/commands/defrem.h --- postgresql.orig/src/include/commands/defrem.h 2011-01-19 11:15:56.583064592 +0100 +++ postgresql/src/include/commands/defrem.h 2011-02-22 05:36:33.159517868 +0100 @@ -50,6 +50,7 @@ extern char *ChooseIndexName(const char bool primary, bool isconstraint); extern List *ChooseIndexColumnNames(List *indexElems); extern Oid GetDefaultOpClass(Oid type_id, Oid am_id); +extern void CrossColStat(CrossColStatStmt *stmt); /* commands/functioncmds.c */ extern void CreateFunction(CreateFunctionStmt *stmt, const char *queryString); diff -durpN postgresql.orig/src/include/nodes/nodes.h postgresql/src/include/nodes/nodes.h --- postgresql.orig/src/include/nodes/nodes.h 2011-01-04 15:13:16.151549992 +0100 +++ postgresql/src/include/nodes/nodes.h 2011-02-22 06:09:34.349625597 +0100 @@ -354,6 +354,8 @@ typedef enum NodeTag T_AlterTableSpaceOptionsStmt, T_SecLabelStmt, T_CreateForeignTableStmt, + T_CrossColStatStmt, + T_CrossColStatColumn, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff -durpN postgresql.orig/src/include/nodes/parsenodes.h postgresql/src/include/nodes/parsenodes.h --- postgresql.orig/src/include/nodes/parsenodes.h 2011-01-26 14:17:23.956756790 +0100 +++ postgresql/src/include/nodes/parsenodes.h 2011-02-22 06:09:22.638440685 +0100 @@ -1112,6 +1112,24 @@ typedef enum DropBehavior } DropBehavior; /* ---------------------- + * Create Cross Column Statistics + * ---------------------- + */ +typedef struct CrossColStatStmt +{ + NodeTag type; + bool create; + RangeVar *relation; + List *columns; +} CrossColStatStmt; + +typedef struct CrossColStatColumn +{ + NodeTag type; + char *colname; +} CrossColStatColumn; + +/* ---------------------- * Alter Table * ---------------------- */