TEXT vs PG_NODE_TREE in system columns (cross column and expression statistics patch)

Started by Boszormenyi Zoltanover 14 years ago6 messages
#1Boszormenyi Zoltan
zb@cybertec.at
1 attachment(s)

Hi,

attached is the WIP patch for cross-column statistics and
extra expression statistics.

My question is that why pg_node_tree is unusable as
syscache attribute? I attempted to alias it as text in the patch
but I get the following error if I try to use it by setting
USE_SYSCACHE_FOR_SEARCH to 1 in selfuncs.c.
Directly using the underlying pg_statistic3 doesn't cause an error.

zozo=# select * from t1 where i+1 = 5;
ERROR: could not determine which collation to use for string comparison
HINT: Use the COLLATE clause to set the collation explicitly.

The table looks like this:

create table t1 (id serial primary key, t text, i integer);

Best regards,
Zolt�n B�sz�rm�nyi

--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/

Attachments:

cross-column-v10.patchtext/plain; name=cross-column-v10.patchDownload
diff -dcrpN postgresql.orig/src/backend/catalog/Makefile postgresql/src/backend/catalog/Makefile
*** postgresql.orig/src/backend/catalog/Makefile	2011-02-22 18:51:42.675518441 +0100
--- postgresql/src/backend/catalog/Makefile	2011-04-28 14:21:14.694179328 +0200
*************** POSTGRES_BKI_SRCS = $(addprefix $(top_sr
*** 31,38 ****
  	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
  	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
  	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
! 	pg_statistic.h pg_rewrite.h pg_trigger.h pg_description.h \
! 	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
  	pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
  	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
  	pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \
--- 31,38 ----
  	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
  	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
  	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
! 	pg_statistic.h pg_statistic2.h pg_statistic3.h pg_rewrite.h pg_trigger.h \
! 	pg_description.h pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
  	pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
  	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
  	pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \
diff -dcrpN postgresql.orig/src/backend/commands/indexcmds.c postgresql/src/backend/commands/indexcmds.c
*** postgresql.orig/src/backend/commands/indexcmds.c	2011-04-26 09:54:04.012362009 +0200
--- postgresql/src/backend/commands/indexcmds.c	2011-04-28 14:21:14.697179127 +0200
***************
*** 26,32 ****
--- 26,35 ----
  #include "catalog/indexing.h"
  #include "catalog/pg_opclass.h"
  #include "catalog/pg_opfamily.h"
+ #include "catalog/pg_statistic2.h"
+ #include "catalog/pg_statistic3.h"
  #include "catalog/pg_tablespace.h"
+ #include "catalog/pg_type.h"
  #include "commands/dbcommands.h"
  #include "commands/defrem.h"
  #include "commands/tablecmds.h"
***************
*** 36,41 ****
--- 39,45 ----
  #include "nodes/nodeFuncs.h"
  #include "optimizer/clauses.h"
  #include "optimizer/planner.h"
+ #include "optimizer/var.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_func.h"
  #include "parser/parse_oper.h"
*************** ReindexDatabase(const char *databaseName
*** 1693,1695 ****
--- 1697,1961 ----
  
  	MemoryContextDelete(private_context);
  }
+ 
+ /*
+  * DoCrossColStat
+  *	Add or remove one row in pg_statistic2
+  */
+ static void
+ DoCrossColStat(ExtraStatStmt *stmt)
+ {
+ 	Oid			relId;
+ 	Relation		rel;
+ 	ListCell	   *l;
+ 	int			len, i, j;
+ 	bool			differ = false;
+ 	AttrNumber	   *attnums;
+ 	AttrNumber	   *sorted_attnums;
+ 	int16			typlen;
+ 	bool			typbyval;
+ 	char			typalign;
+ 	Datum		   *datum_attnums;
+ 	ArrayType	   *arr_attnums;
+ 	ScanKeyData		scanKey[2];
+ 	SysScanDesc		scan;
+ 	HeapTuple		tuple;
+ 	TupleDesc		tupDesc;
+ 	Datum			values[Natts_pg_statistic2];
+ 	bool			nulls[Natts_pg_statistic2];
+ 
+ 	relId = RangeVarGetRelid(stmt->relation, false);
+ 
+ 	len = list_length(stmt->columns);
+ 	if (len < 2)
+ 		elog(ERROR, "cross column statistics need at least two columns");
+ 
+ 	attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ 	sorted_attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ 	datum_attnums = (Datum *)palloc(len * sizeof(Datum));
+ 
+ 	i = 0;
+ 	foreach(l, stmt->columns)
+ 	{
+ 		Node	   *node = (Node *) lfirst(l);
+ 		Var	   *var;
+ 
+ 		if (!IsA(node, Var))
+ 			elog(ERROR, "not a column reference");
+ 
+ 		var = (Var *) node;
+ 
+ 		if (var->varattno == 0)
+ 			elog(ERROR, "row expansion via \"*\" is not supported here");
+ 
+ 		attnums[i++] = var->varattno;
+ 	}
+ 
+ 	for (i = 0; i < len; i++)
+ 		sorted_attnums[i] = attnums[i];
+ 	for (i = 0;  i < len - 1; i++)
+ 		for (j = i+1; j < len; j++)
+ 			if (sorted_attnums[i] > sorted_attnums[j])
+ 			{
+ 				AttrNumber	tmp = sorted_attnums[i];
+ 
+ 				sorted_attnums[i] = sorted_attnums[j];
+ 				sorted_attnums[j] = tmp;
+ 			}
+ 
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		if (!differ && attnums[i] != sorted_attnums[i])
+ 			differ = true;
+ 
+ 		if ((i < len - 1) && sorted_attnums[i] == sorted_attnums[i+1])
+ 			elog(ERROR, "column list must contain every column exactly once");
+ 
+ 		datum_attnums[i] = Int16GetDatum(sorted_attnums[i]);
+ 	}
+ 
+ 	if (differ)
+ 		elog(WARNING, "the column list was reordered in the order of table attributes");
+ 
+ 	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+ 	arr_attnums = construct_array(datum_attnums, len,
+ 						INT2OID, typlen, typbyval, typalign);
+ 
+ 	rel = heap_open(Statistic2RelationId, RowExclusiveLock);
+ 
+ 	/*
+ 	 * There's no syscache for pg_statistic2,
+ 	 * arrays aren't supported there as search keys.
+ 	 * We need to do the hard way.
+ 	 */
+ 	ScanKeyInit(&scanKey[0],
+ 					Anum_pg_statistic2_sta2relid,
+ 					BTEqualStrategyNumber, F_OIDEQ,
+ 					ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 					Anum_pg_statistic2_sta2attnums,
+ 					BTEqualStrategyNumber, F_ARRAY_EQ,
+ 					PointerGetDatum(arr_attnums));
+ 
+ 	scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true,
+ 									SnapshotNow, 2, scanKey);
+ 
+ 	tuple = systable_getnext(scan);
+ 
+ 	if (stmt->create)
+ 	{
+ 		if (HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic2 entry already exists for this table and set of columns");
+ 		}
+ 		systable_endscan(scan);
+ 
+ 		for (i = 0; i < Natts_pg_statistic2; i++)
+ 			nulls[i] = TRUE;
+ 
+ 		values[Anum_pg_statistic2_sta2relid - 1] = ObjectIdGetDatum(relId);
+ 		nulls[Anum_pg_statistic2_sta2relid - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic2_sta2attnums - 1] = PointerGetDatum(arr_attnums);
+ 		nulls[Anum_pg_statistic2_sta2attnums - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic2_sta2inherit - 1] = BoolGetDatum(FALSE);
+ 		nulls[Anum_pg_statistic2_sta2inherit - 1] = FALSE;
+ 
+ 		tupDesc = RelationGetDescr(rel);
+ 
+ 		tuple = heap_form_tuple(tupDesc, values, nulls);
+ 
+ 		simple_heap_insert(rel, tuple);
+ 
+ 		CatalogUpdateIndexes(rel, tuple);
+ 	}
+ 	else
+ 	{
+ 		if (!HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic2 entry doesn't exist for this table and set of columns");
+ 		}
+ 
+ 		simple_heap_delete(rel, &tuple->t_self);
+ 
+ 		systable_endscan(scan);
+ 	}
+ 
+ 	relation_close(rel, NoLock);
+ }
+ 
+ /*
+  * DoExprColStat
+  *	Add or remove one row in pg_statistic3
+  */
+ static void
+ DoExprStat(ExtraStatStmt *stmt)
+ {
+ 	Oid			relId;
+ 	Relation		rel;
+ 	int			i;
+ 	char			*exprbin;
+ 	Datum			exprbindatum;
+ 	ScanKeyData		scanKey[2];
+ 	SysScanDesc		scan;
+ 	HeapTuple		tuple;
+ 	TupleDesc		tupDesc;
+ 	Datum			values[Natts_pg_statistic3];
+ 	bool			nulls[Natts_pg_statistic3];
+ 
+ 	if (IsA(stmt->expr, Var) || IsA(stmt->expr, ColumnRef))
+ 		elog(ERROR, "single column are covered by basic statistics");
+ 
+ 	relId = RangeVarGetRelid(stmt->relation, false);
+ 
+ 	rel = heap_open(Statistic3RelationId, RowExclusiveLock);
+ 
+ 	exprbin = nodeToString(stmt->expr);
+ 	exprbindatum = CStringGetTextDatum(exprbin);
+ 
+ 	/*
+ 	 * There's no syscache for pg_statistic3,
+ 	 * arrays aren't supported there as search keys.
+ 	 * We need to do the hard way.
+ 	 */
+ 	ScanKeyInit(&scanKey[0],
+ 					Anum_pg_statistic3_sta3relid,
+ 					BTEqualStrategyNumber, F_OIDEQ,
+ 					ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 					Anum_pg_statistic3_sta3expr,
+ 					BTEqualStrategyNumber, F_TEXTEQ,
+ 					exprbindatum);
+ 
+ 	scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true,
+ 									SnapshotNow, 2, scanKey);
+ 
+ 	tuple = systable_getnext(scan);
+ 
+ 	if (stmt->create)
+ 	{
+ 		if (HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic3 entry already exists for this table and expression");
+ 		}
+ 		systable_endscan(scan);
+ 
+ 		if (!contain_var_clause(stmt->expr))
+ 			elog(ERROR, "constant expressions are not interesting");
+ 
+ 		for (i = 0; i < Natts_pg_statistic3; i++)
+ 			nulls[i] = TRUE;
+ 
+ 		values[Anum_pg_statistic3_sta3relid - 1] = ObjectIdGetDatum(relId);
+ 		nulls[Anum_pg_statistic3_sta3relid - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic3_sta3expr - 1] = exprbindatum;
+ 		nulls[Anum_pg_statistic3_sta3expr - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic3_sta3inherit - 1] = BoolGetDatum(FALSE);
+ 		nulls[Anum_pg_statistic3_sta3inherit - 1] = FALSE;
+ 
+ 		tupDesc = RelationGetDescr(rel);
+ 
+ 		tuple = heap_form_tuple(tupDesc, values, nulls);
+ 
+ 		simple_heap_insert(rel, tuple);
+ 
+ 		CatalogUpdateIndexes(rel, tuple);
+ 	}
+ 	else
+ 	{
+ 		if (!HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic3 entry doesn't exist for this table and expression");
+ 		}
+ 
+ 		simple_heap_delete(rel, &tuple->t_self);
+ 
+ 		systable_endscan(scan);
+ 	}
+ 
+ 	pfree(exprbin);
+ 	pfree(DatumGetPointer(exprbindatum));
+ 
+ 	relation_close(rel, NoLock);
+ }
+ 
+ /*
+  * ExtraColStat
+  *	Add or remove one entry in pg_statistics2 or pg_statistic3
+  */
+ void ExtraStatistics(ExtraStatStmt *stmt)
+ {
+ 	if (list_length(stmt->columns) > 0)
+ 		DoCrossColStat(stmt);
+ 	else if (stmt->expr != NULL)
+ 		DoExprStat(stmt);
+ 	else
+ 		elog(ERROR, "internal error in ExtraStatistics");
+ }
diff -dcrpN postgresql.orig/src/backend/executor/nodeHash.c postgresql/src/backend/executor/nodeHash.c
*** postgresql.orig/src/backend/executor/nodeHash.c	2011-04-11 15:36:27.096816773 +0200
--- postgresql/src/backend/executor/nodeHash.c	2011-04-28 14:21:14.700178924 +0200
*************** ExecHashBuildSkewHash(HashJoinTable hash
*** 1144,1150 ****
  	if (!HeapTupleIsValid(statsTuple))
  		return;
  
! 	if (get_attstatsslot(statsTuple, node->skewColType, node->skewColTypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
  						 &values, &nvalues,
--- 1144,1150 ----
  	if (!HeapTupleIsValid(statsTuple))
  		return;
  
! 	if (get_attstatsslot(statsTuple, STAT_VARIABLE, node->skewColType, node->skewColTypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
  						 &values, &nvalues,
diff -dcrpN postgresql.orig/src/backend/nodes/copyfuncs.c postgresql/src/backend/nodes/copyfuncs.c
*** postgresql.orig/src/backend/nodes/copyfuncs.c	2011-04-19 09:37:54.828715621 +0200
--- postgresql/src/backend/nodes/copyfuncs.c	2011-04-28 14:21:14.704178653 +0200
*************** _copyCreateForeignTableStmt(CreateForeig
*** 3458,3463 ****
--- 3458,3476 ----
  	return newnode;
  }
  
+ static ExtraStatStmt *
+ _copyExtraStatStmt(ExtraStatStmt *from)
+ {
+ 	ExtraStatStmt *newnode = makeNode(ExtraStatStmt);
+ 
+ 	COPY_SCALAR_FIELD(create);
+ 	newnode->relation = _copyRangeVar(from->relation);
+ 	COPY_NODE_FIELD(columns);
+ 	COPY_NODE_FIELD(expr);
+ 
+ 	return newnode;
+ }
+ 
  static CreateTrigStmt *
  _copyCreateTrigStmt(CreateTrigStmt *from)
  {
*************** copyObject(void *from)
*** 4377,4382 ****
--- 4390,4398 ----
  		case T_CreateForeignTableStmt:
  			retval = _copyCreateForeignTableStmt(from);
  			break;
+ 		case T_ExtraStatStmt:
+ 			retval = _copyExtraStatStmt(from);
+ 			break;
  		case T_CreateTrigStmt:
  			retval = _copyCreateTrigStmt(from);
  			break;
diff -dcrpN postgresql.orig/src/backend/nodes/equalfuncs.c postgresql/src/backend/nodes/equalfuncs.c
*** postgresql.orig/src/backend/nodes/equalfuncs.c	2011-04-19 09:37:54.829715550 +0200
--- postgresql/src/backend/nodes/equalfuncs.c	2011-04-28 14:21:14.707178453 +0200
*************** _equalCreateForeignTableStmt(CreateForei
*** 1795,1800 ****
--- 1795,1812 ----
  }
  
  static bool
+ _equalExtraStatStmt(ExtraStatStmt *a, ExtraStatStmt *b)
+ {
+ 	COMPARE_SCALAR_FIELD(create);
+ 	if (!_equalRangeVar(a->relation, b->relation))
+ 		return FALSE;
+ 	COMPARE_NODE_FIELD(columns);
+ 	COMPARE_NODE_FIELD(expr);
+ 
+ 	return true;
+ }
+ 
+ static bool
  _equalCreateTrigStmt(CreateTrigStmt *a, CreateTrigStmt *b)
  {
  	COMPARE_STRING_FIELD(trigname);
*************** equal(void *a, void *b)
*** 2930,2935 ****
--- 2942,2950 ----
  		case T_CreateForeignTableStmt:
  			retval = _equalCreateForeignTableStmt(a, b);
  			break;
+ 		case T_ExtraStatStmt:
+ 			retval = _equalExtraStatStmt(a, b);
+ 			break;
  		case T_CreateTrigStmt:
  			retval = _equalCreateTrigStmt(a, b);
  			break;
diff -dcrpN postgresql.orig/src/backend/optimizer/path/clausesel.c postgresql/src/backend/optimizer/path/clausesel.c
*** postgresql.orig/src/backend/optimizer/path/clausesel.c	2011-01-04 15:13:15.940560845 +0100
--- postgresql/src/backend/optimizer/path/clausesel.c	2011-04-28 14:21:14.720177575 +0200
***************
*** 13,29 ****
--- 13,40 ----
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
+ #include "postgres_ext.h"
  
+ #include "access/skey.h"
+ #include "access/relscan.h"
+ #include "catalog/indexing.h"
  #include "catalog/pg_operator.h"
+ #include "catalog/pg_statistic2.h"
+ #include "catalog/pg_statistic3.h"
+ #include "catalog/pg_type.h"
  #include "nodes/makefuncs.h"
+ #include "nodes/pg_list.h"
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/plancat.h"
+ #include "optimizer/var.h"
  #include "parser/parsetree.h"
+ #include "utils/array.h" 
  #include "utils/fmgroids.h"
  #include "utils/lsyscache.h"
  #include "utils/selfuncs.h"
+ #include "utils/tqual.h"
  
  
  /*
*************** typedef struct RangeQueryClause
*** 34,39 ****
--- 45,51 ----
  {
  	struct RangeQueryClause *next;		/* next in linked list */
  	Node	   *var;			/* The common variable of the clauses */
+ 	AttrNumber	varattno;	/* for finding cross-column statistics */
  	bool		have_lobound;	/* found a low-bound clause yet? */
  	bool		have_hibound;	/* found a high-bound clause yet? */
  	Selectivity lobound;		/* Selectivity of a var > something clause */
*************** typedef struct RangeQueryClause
*** 43,48 ****
--- 55,75 ----
  static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  			   bool varonleft, bool isLTsel, Selectivity s2);
  
+ typedef struct CrossColumnClause
+ {
+ 	struct CrossColumnClause *next;
+ 	Node	   *var;
+ 	AttrNumber	varattno;
+ 	Node	   *expr;
+ 	Selectivity	sel;
+ } CrossColumnClause;
+ 
+ static void addXCClause(CrossColumnClause **xclist, Node *clause,
+ 			   bool varonleft, Selectivity s2);
+ 
+ static bool crosscolumn_selectivity(Oid relId,
+ 			   CrossColumnClause **xcnext, RangeQueryClause **rqlist,
+ 			   Selectivity *result_sel);
  
  /****************************************************************************
   *		ROUTINES TO COMPUTE SELECTIVITIES
*************** clauselist_selectivity(PlannerInfo *root
*** 99,106 ****
--- 126,138 ----
  {
  	Selectivity s1 = 1.0;
  	RangeQueryClause *rqlist = NULL;
+ 	CrossColumnClause *xclist = NULL;
+ 	Oid		relId = InvalidOid;
+ 	bool		onerel = false;
  	ListCell   *l;
  
+ //	elog(NOTICE, "clauselist_selectivity varRelid %d, list length %d", varRelid, list_length(clauses));
+ 
  	/*
  	 * If there's exactly one clause, then no use in trying to match up pairs,
  	 * so just go directly to clause_selectivity().
*************** clauselist_selectivity(PlannerInfo *root
*** 162,167 ****
--- 194,215 ----
  					 (varonleft = false,
  					  is_pseudo_constant_clause_relids(linitial(expr->args),
  													   rinfo->left_relids)));
+ 				if (ok)
+ 				{
+ 					int	relid;
+ 					Oid	tmprelId;
+ 
+ 					relid = bms_singleton_member(rinfo->clause_relids);
+ 					tmprelId = root->simple_rte_array[relid]->relid;
+ 
+ 					if (!OidIsValid(relId))
+ 					{
+ 						onerel = true;
+ 						relId = tmprelId;
+ 					}
+ 					else if (relId != tmprelId)
+ 						onerel = false;
+ 				}
  			}
  			else
  			{
*************** clauselist_selectivity(PlannerInfo *root
*** 169,174 ****
--- 217,241 ----
  					(is_pseudo_constant_clause(lsecond(expr->args)) ||
  					 (varonleft = false,
  					  is_pseudo_constant_clause(linitial(expr->args))));
+ 				if (ok)
+ 				{
+ 					Relids	relids;
+ 					int	relid;
+ 					Oid	tmprelId;
+ 
+ 					relids = pull_varnos(clause);
+ 					relid = bms_singleton_member(relids);
+ 					tmprelId = root->simple_rte_array[relid]->relid;
+ 					bms_free(relids);
+ 
+ 					if (!OidIsValid(relId))
+ 					{
+ 						onerel = true;
+ 						relId = tmprelId;
+ 					}
+ 					else if (relId != tmprelId)
+ 						onerel = false;
+ 				}
  			}
  
  			if (ok)
*************** clauselist_selectivity(PlannerInfo *root
*** 188,193 ****
--- 255,264 ----
  						addRangeClause(&rqlist, clause,
  									   varonleft, false, s2);
  						break;
+ 					case F_EQSEL:
+ 						addXCClause(&xclist, clause,
+ 									   varonleft, s2);
+ 						break;
  					default:
  						/* Just merge the selectivity in generically */
  						s1 = s1 * s2;
*************** clauselist_selectivity(PlannerInfo *root
*** 202,207 ****
--- 273,299 ----
  	}
  
  	/*
+ 	 * Scan xclist and rqlist recursively and filter out
+ 	 * all possible cross-column selectivities.
+ 	 */
+ 	if (onerel)
+ 		crosscolumn_selectivity(relId, &xclist, &rqlist, &s1);
+ 
+ 	/*
+ 	 * Free the cross-column clauses
+ 	 */
+ 	while (xclist != NULL)
+ 	{
+ 		CrossColumnClause *xcnext;
+ 
+ 		s1 = s1 * xclist->sel;
+ 
+ 		xcnext = xclist->next;
+ 		pfree(xclist);
+ 		xclist = xcnext;
+ 	}
+ 
+ 	/*
  	 * Now scan the rangequery pair list.
  	 */
  	while (rqlist != NULL)
*************** clauselist_selectivity(PlannerInfo *root
*** 279,284 ****
--- 371,392 ----
  	return s1;
  }
  
+ static AttrNumber
+ var_get_attno(Node *clause)
+ {
+ 	Var	   *var;
+ 
+ 	if (IsA(clause, Var))
+ 	{
+ 		var = (Var *)clause;
+ //		elog(NOTICE, "var_get_attno varattno %d", var->varattno);
+ 		return var->varattno;
+ 	}
+ 
+ //	elog(NOTICE, "var_get_attno default 0");
+ 	return 0;
+ }
+ 
  /*
   * addRangeClause --- add a new range clause for clauselist_selectivity
   *
*************** addRangeClause(RangeQueryClause **rqlist
*** 358,363 ****
--- 466,473 ----
  	/* No matching var found, so make a new clause-pair data structure */
  	rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
  	rqelem->var = var;
+ 	rqelem->varattno = var_get_attno(var);
+ 
  	if (is_lobound)
  	{
  		rqelem->have_lobound = true;
*************** addRangeClause(RangeQueryClause **rqlist
*** 375,380 ****
--- 485,522 ----
  }
  
  /*
+  * addXCClause - add a new clause to the list of clauses for cross-column stats inspection
+  *
+  */
+ static void
+ addXCClause(CrossColumnClause **xclist, Node *clause,
+ 					bool varonleft, Selectivity s)
+ {
+ 	CrossColumnClause *xcelem;
+ 	Node	   *var;
+ 	Node	   *expr;
+ 
+ 	if (varonleft)
+ 	{
+ 		var = get_leftop((Expr *) clause);
+ 		expr = get_rightop((Expr *) clause);
+ 	}
+ 	else
+ 	{
+ 		var = get_rightop((Expr *) clause);
+ 		expr = get_leftop((Expr *) clause);
+ 	}
+ 
+ 	xcelem = (CrossColumnClause *) palloc(sizeof(CrossColumnClause));
+ 	xcelem->var = var;
+ 	xcelem->varattno = var_get_attno(var);
+ 	xcelem->expr = expr;
+ 	xcelem->sel = s;
+ 	xcelem->next = *xclist;
+ 	*xclist = xcelem;
+ }
+ 
+ /*
   * bms_is_subset_singleton
   *
   * Same result as bms_is_subset(s, bms_make_singleton(x)),
*************** clause_selectivity(PlannerInfo *root,
*** 499,504 ****
--- 641,648 ----
  	{
  		rinfo = (RestrictInfo *) clause;
  
+ //		elog(NOTICE, "RestrictInfo, %s", nodeToString(rinfo->clause));
+ 
  		/*
  		 * If the clause is marked pseudoconstant, then it will be used as a
  		 * gating qual and should not affect selectivity estimates; hence
*************** clause_selectivity(PlannerInfo *root,
*** 779,781 ****
--- 923,1211 ----
  
  	return s1;
  }
+ 
+ static bool
+ has_xcol_selectivity(Oid relId, int natts, AttrNumber *attnums, Selectivity *result_sel)
+ {
+ 	Relation	rel;
+ 	Datum		*datums = (Datum *)palloc(natts * sizeof(Datum));
+ 	ArrayType	*arr_attnums;
+ 	int		i;
+ 	int16		typlen;
+ 	bool		typbyval;
+ 	char		typalign;
+ 	ScanKeyData	scanKey[2];
+ 	SysScanDesc	scan;
+ 	HeapTuple	tuple;
+ 	bool		result;
+ 	Selectivity	sel = 1e-5; /* fixed selectivity for now */
+ 
+ 	for (i = 0; i < natts; i++)
+ 		datums[i] = Int16GetDatum(attnums[i]);
+ 
+ 	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+ 	arr_attnums = construct_array(datums, natts,
+ 							INT2OID, typlen, typbyval, typalign);
+ 
+ 	rel = heap_open(Statistic2RelationId, AccessShareLock);
+ 
+ 	ScanKeyInit(&scanKey[0],
+ 					Anum_pg_statistic2_sta2relid,
+ 					BTEqualStrategyNumber, F_OIDEQ,
+ 					ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 					Anum_pg_statistic2_sta2attnums,
+ 					BTEqualStrategyNumber, F_ARRAY_EQ,
+ 					PointerGetDatum(arr_attnums));
+ 
+ 	scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true,
+ 								SnapshotNow, 2, scanKey);
+ 
+ 	tuple = systable_getnext(scan);
+ 
+ 	result = HeapTupleIsValid(tuple);
+ 
+ 	systable_endscan(scan);
+ 
+ 	heap_close(rel, NoLock);
+ 
+ 	pfree(datums);
+ 
+ 	if (result)
+ 		*result_sel = sel;
+ 
+ 	return result;
+ }
+ 
+ typedef struct {
+ 	CrossColumnClause	*xc;
+ 	RangeQueryClause	*rq;
+ } reclist;
+ 
+ typedef struct {
+ 	int	len;
+ 	reclist	*rclist;
+ 	AttrNumber *attnums;
+ } reclist2;
+ 
+ 
+ /* add rclist to our list, so the ordered attnums arrays are unique */
+ static void
+ add_reclist(int len, reclist *rclist, List **results)
+ {
+ 	ListCell   *lc;
+ 	int		i, j;
+ 	reclist2	*rclist2;
+ 	AttrNumber	*attnums = (AttrNumber *) palloc(len * sizeof(AttrNumber));
+ 
+ 	/* collect the ordered varattnos from the Vars */
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		if (rclist[i].xc)
+ 			attnums[i] = rclist[i].xc->varattno;
+ 		else
+ 			attnums[i] = rclist[i].rq->varattno;
+ 	}
+ 	for (i = 0; i < len - 1; i++)
+ 		for (j = i + 1; j < len; j++)
+ 		{
+ 			AttrNumber tmp = attnums[i];
+ 			attnums[i] = attnums[j];
+ 			attnums[j] = tmp;
+ 		}
+ 
+ 	/* match this ordered attnum list against the current list of attnum arrays */
+ 	foreach(lc, *results)
+ 	{
+ 		reclist2   *rc2 = (reclist2 *) lfirst(lc);
+ 
+ 		if (len != rc2->len)
+ 			continue;
+ 
+ 		for (i = 0; i < len; i++)
+ 			if (attnums[i] != rc2->attnums[i])
+ 				break;
+ 		if (i < len)
+ 			continue;
+ 
+ 		/* found */
+ 		return;
+ 	}
+ 
+ 	/* not found, add it to the list */
+ 	rclist2 = (reclist2 *) palloc(sizeof(reclist2));
+ 	rclist2->len = len;
+ 	rclist2->rclist = (reclist *) palloc(len * sizeof(reclist));
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		rclist2->rclist[i].xc = rclist[i].xc;
+ 		rclist2->rclist[i].rq = rclist[i].rq;
+ 	}
+ 	rclist2->attnums = attnums;
+ 
+ 	*results = lappend(*results, rclist2);
+ }
+ 
+ static int
+ compare_reclist2(reclist2 *a, reclist2 *b)
+ {
+ 	int	i;
+ 
+ 	if (a->len < b->len)
+ 		return -1;
+ 	else if (a->len > b->len)
+ 		return 1;
+ 
+ 	for (i = 0; i < a->len; i++)
+ 	{
+ 		if (a->attnums[i] < b->attnums[i])
+ 			return -1;
+ 		else if (a->attnums[i] > b->attnums[i])
+ 			return 1;
+ 	}
+ 
+ 	return 0;
+ }
+ 
+ static bool
+ add_reclist2(int *len, reclist2 **p_reclist2, reclist2 *rclist2)
+ {
+ 	int	curr_len = *len;
+ 	int	i, j;
+ 
+ 	if (curr_len == 0)
+ 	{
+ 		p_reclist2[i] = rclist2;
+ 		curr_len++;
+ 		*len = curr_len;
+ 		return true;
+ 	}
+ 
+ 	for (i = 0; i < curr_len; i++)
+ 	{
+ 		if (compare_reclist2(rclist2, p_reclist2[i]) > 0)
+ 		{
+ 			for (j = curr_len; j > i; j--)
+ 				p_reclist2[j] = p_reclist2[j - 1];
+ 			p_reclist2[i] = rclist2;
+ 			curr_len++;
+ 			*len = curr_len;
+ 			return true;
+ 		}
+ 	}
+ 
+ 	return false;
+ }
+ 
+ static void
+ collect_xcol_lists(int curr_depth, CrossColumnClause *xclist, RangeQueryClause *rqlist, reclist *rclist, List **results)
+ {
+ 	CrossColumnClause	*xc_tmp;
+ 	RangeQueryClause	*rq_tmp;
+ 
+ 	for (xc_tmp = xclist; xc_tmp; xc_tmp = xc_tmp->next)
+ 	{
+ 		if (xc_tmp->varattno == 0)
+ 			continue;
+ 
+ 		rclist[curr_depth].xc = xc_tmp;
+ 		collect_xcol_lists(curr_depth + 1, xc_tmp->next, rqlist, rclist, results);
+ 		add_reclist(curr_depth + 1, rclist, results);
+ 		rclist[curr_depth].xc = NULL;
+ 	}
+ 
+ 	for (rq_tmp = rqlist; rq_tmp; rq_tmp = rq_tmp->next)
+ 	{
+ 		if (rq_tmp->varattno == 0)
+ 			continue;
+ 
+ 		rclist[curr_depth].rq = rq_tmp;
+ 		collect_xcol_lists(curr_depth + 1, (xclist ? xclist->next : xclist), rq_tmp->next, rclist, results);
+ 		add_reclist(curr_depth + 1, rclist, results);
+ 		rclist[curr_depth].rq = NULL;
+ 	}
+ }
+ 
+ static bool
+ crosscolumn_selectivity(Oid relId, CrossColumnClause **xclist, RangeQueryClause **rqlist, Selectivity *result_sel)
+ {
+ 	CrossColumnClause *xc;
+ 	RangeQueryClause *rq;
+ 	List	   *resultlist = NIL;
+ 	ListCell   *lc;
+ 	reclist	   *rclist;
+ 	reclist2   **p_rclist2;
+ 	int		max_len, i;
+ 	Selectivity	sel = 1.0;
+ 	bool		found_xc_sel = false;
+ 
+ 	max_len = 0;
+ 	for (rq = *rqlist; rq; max_len++, rq = rq->next)
+ 		;
+ 	for (xc = *xclist; xc; max_len++, xc = xc->next)
+ 		;
+ 
+ //	elog(NOTICE, "crosscolumn_selectivity max length of array %d", max_len);
+ 
+ 	rclist = (reclist *) palloc(max_len * sizeof(reclist));
+ 	for (i = 0; i < max_len; i++)
+ 	{
+ 		rclist[i].xc = NULL;
+ 		rclist[i].rq = NULL;
+ 	}
+ 
+ 	collect_xcol_lists(0, *xclist, *rqlist, rclist, &resultlist);
+ 
+ 	pfree(rclist);
+ 
+ 	max_len = list_length(resultlist);
+ //	elog(NOTICE, "crosscolumn_selectivity list length of arrays %d", max_len);
+ 	p_rclist2 = (reclist2 **) palloc(max_len * sizeof(reclist2 *));
+ 
+ 	max_len = 0;
+ 	foreach (lc, resultlist)
+ 	{
+ 		reclist2 *rclist2 = (reclist2 *) lfirst(lc);
+ 
+ 		if (!add_reclist2(&max_len, p_rclist2, rclist2))
+ 		{
+ 			pfree(rclist2->rclist);
+ 			pfree(rclist2->attnums);
+ 			pfree(rclist2);
+ 		}
+ 	}
+ //	elog(NOTICE, "crosscolumn_selectivity length of ordered/unique array of previous list %d", max_len);
+ 
+ 	list_free(resultlist);
+ 
+ 	for (i = 0; i < max_len; i++)
+ 	{
+ 		if (p_rclist2[i] == NULL)
+ 			continue;
+ 
+ 		if (has_xcol_selectivity(relId, p_rclist2[i]->len, p_rclist2[i]->attnums, &sel))
+ 		{
+ 			int	j;
+ 
+ 			/* remove the xclist and rqlist members found in p_rclist2[i] */
+ 			for (j = 0; j < p_rclist2[i]->len; j++)
+ 			{
+ 				/* TODO ... */
+ 			}
+ 
+ 			/* also, remove later elements in p_rclist2 that has any of the removed elements */
+ 			/* TODO ... */
+ 
+ //			elog(NOTICE, "crosscolumn_selectivity found xc selectivity %lf", sel);
+ 			found_xc_sel = true;
+ 			*result_sel *= sel;
+ 		}
+ 
+ 		pfree(p_rclist2[i]->rclist);
+ 		pfree(p_rclist2[i]->attnums);
+ 		pfree(p_rclist2[i]);
+ 	}
+ 	pfree(p_rclist2);
+ 
+ 	return found_xc_sel;
+ }
diff -dcrpN postgresql.orig/src/backend/parser/gram.y postgresql/src/backend/parser/gram.y
*** postgresql.orig/src/backend/parser/gram.y	2011-04-26 09:54:04.055359065 +0200
--- postgresql/src/backend/parser/gram.y	2011-04-28 14:21:14.739176296 +0200
*************** static void SplitColQualList(List *qualL
*** 199,209 ****
  		CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
  		CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
  		CreateAssertStmt CreateTrigStmt
! 		CreateUserStmt CreateUserMappingStmt CreateRoleStmt
! 		CreatedbStmt DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt
  		DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
! 		DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt
! 		DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
  		DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt
  		GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
  		LockStmt NotifyStmt ExplainableStmt PreparableStmt
--- 199,210 ----
  		CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
  		CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
  		CreateAssertStmt CreateTrigStmt
! 		CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatedbStmt
! 		CreateCCStmt CreateESStmt
! 		DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt
  		DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
! 		DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropCCStmt DropESStmt
! 		DropRoleStmt DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
  		DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt
  		GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
  		LockStmt NotifyStmt ExplainableStmt PreparableStmt
*************** static void SplitColQualList(List *qualL
*** 315,320 ****
--- 316,323 ----
  %type <list>	opt_fdw_options fdw_options
  %type <defelt>	fdw_option
  
+ %type <list>	cc_column_list
+ 
  %type <range>	OptTempTableName
  %type <into>	into_clause create_as_target
  
*************** static void SplitColQualList(List *qualL
*** 499,505 ****
  	DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
  
  	EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT
! 	EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN
  	EXTENSION EXTERNAL EXTRACT
  
  	FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD
--- 502,508 ----
  	DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
  
  	EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT
! 	EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXPRESSION
  	EXTENSION EXTERNAL EXTRACT
  
  	FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD
*************** stmt :
*** 700,707 ****
--- 703,712 ----
  			| CreateAsStmt
  			| CreateAssertStmt
  			| CreateCastStmt
+ 			| CreateCCStmt
  			| CreateConversionStmt
  			| CreateDomainStmt
+ 			| CreateESStmt
  			| CreateExtensionStmt
  			| CreateFdwStmt
  			| CreateForeignServerStmt
*************** stmt :
*** 729,734 ****
--- 734,741 ----
  			| DoStmt
  			| DropAssertStmt
  			| DropCastStmt
+ 			| DropCCStmt
+ 			| DropESStmt
  			| DropFdwStmt
  			| DropForeignServerStmt
  			| DropGroupStmt
*************** schema_stmt:
*** 1190,1195 ****
--- 1197,1267 ----
  
  /*****************************************************************************
   *
+  * Create cross column / expression statistics
+  *
+  *****************************************************************************/
+ 
+ CreateCCStmt:
+ 			CREATE CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = TRUE;
+ 					n->relation = $6;
+ 					n->columns = $8;
+ 					n->expr = NULL;
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ DropCCStmt:
+ 			DROP CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = FALSE;
+ 					n->relation = $6;
+ 					n->columns = $8;
+ 					n->expr = NULL;
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ cc_column_list:
+ 			columnref
+ 				{
+ 					$$ = list_make1($1);
+ 				}
+ 			| cc_column_list ',' columnref
+ 				{
+ 					$$ = lappend($1, $3);
+ 				}
+ 		;
+ 
+ CreateESStmt:
+ 			CREATE EXPRESSION STATISTICS ON qualified_name '(' a_expr ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = TRUE;
+ 					n->relation = $5;
+ 					n->columns = NIL;
+ 					n->expr = $7;
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ DropESStmt:
+ 			DROP EXPRESSION STATISTICS ON qualified_name '(' a_expr ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = FALSE;
+ 					n->relation = $5;
+ 					n->columns = NIL;
+ 					n->expr = $7;
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ /*****************************************************************************
+  *
   * Set PG internal variable
   *	  SET name TO 'var_value'
   * Include SQL92 syntax (thomas 1997-10-22):
*************** unreserved_keyword:
*** 11898,11903 ****
--- 11970,11976 ----
  			| EXCLUSIVE
  			| EXECUTE
  			| EXPLAIN
+ 			| EXPRESSION
  			| EXTENSION
  			| EXTERNAL
  			| FAMILY
diff -dcrpN postgresql.orig/src/backend/parser/parse_utilcmd.c postgresql/src/backend/parser/parse_utilcmd.c
*** postgresql.orig/src/backend/parser/parse_utilcmd.c	2011-04-26 09:54:04.062358585 +0200
--- postgresql/src/backend/parser/parse_utilcmd.c	2011-04-28 14:21:14.745175892 +0200
*************** setSchemaName(char *context_schema, char
*** 2700,2702 ****
--- 2700,2878 ----
  						"different from the one being created (%s)",
  						*stmt_schema_name, context_schema)));
  }
+ 
+ /*
+  *
+  */
+ bool
+ set_location_unknown_walker(Node *node, void *dummy)
+ {
+ 	if (node == NULL)
+ 		return false;
+ 
+ 	switch (node->type)
+ 	{
+ 		case T_TypeName:
+ 			{
+ 				TypeName *n = (TypeName *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ColumnRef:
+ 			{
+ 				ColumnRef *n = (ColumnRef *)node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ParamRef:
+ 			{
+ 				ParamRef *n = (ParamRef *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_A_Expr:
+ 			{
+ 				A_Expr *n = (A_Expr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_A_Const:
+ 			{
+ 				A_Const *n = (A_Const *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_TypeCast:
+ 			{
+ 				TypeCast *n = (TypeCast *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_FuncCall:
+ 			{
+ 				FuncCall *n = (FuncCall *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_A_ArrayExpr:
+ 			{
+ 				A_ArrayExpr *n = (A_ArrayExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_Var:
+ 			{
+ 				Var *n = (Var *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_Const:
+ 			{
+ 				Const *n = (Const *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_FuncExpr:
+ 			{
+ 				FuncExpr *n = (FuncExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_OpExpr:
+ 			{
+ 				OpExpr *n = (OpExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_DistinctExpr:
+ 			{
+ 				DistinctExpr *n = (DistinctExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ScalarArrayOpExpr:
+ 			{
+ 				ScalarArrayOpExpr *n = (ScalarArrayOpExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_BoolExpr:
+ 			{
+ 				BoolExpr *n = (BoolExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CaseExpr:
+ 			{
+ 				CaseExpr *n = (CaseExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CaseWhen:
+ 			{
+ 				CaseWhen *n = (CaseWhen *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ArrayExpr:
+ 			{
+ 				ArrayExpr *n = (ArrayExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CoalesceExpr:
+ 			{
+ 				CoalesceExpr *n = (CoalesceExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CoerceToDomain:
+ 			{
+ 				CoerceToDomain *n = (CoerceToDomain *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		default:
+ 			break;
+ 	}
+ 
+ 	return expression_tree_walker(node, set_location_unknown_walker, NULL);
+ }
+ 
+ /*
+  * transformExtraStatistics
+  *		Transform the column list or the expression into a form
+  *		usable by the executor.
+  */
+ ExtraStatStmt *
+ transformExtraStatistics(ExtraStatStmt *stmt, const char *queryString)
+ {
+ 	ParseState *pstate;
+ 	RangeTblEntry *rte;
+ 	ExtraStatStmt *newstmt;
+ 	List	   *columns = NIL;
+ 	ListCell   *cell;
+ 
+ 	pstate = make_parsestate(NULL);
+ 	pstate->p_sourcetext = queryString;
+ 
+ 	rte = addRangeTableEntry(pstate, stmt->relation, NULL, false, true);
+ 	addRTEtoQuery(pstate, rte, true, true, true);
+ 
+ 	newstmt = makeNode(ExtraStatStmt);
+ 	newstmt->create = stmt->create;
+ 	newstmt->relation = copyObject(stmt->relation);
+ 
+ 	foreach(cell, stmt->columns)
+ 	{
+ 		Node *col = lfirst(cell);
+ 
+ 		columns = lappend(columns, transformExpr(pstate, col));
+ 	}
+ 
+ 	newstmt->columns = columns;
+ 	newstmt->expr = transformExpr(pstate, stmt->expr);
+ 	query_or_expression_tree_walker(newstmt->expr, set_location_unknown_walker, NULL, 0);
+ 
+ 	return newstmt;
+ }
diff -dcrpN postgresql.orig/src/backend/tcop/utility.c postgresql/src/backend/tcop/utility.c
*** postgresql.orig/src/backend/tcop/utility.c	2011-04-26 09:54:04.075357697 +0200
--- postgresql/src/backend/tcop/utility.c	2011-04-28 14:21:14.748175689 +0200
*************** check_xact_readonly(Node *parsetree)
*** 229,234 ****
--- 229,235 ----
  		case T_AlterTableSpaceOptionsStmt:
  		case T_CreateForeignTableStmt:
  		case T_SecLabelStmt:
+ 		case T_ExtraStatStmt:
  			PreventCommandIfReadOnly(CreateCommandTag(parsetree));
  			break;
  		default:
*************** standard_ProcessUtility(Node *parsetree,
*** 573,578 ****
--- 574,587 ----
  			}
  			break;
  
+ 		case T_ExtraStatStmt:
+ 			{
+ 				ExtraStatStmt *newstmt = transformExtraStatistics((ExtraStatStmt *)parsetree, queryString);
+ 
+ 				ExtraStatistics(newstmt);
+ 			}
+ 			break;
+ 
  		case T_CreateTableSpaceStmt:
  			PreventTransactionChain(isTopLevel, "CREATE TABLESPACE");
  			CreateTableSpace((CreateTableSpaceStmt *) parsetree);
*************** CreateCommandTag(Node *parsetree)
*** 1734,1739 ****
--- 1743,1771 ----
  			tag = "CREATE FOREIGN TABLE";
  			break;
  
+ 		case T_ExtraStatStmt:
+ 			{
+ 				ExtraStatStmt *stmt = (ExtraStatStmt *)parsetree;
+ 
+ 				if (list_length(stmt->columns) > 0)
+ 				{
+ 					if (stmt->create)
+ 						tag = "CREATE CROSS COLUMN STATISTICS";
+ 					else
+ 						tag = "DROP CROSS COLUMN STATISTICS";
+ 				}
+ 				else if (stmt->expr != NULL)
+ 				{
+ 					if (stmt->create)
+ 						tag = "CREATE EXPRESSION STATISTICS";
+ 					else
+ 						tag = "DROP EXPRESSION STATISTICS";
+ 				}
+ 				else
+ 					tag = "???";
+ 			}
+ 			break;
+ 
  		case T_DropStmt:
  			switch (((DropStmt *) parsetree)->removeType)
  			{
diff -dcrpN postgresql.orig/src/backend/tsearch/ts_selfuncs.c postgresql/src/backend/tsearch/ts_selfuncs.c
*** postgresql.orig/src/backend/tsearch/ts_selfuncs.c	2011-04-11 15:36:27.150812982 +0200
--- postgresql/src/backend/tsearch/ts_selfuncs.c	2011-04-28 14:21:14.749175621 +0200
*************** tsquerysel(VariableStatData *vardata, Da
*** 169,175 ****
  		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
  
  		/* MCELEM will be an array of TEXT elements for a tsvector column */
! 		if (get_attstatsslot(vardata->statsTuple,
  							 TEXTOID, -1,
  							 STATISTIC_KIND_MCELEM, InvalidOid,
  							 NULL,
--- 169,175 ----
  		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
  
  		/* MCELEM will be an array of TEXT elements for a tsvector column */
! 		if (get_attstatsslot(vardata->statsTuple, STAT_VARIABLE,
  							 TEXTOID, -1,
  							 STATISTIC_KIND_MCELEM, InvalidOid,
  							 NULL,
diff -dcrpN postgresql.orig/src/backend/utils/adt/selfuncs.c postgresql/src/backend/utils/adt/selfuncs.c
*** postgresql.orig/src/backend/utils/adt/selfuncs.c	2011-04-26 09:54:04.094356395 +0200
--- postgresql/src/backend/utils/adt/selfuncs.c	2011-04-28 15:53:46.195302017 +0200
***************
*** 94,102 ****
--- 94,104 ----
  #include "access/gin.h"
  #include "access/sysattr.h"
  #include "catalog/index.h"
+ #include "catalog/indexing.h"
  #include "catalog/pg_collation.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_statistic.h"
+ #include "catalog/pg_statistic3.h"
  #include "catalog/pg_type.h"
  #include "executor/executor.h"
  #include "mb/pg_wchar.h"
***************
*** 111,116 ****
--- 113,119 ----
  #include "optimizer/restrictinfo.h"
  #include "optimizer/var.h"
  #include "parser/parse_coerce.h"
+ #include "parser/parse_utilcmd.h"
  #include "parser/parsetree.h"
  #include "utils/builtins.h"
  #include "utils/bytea.h"
*************** var_eq_const(VariableStatData *vardata, 
*** 275,281 ****
  		 * don't like this, maybe you shouldn't be using eqsel for your
  		 * operator...)
  		 */
! 		if (get_attstatsslot(vardata->statsTuple,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 278,284 ----
  		 * don't like this, maybe you shouldn't be using eqsel for your
  		 * operator...)
  		 */
! 		if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** var_eq_non_const(VariableStatData *varda
*** 417,423 ****
  		 * Cross-check: selectivity should never be estimated as more than the
  		 * most common value's.
  		 */
! 		if (get_attstatsslot(vardata->statsTuple,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 420,426 ----
  		 * Cross-check: selectivity should never be estimated as more than the
  		 * most common value's.
  		 */
! 		if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** mcv_selectivity(VariableStatData *vardat
*** 588,594 ****
  	sumcommon = 0.0;
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
--- 591,597 ----
  	sumcommon = 0.0;
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
*************** histogram_selectivity(VariableStatData *
*** 664,670 ****
  	Assert(min_hist_size > 2 * n_skip);
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 NULL,
--- 667,673 ----
  	Assert(min_hist_size > 2 * n_skip);
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 NULL,
*************** ineq_histogram_selectivity(PlannerInfo *
*** 741,747 ****
  	 * the reverse way if isgt is TRUE.
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 &hist_op,
--- 744,750 ----
  	 * the reverse way if isgt is TRUE.
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 &hist_op,
*************** booltestsel(PlannerInfo *root, BoolTestT
*** 1434,1440 ****
  		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
  		freq_null = stats->stanullfrac;
  
! 		if (get_attstatsslot(vardata.statsTuple,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 1437,1443 ----
  		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
  		freq_null = stats->stanullfrac;
  
! 		if (get_attstatsslot(vardata.statsTuple, vardata.stats_type,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** eqjoinsel_inner(Oid operator,
*** 2074,2080 ****
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2077,2083 ----
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->stats_type,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** eqjoinsel_inner(Oid operator,
*** 2087,2093 ****
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
  		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2090,2096 ----
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
  		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->stats_type,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** eqjoinsel_semi(Oid operator,
*** 2309,2315 ****
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2312,2318 ----
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->stats_type,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** eqjoinsel_semi(Oid operator,
*** 2321,2327 ****
  
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2324,2330 ----
  
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->stats_type,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** estimate_hash_bucketsize(PlannerInfo *ro
*** 3322,3328 ****
  
  	if (HeapTupleIsValid(vardata.statsTuple))
  	{
! 		if (get_attstatsslot(vardata.statsTuple,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 3325,3331 ----
  
  	if (HeapTupleIsValid(vardata.statsTuple))
  	{
! 		if (get_attstatsslot(vardata.statsTuple, vardata.stats_type,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** examine_variable(PlannerInfo *root, Node
*** 4103,4108 ****
--- 4106,4112 ----
  {
  	Node	   *basenode;
  	Relids		varnos;
+ 	int		onerelid = 0;
  	RelOptInfo *onerel;
  
  	/* Make sure we don't return dangling pointers in vardata */
*************** examine_variable(PlannerInfo *root, Node
*** 4147,4152 ****
--- 4151,4157 ----
  		}
  		else if (rte->rtekind == RTE_RELATION)
  		{
+ 			vardata->stats_type = STAT_VARIABLE;
  			vardata->statsTuple = SearchSysCache3(STATRELATTINH,
  												ObjectIdGetDatum(rte->relid),
  												Int16GetDatum(var->varattno),
*************** examine_variable(PlannerInfo *root, Node
*** 4185,4192 ****
  		case BMS_SINGLETON:
  			if (varRelid == 0 || bms_is_member(varRelid, varnos))
  			{
! 				onerel = find_base_rel(root,
! 					   (varRelid ? varRelid : bms_singleton_member(varnos)));
  				vardata->rel = onerel;
  				node = basenode;	/* strip any relabeling */
  			}
--- 4190,4197 ----
  		case BMS_SINGLETON:
  			if (varRelid == 0 || bms_is_member(varRelid, varnos))
  			{
! 				onerelid = (varRelid ? varRelid : bms_singleton_member(varnos));
! 				onerel = find_base_rel(root, onerelid);
  				vardata->rel = onerel;
  				node = basenode;	/* strip any relabeling */
  			}
*************** examine_variable(PlannerInfo *root, Node
*** 4220,4233 ****
  	{
  		/*
  		 * We have an expression in vars of a single relation.	Try to match
! 		 * it to expressional index columns, in hopes of finding some
! 		 * statistics.
  		 *
  		 * XXX it's conceivable that there are multiple matches with different
  		 * index opfamilies; if so, we need to pick one that matches the
  		 * operator we are estimating for.	FIXME later.
  		 */
  		ListCell   *ilist;
  
  		foreach(ilist, onerel->indexlist)
  		{
--- 4225,4301 ----
  	{
  		/*
  		 * We have an expression in vars of a single relation.	Try to match
! 		 * it to expression statistics first then to expressional index columns,
! 		 * in hopes of finding some statistics.
  		 *
  		 * XXX it's conceivable that there are multiple matches with different
  		 * index opfamilies; if so, we need to pick one that matches the
  		 * operator we are estimating for.	FIXME later.
  		 */
  		ListCell   *ilist;
+ 		Node	   *expr = copyObject(node);
+ 		char	   *exprbin;
+ 		Datum		exprbindatum;
+ 
+ #define USE_SYSCACHE_FOR_SEARCH	0
+ #if !USE_SYSCACHE_FOR_SEARCH
+ 		Relation	rel;
+ 		ScanKeyData	scanKey[2];  
+ 		SysScanDesc	scan;
+ 		HeapTuple	tuple;
+ #endif
+ 
+ 		query_or_expression_tree_walker(expr, set_location_unknown_walker, NULL, 0);
+ 		exprbin = nodeToString(expr);
+ 		exprbindatum = CStringGetTextDatum(exprbin);
+ 
+ #if USE_SYSCACHE_FOR_SEARCH
+ 		vardata->statsTuple = SearchSysCache3(STAT3RELEXPRINH,
+ 								ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid),
+ 								exprbindatum,
+ 								BoolGetDatum(false));
+ 		if (HeapTupleIsValid(vardata->statsTuple))
+ 		{
+ 			vardata->stats_type = STAT_EXPRESSION;
+ 			vardata->freefunc = ReleaseSysCache;
+ 			return;
+ 		}
+ 
+ #else
+ 
+ 		rel = heap_open(Statistic3RelationId, RowShareLock);
+ 
+ 		ScanKeyInit(&scanKey[0],
+ 							Anum_pg_statistic3_sta3relid,
+ 							BTEqualStrategyNumber, F_OIDEQ,
+ 							ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid));
+ 		ScanKeyInit(&scanKey[1],
+ 							Anum_pg_statistic3_sta3expr,
+ 							BTEqualStrategyNumber, F_TEXTEQ,
+ 							exprbindatum);
+ 
+ 		scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true,
+ 							SnapshotNow, 2, scanKey);
+ 
+ 		tuple = systable_getnext(scan);
+ 		if (HeapTupleIsValid(tuple))
+ 		{
+ //			elog(NOTICE, "examine_variable expression found");
+ 			vardata->stats_type = STAT_EXPRESSION;
+ 			vardata->statsTuple = heap_copytuple(tuple);
+ 			vardata->freefunc = heap_freetuple;
+ 		}
+ 
+ 		systable_endscan(scan);
+ 
+ 		pfree(exprbin);
+ 		pfree(DatumGetPointer(exprbindatum));
+ 
+ 		relation_close(rel, RowShareLock);
+ 
+ 		if (vardata->statsTuple)
+ 			return;
+ #endif
  
  		foreach(ilist, onerel->indexlist)
  		{
*************** examine_variable(PlannerInfo *root, Node
*** 4286,4291 ****
--- 4354,4360 ----
  						}
  						else if (index->indpred == NIL)
  						{
+ 							vardata->stats_type = STAT_VARIABLE;
  							vardata->statsTuple =
  								SearchSysCache3(STATRELATTINH,
  										   ObjectIdGetDatum(index->indexoid),
*************** get_variable_numdistinct(VariableStatDat
*** 4327,4337 ****
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple))
  	{
! 		/* Use the pg_statistic entry */
! 		Form_pg_statistic stats;
  
! 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
! 		stadistinct = stats->stadistinct;
  	}
  	else if (vardata->vartype == BOOLOID)
  	{
--- 4396,4425 ----
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple))
  	{
! 		switch (vardata->stats_type)
! 		{
! 			case STAT_VARIABLE:
! 			{
! 				/* Use the pg_statistic entry */
! 				Form_pg_statistic stats;
  
! 				stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
! 				stadistinct = stats->stadistinct;
! 				break;
! 			}
! 			case STAT_EXPRESSION:
! 			{
! 				/* Use the pg_statistic entry */
! 				Form_pg_statistic3 stats3;
! 
! 				stats3 = (Form_pg_statistic3) GETSTRUCT(vardata->statsTuple);
! 				stadistinct = stats3->sta3distinct;
! 				break;
! 			}
! 			default:
! 				elog(ERROR, "internal error");
! 				return 0.0;
! 		}
  	}
  	else if (vardata->vartype == BOOLOID)
  	{
*************** get_variable_range(PlannerInfo *root, Va
*** 4462,4468 ****
  	 * the one we want, fail --- this suggests that there is data we can't
  	 * use.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, sortop,
  						 NULL,
--- 4550,4556 ----
  	 * the one we want, fail --- this suggests that there is data we can't
  	 * use.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, sortop,
  						 NULL,
*************** get_variable_range(PlannerInfo *root, Va
*** 4477,4483 ****
  		}
  		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
  	}
! 	else if (get_attstatsslot(vardata->statsTuple,
  							  vardata->atttype, vardata->atttypmod,
  							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
  							  NULL,
--- 4565,4571 ----
  		}
  		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
  	}
! 	else if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  							  vardata->atttype, vardata->atttypmod,
  							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
  							  NULL,
*************** get_variable_range(PlannerInfo *root, Va
*** 4494,4500 ****
  	 * the MCVs.  However, usually the MCVs will not be the extreme values, so
  	 * avoid unnecessary data copying.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
--- 4582,4588 ----
  	 * the MCVs.  However, usually the MCVs will not be the extreme values, so
  	 * avoid unnecessary data copying.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6255,6260 ****
--- 6343,6349 ----
  		}
  		else
  		{
+ 			vardata.stats_type = STAT_VARIABLE;
  			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
  												 ObjectIdGetDatum(relid),
  												 Int16GetDatum(colnum),
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6281,6286 ****
--- 6370,6376 ----
  		}
  		else
  		{
+ 			vardata.stats_type = STAT_VARIABLE;
  			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
  												 ObjectIdGetDatum(relid),
  												 Int16GetDatum(colnum),
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6300,6306 ****
  									 index->opcintype[0],
  									 BTLessStrategyNumber);
  		if (OidIsValid(sortop) &&
! 			get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
  							 STATISTIC_KIND_CORRELATION,
  							 sortop,
  							 NULL,
--- 6390,6396 ----
  									 index->opcintype[0],
  									 BTLessStrategyNumber);
  		if (OidIsValid(sortop) &&
! 			get_attstatsslot(vardata.statsTuple, vardata.stats_type, InvalidOid, 0,
  							 STATISTIC_KIND_CORRELATION,
  							 sortop,
  							 NULL,
diff -dcrpN postgresql.orig/src/backend/utils/cache/catcache.c postgresql/src/backend/utils/cache/catcache.c
*** postgresql.orig/src/backend/utils/cache/catcache.c	2011-04-13 10:11:05.021216766 +0200
--- postgresql/src/backend/utils/cache/catcache.c	2011-04-28 14:21:14.766174476 +0200
*************** GetCCHashEqFuncs(Oid keytype, PGFunction
*** 135,140 ****
--- 135,141 ----
  			*eqfunc = F_INT4EQ;
  			break;
  		case TEXTOID:
+ 		case PGNODETREEOID:
  			*hashfunc = hashtext;
  
  			*eqfunc = F_TEXTEQ;
diff -dcrpN postgresql.orig/src/backend/utils/cache/lsyscache.c postgresql/src/backend/utils/cache/lsyscache.c
*** postgresql.orig/src/backend/utils/cache/lsyscache.c	2011-04-11 15:36:27.175811226 +0200
--- postgresql/src/backend/utils/cache/lsyscache.c	2011-04-28 14:21:14.769174273 +0200
***************
*** 27,32 ****
--- 27,33 ----
  #include "catalog/pg_operator.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_statistic.h"
+ #include "catalog/pg_statistic3.h" 
  #include "catalog/pg_type.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
*************** get_attavgwidth(Oid relid, AttrNumber at
*** 2667,2680 ****
   * type ID to pass to free_attstatsslot later.
   */
  bool
! get_attstatsslot(HeapTuple statstuple,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
  				 Datum **values, int *nvalues,
  				 float4 **numbers, int *nnumbers)
  {
! 	Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
  	int			i,
  				j;
  	Datum		val;
--- 2668,2682 ----
   * type ID to pass to free_attstatsslot later.
   */
  bool
! get_attstatsslot(HeapTuple statstuple, StatType stat_type,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
  				 Datum **values, int *nvalues,
  				 float4 **numbers, int *nnumbers)
  {
! 	Form_pg_statistic stats;
! 	Form_pg_statistic3 stats3;
  	int			i,
  				j;
  	Datum		val;
*************** get_attstatsslot(HeapTuple statstuple,
*** 2685,2707 ****
  	HeapTuple	typeTuple;
  	Form_pg_type typeForm;
  
! 	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
  	{
! 		if ((&stats->stakind1)[i] == reqkind &&
! 			(reqop == InvalidOid || (&stats->staop1)[i] == reqop))
  			break;
  	}
  	if (i >= STATISTIC_NUM_SLOTS)
  		return false;			/* not there */
  
  	if (actualop)
! 		*actualop = (&stats->staop1)[i];
  
  	if (values)
  	{
! 		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stavalues1 + i,
  							  &isnull);
  		if (isnull)
  			elog(ERROR, "stavalues is null");
  		statarray = DatumGetArrayTypeP(val);
--- 2687,2755 ----
  	HeapTuple	typeTuple;
  	Form_pg_type typeForm;
  
! 	switch (stat_type)
  	{
! 		case STAT_VARIABLE:
! 			stats = (Form_pg_statistic) GETSTRUCT(statstuple);
! 			stats3 = NULL;
! 
! 			for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
! 			{
! 				if ((&stats->stakind1)[i] == reqkind &&
! 					(reqop == InvalidOid || (&stats->staop1)[i] == reqop))
! 					break;
! 			}
  			break;
+ 		case STAT_EXPRESSION:
+ 			stats = NULL;
+ 			stats3 = (Form_pg_statistic3) GETSTRUCT(statstuple);
+ 
+ 			for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+ 			{
+ 				if ((&stats3->sta3kind1)[i] == reqkind &&
+ 					(reqop == InvalidOid || (&stats3->sta3op1)[i] == reqop))
+ 					break;
+ 			}
+ 			break;
+ 		default:
+ 			elog(ERROR, "internal error");
+ 			return false; /* make compiler quiet */
  	}
+ 
  	if (i >= STATISTIC_NUM_SLOTS)
  		return false;			/* not there */
  
  	if (actualop)
! 	{
! 		switch (stat_type)
! 		{
! 			case STAT_VARIABLE:
! 				*actualop = (&stats->staop1)[i];
! 				break;
! 			case STAT_EXPRESSION:
! 				*actualop = (&stats3->sta3op1)[i];
! 				break;
! 		}
! 	}
  
  	if (values)
  	{
! 		switch (stat_type)
! 		{
! 			case STAT_VARIABLE:
! 				val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stavalues1 + i,
  							  &isnull);
+ 				break;
+ 			case STAT_EXPRESSION:
+ 				val = SysCacheGetAttr(STAT3RELEXPRINH, statstuple,
+ 							  Anum_pg_statistic3_sta3values1 + i,
+ 							  &isnull);
+ 				break;
+ 			default:
+ 				elog(ERROR, "internal error");
+ 				return false; /* silence compiler */
+ 		}
  		if (isnull)
  			elog(ERROR, "stavalues is null");
  		statarray = DatumGetArrayTypeP(val);
*************** get_attstatsslot(HeapTuple statstuple,
*** 2753,2761 ****
  
  	if (numbers)
  	{
! 		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stanumbers1 + i,
  							  &isnull);
  		if (isnull)
  			elog(ERROR, "stanumbers is null");
  		statarray = DatumGetArrayTypeP(val);
--- 2801,2821 ----
  
  	if (numbers)
  	{
! 		switch (stat_type)
! 		{
! 			case STAT_VARIABLE:
! 				val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stanumbers1 + i,
  							  &isnull);
+ 				break;
+ 			case STAT_EXPRESSION:
+ 				val = SysCacheGetAttr(STAT3RELEXPRINH, statstuple,
+ 							  Anum_pg_statistic3_sta3numbers1 + i,
+ 							  &isnull);
+ 				break;
+ 			default:
+ 				return false; /* silence compiler */
+ 		}
  		if (isnull)
  			elog(ERROR, "stanumbers is null");
  		statarray = DatumGetArrayTypeP(val);
diff -dcrpN postgresql.orig/src/backend/utils/cache/syscache.c postgresql/src/backend/utils/cache/syscache.c
*** postgresql.orig/src/backend/utils/cache/syscache.c	2011-04-26 09:54:04.095356326 +0200
--- postgresql/src/backend/utils/cache/syscache.c	2011-04-28 14:21:14.775173869 +0200
***************
*** 45,50 ****
--- 45,51 ----
  #include "catalog/pg_proc.h"
  #include "catalog/pg_rewrite.h"
  #include "catalog/pg_statistic.h"
+ #include "catalog/pg_statistic3.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_ts_config.h"
  #include "catalog/pg_ts_config_map.h"
*************** static const struct cachedesc cacheinfo[
*** 587,592 ****
--- 588,604 ----
  		},
  		1024
  	},
+ 	{Statistic3RelationId,		/* STAT3RELEXPRINH */
+ 		Statistic3RelidExprInhIndexId,
+ 		3,
+ 		{
+ 			Anum_pg_statistic3_sta3relid,
+ 			Anum_pg_statistic3_sta3expr,
+ 			Anum_pg_statistic3_sta3inherit,
+ 			0
+ 		},
+ 		1024
+ 	},
  	{StatisticRelationId,		/* STATRELATTINH */
  		StatisticRelidAttnumInhIndexId,
  		3,
diff -dcrpN postgresql.orig/src/include/catalog/indexing.h postgresql/src/include/catalog/indexing.h
*** postgresql.orig/src/include/catalog/indexing.h	2011-02-10 10:36:32.320680534 +0100
--- postgresql/src/include/catalog/indexing.h	2011-04-28 14:21:14.777173734 +0200
*************** DECLARE_UNIQUE_INDEX(pg_extension_oid_in
*** 300,305 ****
--- 300,312 ----
  DECLARE_UNIQUE_INDEX(pg_extension_name_index, 3081, on pg_extension using btree(extname name_ops));
  #define ExtensionNameIndexId 3081
  
+ DECLARE_UNIQUE_INDEX(pg_statistic2_relid_att_inh_index, 3072, on pg_statistic2 using btree(sta2relid oid_ops, sta2attnums array_ops, sta2inherit bool_ops));
+ #define Statistic2RelidAttnumsInhIndexId	3072
+ 
+ DECLARE_UNIQUE_INDEX(pg_statistic3_relid_expr_inh_index, 3074, on pg_statistic3 using btree(sta3relid oid_ops, sta3expr text_ops, sta3inherit bool_ops));
+ #define Statistic3RelidExprInhIndexId	3074
+ 
+ 
  /* last step of initialization script: build the indexes declared above */
  BUILD_INDICES
  
diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic2.h postgresql/src/include/catalog/pg_statistic2.h
*** postgresql.orig/src/include/catalog/pg_statistic2.h	1970-01-01 01:00:00.000000000 +0100
--- postgresql/src/include/catalog/pg_statistic2.h	2011-04-28 14:21:14.779173600 +0200
***************
*** 0 ****
--- 1,265 ----
+ /*-------------------------------------------------------------------------
+  *
+  * pg_statistic2.h
+  *	  definition of the system "cross-column statistic" relation (pg_statistic2)
+  *	  along with the relation's initial contents.
+  *
+  *
+  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/catalog/pg_statistic2.h
+  *
+  * NOTES
+  *	  the genbki.pl script reads this file and generates .bki
+  *	  information from the DATA() statements.
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef PG_STATISTIC2_H
+ #define PG_STATISTIC2_H
+ 
+ #include "catalog/genbki.h"
+ 
+ /*
+  * The CATALOG definition has to refer to the type of stavaluesN as
+  * "anyarray" so that bootstrap mode recognizes it.  There is no real
+  * typedef for that, however.  Since the fields are potentially-null and
+  * therefore can't be accessed directly from C code, there is no particular
+  * need for the C struct definition to show a valid field type --- instead
+  * we just make it int.
+  */
+ #define anyarray int
+ 
+ /* ----------------
+  *		pg_statistic2 definition.  cpp turns this into
+  *		typedef struct FormData_pg_statistic2
+  * ----------------
+  */
+ #define Statistic2RelationId  3071
+ 
+ CATALOG(pg_statistic2,3071) BKI_WITHOUT_OIDS
+ {
+ 	/* These fields form the unique key for the entry: */
+ 	Oid			sta2relid;		/* relation containing attribute */
+ 	int2		sta2attnums[1];		/* attribute (column) stats are for */
+ 	bool		sta2inherit;		/* true if inheritance children are included */
+ 
+ 	/* the fraction of the column's entries that are NULL: */
+ 	float4		sta2nullfrac;
+ 
+ 	/*
+ 	 * stawidth is the average width in bytes of non-null entries.	For
+ 	 * fixed-width datatypes this is of course the same as the typlen, but for
+ 	 * var-width types it is more useful.  Note that this is the average width
+ 	 * of the data as actually stored, post-TOASTing (eg, for a
+ 	 * moved-out-of-line value, only the size of the pointer object is
+ 	 * counted).  This is the appropriate definition for the primary use of
+ 	 * the statistic, which is to estimate sizes of in-memory hash tables of
+ 	 * tuples.
+ 	 */
+ 	int4		sta2width;
+ 
+ 	/* ----------------
+ 	 * stadistinct indicates the (approximate) number of distinct non-null
+ 	 * data values in the column.  The interpretation is:
+ 	 *		0		unknown or not computed
+ 	 *		> 0		actual number of distinct values
+ 	 *		< 0		negative of multiplier for number of rows
+ 	 * The special negative case allows us to cope with columns that are
+ 	 * unique (stadistinct = -1) or nearly so (for example, a column in
+ 	 * which values appear about twice on the average could be represented
+ 	 * by stadistinct = -0.5).	Because the number-of-rows statistic in
+ 	 * pg_class may be updated more frequently than pg_statistic2 is, it's
+ 	 * important to be able to describe such situations as a multiple of
+ 	 * the number of rows, rather than a fixed number of distinct values.
+ 	 * But in other cases a fixed number is correct (eg, a boolean column).
+ 	 * ----------------
+ 	 */
+ 	float4		sta2distinct;
+ 
+ 	/* ----------------
+ 	 * To allow keeping statistics on different kinds of datatypes,
+ 	 * we do not hard-wire any particular meaning for the remaining
+ 	 * statistical fields.	Instead, we provide several "slots" in which
+ 	 * statistical data can be placed.	Each slot includes:
+ 	 *		kind			integer code identifying kind of data
+ 	 *		op				OID of associated operator, if needed
+ 	 *		numbers			float4 array (for statistical values)
+ 	 *		values			anyarray (for representations of data values)
+ 	 * The ID and operator fields are never NULL; they are zeroes in an
+ 	 * unused slot.  The numbers and values fields are NULL in an unused
+ 	 * slot, and might also be NULL in a used slot if the slot kind has
+ 	 * no need for one or the other.
+ 	 * ----------------
+ 	 */
+ 
+ 	int2		sta2kind1;
+ 	int2		sta2kind2;
+ 	int2		sta2kind3;
+ 	int2		sta2kind4;
+ 
+ 	Oid			sta2op1;
+ 	Oid			sta2op2;
+ 	Oid			sta2op3;
+ 	Oid			sta2op4;
+ 
+ 	/*
+ 	 * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ 	 * (NULL). They cannot be accessed as C struct entries; you have to use
+ 	 * the full field access machinery (heap_getattr) for them.  We declare
+ 	 * them here for the catalog machinery.
+ 	 */
+ 
+ 	float4		sta2numbers1[1];
+ 	float4		sta2numbers2[1];
+ 	float4		sta2numbers3[1];
+ 	float4		sta2numbers4[1];
+ 
+ 	/*
+ 	 * Values in these arrays are values of the column's data type.  We
+ 	 * presently have to cheat quite a bit to allow polymorphic arrays of this
+ 	 * kind, but perhaps someday it'll be a less bogus facility.
+ 	 */
+ 	anyarray	sta2values1;
+ 	anyarray	sta2values2;
+ 	anyarray	sta2values3;
+ 	anyarray	sta2values4;
+ } FormData_pg_statistic2;
+ 
+ #define STATISTIC_NUM_SLOTS  4
+ 
+ #undef anyarray
+ 
+ 
+ /* ----------------
+  *		Form_pg_statistic2 corresponds to a pointer to a tuple with
+  *		the format of pg_statistic2 relation.
+  * ----------------
+  */
+ typedef FormData_pg_statistic2 *Form_pg_statistic2;
+ 
+ /* ----------------
+  *		compiler constants for pg_statistic2
+  * ----------------
+  */
+ #define Natts_pg_statistic2				22
+ #define Anum_pg_statistic2_sta2relid		1
+ #define Anum_pg_statistic2_sta2attnums		2
+ #define Anum_pg_statistic2_sta2inherit	3
+ #define Anum_pg_statistic2_sta2nullfrac	4
+ #define Anum_pg_statistic2_sta2width		5
+ #define Anum_pg_statistic2_sta2distinct	6
+ #define Anum_pg_statistic2_sta2kind1		7
+ #define Anum_pg_statistic2_sta2kind2		8
+ #define Anum_pg_statistic2_sta2kind3		9
+ #define Anum_pg_statistic2_sta2kind4		10
+ #define Anum_pg_statistic2_sta2op1		11
+ #define Anum_pg_statistic2_sta2op2		12
+ #define Anum_pg_statistic2_sta2op3		13
+ #define Anum_pg_statistic2_sta2op4		14
+ #define Anum_pg_statistic2_sta2numbers1	15
+ #define Anum_pg_statistic2_sta2numbers2	16
+ #define Anum_pg_statistic2_sta2numbers3	17
+ #define Anum_pg_statistic2_sta2numbers4	18
+ #define Anum_pg_statistic2_sta2values1	19
+ #define Anum_pg_statistic2_sta2values2	20
+ #define Anum_pg_statistic2_sta2values3	21
+ #define Anum_pg_statistic2_sta2values4	22
+ 
+ #if 0
+ 
+ /*
+  * Currently, three statistical slot "kinds" are defined: most common values,
+  * histogram, and correlation.	Additional "kinds" will probably appear in
+  * future to help cope with non-scalar datatypes.  Also, custom data types
+  * can define their own "kind" codes by mutual agreement between a custom
+  * typanalyze routine and the selectivity estimation functions of the type's
+  * operators.
+  *
+  * Code reading the pg_statistic2 relation should not assume that a particular
+  * data "kind" will appear in any particular slot.	Instead, search the
+  * stakind fields to see if the desired data is available.	(The standard
+  * function get_attstatsslot() may be used for this.)
+  */
+ 
+ /*
+  * The present allocation of "kind" codes is:
+  *
+  *	1-99:		reserved for assignment by the core PostgreSQL project
+  *				(values in this range will be documented in this file)
+  *	100-199:	reserved for assignment by the PostGIS project
+  *				(values to be documented in PostGIS documentation)
+  *	200-299:	reserved for assignment by the ESRI ST_Geometry project
+  *				(values to be documented in ESRI ST_Geometry documentation)
+  *	300-9999:	reserved for future public assignments
+  *
+  * For private use you may choose a "kind" code at random in the range
+  * 10000-30000.  However, for code that is to be widely disseminated it is
+  * better to obtain a publicly defined "kind" code by request from the
+  * PostgreSQL Global Development Group.
+  */
+ 
+ /*
+  * In a "most common values" slot, staop is the OID of the "=" operator
+  * used to decide whether values are the same or not.  stavalues contains
+  * the K most common non-null values appearing in the column, and stanumbers
+  * contains their frequencies (fractions of total row count).  The values
+  * shall be ordered in decreasing frequency.  Note that since the arrays are
+  * variable-size, K may be chosen by the statistics collector.	Values should
+  * not appear in MCV unless they have been observed to occur more than once;
+  * a unique column will have no MCV slot.
+  */
+ #define STATISTIC_KIND_MCV	1
+ 
+ /*
+  * A "histogram" slot describes the distribution of scalar data.  staop is
+  * the OID of the "<" operator that describes the sort ordering.  (In theory,
+  * more than one histogram could appear, if a datatype has more than one
+  * useful sort operator.)  stavalues contains M (>=2) non-null values that
+  * divide the non-null column data values into M-1 bins of approximately equal
+  * population.	The first stavalues item is the MIN and the last is the MAX.
+  * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+  * slot is also provided, then the histogram describes the data distribution
+  * *after removing the values listed in MCV* (thus, it's a "compressed
+  * histogram" in the technical parlance).  This allows a more accurate
+  * representation of the distribution of a column with some very-common
+  * values.	In a column with only a few distinct values, it's possible that
+  * the MCV list describes the entire data population; in this case the
+  * histogram reduces to empty and should be omitted.
+  */
+ #define STATISTIC_KIND_HISTOGRAM  2
+ 
+ /*
+  * A "correlation" slot describes the correlation between the physical order
+  * of table tuples and the ordering of data values of this column, as seen
+  * by the "<" operator identified by staop.  (As with the histogram, more
+  * than one entry could theoretically appear.)	stavalues is not used and
+  * should be NULL.	stanumbers contains a single entry, the correlation
+  * coefficient between the sequence of data values and the sequence of
+  * their actual tuple positions.  The coefficient ranges from +1 to -1.
+  */
+ #define STATISTIC_KIND_CORRELATION	3
+ 
+ /*
+  * A "most common elements" slot is similar to a "most common values" slot,
+  * except that it stores the most common non-null *elements* of the column
+  * values.	This is useful when the column datatype is an array or some other
+  * type with identifiable elements (for instance, tsvector).  staop contains
+  * the equality operator appropriate to the element type.  stavalues contains
+  * the most common element values, and stanumbers their frequencies.  Unlike
+  * MCV slots, the values are sorted into order (to support binary search
+  * for a particular value).  Since this puts the minimum and maximum
+  * frequencies at unpredictable spots in stanumbers, there are two extra
+  * members of stanumbers, holding copies of the minimum and maximum
+  * frequencies.
+  *
+  * Note: in current usage for tsvector columns, the stavalues elements are of
+  * type text, even though their representation within tsvector is not
+  * exactly text.
+  */
+ #define STATISTIC_KIND_MCELEM  4
+ 
+ #endif
+ 
+ #endif   /* PG_STATISTIC2_H */
diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic3.h postgresql/src/include/catalog/pg_statistic3.h
*** postgresql.orig/src/include/catalog/pg_statistic3.h	1970-01-01 01:00:00.000000000 +0100
--- postgresql/src/include/catalog/pg_statistic3.h	2011-04-28 14:21:14.780173533 +0200
***************
*** 0 ****
--- 1,265 ----
+ /*-------------------------------------------------------------------------
+  *
+  * pg_statistic3.h
+  *	  definition of the system "expression statistic" relation (pg_statistic3)
+  *	  along with the relation's initial contents.
+  *
+  *
+  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/catalog/pg_statistic3.h
+  *
+  * NOTES
+  *	  the genbki.pl script reads this file and generates .bki
+  *	  information from the DATA() statements.
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef PG_STATISTIC3_H
+ #define PG_STATISTIC3_H
+ 
+ #include "catalog/genbki.h"
+ 
+ /*
+  * The CATALOG definition has to refer to the type of stavaluesN as
+  * "anyarray" so that bootstrap mode recognizes it.  There is no real
+  * typedef for that, however.  Since the fields are potentially-null and
+  * therefore can't be accessed directly from C code, there is no particular
+  * need for the C struct definition to show a valid field type --- instead
+  * we just make it int.
+  */
+ #define anyarray int
+ 
+ /* ----------------
+  *		pg_statistic3 definition.  cpp turns this into
+  *		typedef struct FormData_pg_statistic3
+  * ----------------
+  */
+ #define Statistic3RelationId  3073
+ 
+ CATALOG(pg_statistic3,3073) BKI_WITHOUT_OIDS
+ {
+ 	/* These fields form the unique key for the entry: */
+ 	Oid			sta3relid;		/* relation containing attribute */
+ 	pg_node_tree		sta3expr;		/* expression stat is for */
+ 	bool		sta3inherit;		/* true if inheritance children are included */
+ 
+ 	/* the fraction of the column's entries that are NULL: */
+ 	float4		sta3nullfrac;
+ 
+ 	/*
+ 	 * stawidth is the average width in bytes of non-null entries.	For
+ 	 * fixed-width datatypes this is of course the same as the typlen, but for
+ 	 * var-width types it is more useful.  Note that this is the average width
+ 	 * of the data as actually stored, post-TOASTing (eg, for a
+ 	 * moved-out-of-line value, only the size of the pointer object is
+ 	 * counted).  This is the appropriate definition for the primary use of
+ 	 * the statistic, which is to estimate sizes of in-memory hash tables of
+ 	 * tuples.
+ 	 */
+ 	int4		sta3width;
+ 
+ 	/* ----------------
+ 	 * stadistinct indicates the (approximate) number of distinct non-null
+ 	 * data values in the column.  The interpretation is:
+ 	 *		0		unknown or not computed
+ 	 *		> 0		actual number of distinct values
+ 	 *		< 0		negative of multiplier for number of rows
+ 	 * The special negative case allows us to cope with columns that are
+ 	 * unique (stadistinct = -1) or nearly so (for example, a column in
+ 	 * which values appear about twice on the average could be represented
+ 	 * by stadistinct = -0.5).	Because the number-of-rows statistic in
+ 	 * pg_class may be updated more frequently than pg_statistic3 is, it's
+ 	 * important to be able to describe such situations as a multiple of
+ 	 * the number of rows, rather than a fixed number of distinct values.
+ 	 * But in other cases a fixed number is correct (eg, a boolean column).
+ 	 * ----------------
+ 	 */
+ 	float4		sta3distinct;
+ 
+ 	/* ----------------
+ 	 * To allow keeping statistics on different kinds of datatypes,
+ 	 * we do not hard-wire any particular meaning for the remaining
+ 	 * statistical fields.	Instead, we provide several "slots" in which
+ 	 * statistical data can be placed.	Each slot includes:
+ 	 *		kind			integer code identifying kind of data
+ 	 *		op				OID of associated operator, if needed
+ 	 *		numbers			float4 array (for statistical values)
+ 	 *		values			anyarray (for representations of data values)
+ 	 * The ID and operator fields are never NULL; they are zeroes in an
+ 	 * unused slot.  The numbers and values fields are NULL in an unused
+ 	 * slot, and might also be NULL in a used slot if the slot kind has
+ 	 * no need for one or the other.
+ 	 * ----------------
+ 	 */
+ 
+ 	int2		sta3kind1;
+ 	int2		sta3kind2;
+ 	int2		sta3kind3;
+ 	int2		sta3kind4;
+ 
+ 	Oid			sta3op1;
+ 	Oid			sta3op2;
+ 	Oid			sta3op3;
+ 	Oid			sta3op4;
+ 
+ 	/*
+ 	 * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ 	 * (NULL). They cannot be accessed as C struct entries; you have to use
+ 	 * the full field access machinery (heap_getattr) for them.  We declare
+ 	 * them here for the catalog machinery.
+ 	 */
+ 
+ 	float4		sta3numbers1[1];
+ 	float4		sta3numbers2[1];
+ 	float4		sta3numbers3[1];
+ 	float4		sta3numbers4[1];
+ 
+ 	/*
+ 	 * Values in these arrays are values of the column's data type.  We
+ 	 * presently have to cheat quite a bit to allow polymorphic arrays of this
+ 	 * kind, but perhaps someday it'll be a less bogus facility.
+ 	 */
+ 	anyarray	sta3values1;
+ 	anyarray	sta3values2;
+ 	anyarray	sta3values3;
+ 	anyarray	sta3values4;
+ } FormData_pg_statistic3;
+ 
+ #define STATISTIC_NUM_SLOTS  4
+ 
+ #undef anyarray
+ 
+ 
+ /* ----------------
+  *		Form_pg_statistic3 corresponds to a pointer to a tuple with
+  *		the format of pg_statistic3 relation.
+  * ----------------
+  */
+ typedef FormData_pg_statistic3 *Form_pg_statistic3;
+ 
+ /* ----------------
+  *		compiler constants for pg_statistic3
+  * ----------------
+  */
+ #define Natts_pg_statistic3				22
+ #define Anum_pg_statistic3_sta3relid		1
+ #define Anum_pg_statistic3_sta3expr		2
+ #define Anum_pg_statistic3_sta3inherit	3
+ #define Anum_pg_statistic3_sta3nullfrac	4
+ #define Anum_pg_statistic3_sta3width		5
+ #define Anum_pg_statistic3_sta3distinct	6
+ #define Anum_pg_statistic3_sta3kind1		7
+ #define Anum_pg_statistic3_sta3kind2		8
+ #define Anum_pg_statistic3_sta3kind3		9
+ #define Anum_pg_statistic3_sta3kind4		10
+ #define Anum_pg_statistic3_sta3op1		11
+ #define Anum_pg_statistic3_sta3op2		12
+ #define Anum_pg_statistic3_sta3op3		13
+ #define Anum_pg_statistic3_sta3op4		14
+ #define Anum_pg_statistic3_sta3numbers1	15
+ #define Anum_pg_statistic3_sta3numbers2	16
+ #define Anum_pg_statistic3_sta3numbers3	17
+ #define Anum_pg_statistic3_sta3numbers4	18
+ #define Anum_pg_statistic3_sta3values1	19
+ #define Anum_pg_statistic3_sta3values2	20
+ #define Anum_pg_statistic3_sta3values3	21
+ #define Anum_pg_statistic3_sta3values4	22
+ 
+ #if 0
+ 
+ /*
+  * Currently, three statistical slot "kinds" are defined: most common values,
+  * histogram, and correlation.	Additional "kinds" will probably appear in
+  * future to help cope with non-scalar datatypes.  Also, custom data types
+  * can define their own "kind" codes by mutual agreement between a custom
+  * typanalyze routine and the selectivity estimation functions of the type's
+  * operators.
+  *
+  * Code reading the pg_statistic3 relation should not assume that a particular
+  * data "kind" will appear in any particular slot.	Instead, search the
+  * stakind fields to see if the desired data is available.	(The standard
+  * function get_attstatsslot() may be used for this.)
+  */
+ 
+ /*
+  * The present allocation of "kind" codes is:
+  *
+  *	1-99:		reserved for assignment by the core PostgreSQL project
+  *				(values in this range will be documented in this file)
+  *	100-199:	reserved for assignment by the PostGIS project
+  *				(values to be documented in PostGIS documentation)
+  *	200-299:	reserved for assignment by the ESRI ST_Geometry project
+  *				(values to be documented in ESRI ST_Geometry documentation)
+  *	300-9999:	reserved for future public assignments
+  *
+  * For private use you may choose a "kind" code at random in the range
+  * 10000-30000.  However, for code that is to be widely disseminated it is
+  * better to obtain a publicly defined "kind" code by request from the
+  * PostgreSQL Global Development Group.
+  */
+ 
+ /*
+  * In a "most common values" slot, staop is the OID of the "=" operator
+  * used to decide whether values are the same or not.  stavalues contains
+  * the K most common non-null values appearing in the column, and stanumbers
+  * contains their frequencies (fractions of total row count).  The values
+  * shall be ordered in decreasing frequency.  Note that since the arrays are
+  * variable-size, K may be chosen by the statistics collector.	Values should
+  * not appear in MCV unless they have been observed to occur more than once;
+  * a unique column will have no MCV slot.
+  */
+ #define STATISTIC_KIND_MCV	1
+ 
+ /*
+  * A "histogram" slot describes the distribution of scalar data.  staop is
+  * the OID of the "<" operator that describes the sort ordering.  (In theory,
+  * more than one histogram could appear, if a datatype has more than one
+  * useful sort operator.)  stavalues contains M (>=2) non-null values that
+  * divide the non-null column data values into M-1 bins of approximately equal
+  * population.	The first stavalues item is the MIN and the last is the MAX.
+  * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+  * slot is also provided, then the histogram describes the data distribution
+  * *after removing the values listed in MCV* (thus, it's a "compressed
+  * histogram" in the technical parlance).  This allows a more accurate
+  * representation of the distribution of a column with some very-common
+  * values.	In a column with only a few distinct values, it's possible that
+  * the MCV list describes the entire data population; in this case the
+  * histogram reduces to empty and should be omitted.
+  */
+ #define STATISTIC_KIND_HISTOGRAM  2
+ 
+ /*
+  * A "correlation" slot describes the correlation between the physical order
+  * of table tuples and the ordering of data values of this column, as seen
+  * by the "<" operator identified by staop.  (As with the histogram, more
+  * than one entry could theoretically appear.)	stavalues is not used and
+  * should be NULL.	stanumbers contains a single entry, the correlation
+  * coefficient between the sequence of data values and the sequence of
+  * their actual tuple positions.  The coefficient ranges from +1 to -1.
+  */
+ #define STATISTIC_KIND_CORRELATION	3
+ 
+ /*
+  * A "most common elements" slot is similar to a "most common values" slot,
+  * except that it stores the most common non-null *elements* of the column
+  * values.	This is useful when the column datatype is an array or some other
+  * type with identifiable elements (for instance, tsvector).  staop contains
+  * the equality operator appropriate to the element type.  stavalues contains
+  * the most common element values, and stanumbers their frequencies.  Unlike
+  * MCV slots, the values are sorted into order (to support binary search
+  * for a particular value).  Since this puts the minimum and maximum
+  * frequencies at unpredictable spots in stanumbers, there are two extra
+  * members of stanumbers, holding copies of the minimum and maximum
+  * frequencies.
+  *
+  * Note: in current usage for tsvector columns, the stavalues elements are of
+  * type text, even though their representation within tsvector is not
+  * exactly text.
+  */
+ #define STATISTIC_KIND_MCELEM  4
+ 
+ #endif
+ 
+ #endif   /* PG_STATISTIC2_H */
diff -dcrpN postgresql.orig/src/include/commands/defrem.h postgresql/src/include/commands/defrem.h
*** postgresql.orig/src/include/commands/defrem.h	2011-04-11 15:36:27.243806451 +0200
--- postgresql/src/include/commands/defrem.h	2011-04-28 14:21:14.782173399 +0200
*************** extern char *ChooseIndexName(const char 
*** 50,55 ****
--- 50,56 ----
  				bool primary, bool isconstraint);
  extern List *ChooseIndexColumnNames(List *indexElems);
  extern Oid	GetDefaultOpClass(Oid type_id, Oid am_id);
+ extern void ExtraStatistics(ExtraStatStmt *stmt);
  
  /* commands/functioncmds.c */
  extern void CreateFunction(CreateFunctionStmt *stmt, const char *queryString);
diff -dcrpN postgresql.orig/src/include/nodes/nodes.h postgresql/src/include/nodes/nodes.h
*** postgresql.orig/src/include/nodes/nodes.h	2011-03-22 17:53:48.045903422 +0100
--- postgresql/src/include/nodes/nodes.h	2011-04-28 14:21:14.784173265 +0200
*************** typedef enum NodeTag
*** 362,367 ****
--- 362,368 ----
  	T_CreateExtensionStmt,
  	T_AlterExtensionStmt,
  	T_AlterExtensionContentsStmt,
+ 	T_ExtraStatStmt,
  
  	/*
  	 * TAGS FOR PARSE TREE NODES (parsenodes.h)
diff -dcrpN postgresql.orig/src/include/nodes/parsenodes.h postgresql/src/include/nodes/parsenodes.h
*** postgresql.orig/src/include/nodes/parsenodes.h	2011-04-26 09:54:04.106355573 +0200
--- postgresql/src/include/nodes/parsenodes.h	2011-04-28 14:21:14.789172925 +0200
*************** typedef enum DropBehavior
*** 1160,1165 ****
--- 1160,1178 ----
  } DropBehavior;
  
  /* ----------------------
+  *	Create Cross Column Statistics
+  * ----------------------
+  */
+ typedef struct ExtraStatStmt
+ {
+ 	NodeTag		type;
+ 	bool		create;
+ 	RangeVar   *relation;
+ 	List	   *columns;
+ 	Node	   *expr;
+ } ExtraStatStmt;
+ 
+ /* ----------------------
   *	Alter Table
   * ----------------------
   */
diff -dcrpN postgresql.orig/src/include/parser/kwlist.h postgresql/src/include/parser/kwlist.h
*** postgresql.orig/src/include/parser/kwlist.h	2011-03-18 13:11:36.826637445 +0100
--- postgresql/src/include/parser/kwlist.h	2011-04-28 14:21:14.790172858 +0200
*************** PG_KEYWORD("exclusive", EXCLUSIVE, UNRES
*** 148,153 ****
--- 148,154 ----
  PG_KEYWORD("execute", EXECUTE, UNRESERVED_KEYWORD)
  PG_KEYWORD("exists", EXISTS, COL_NAME_KEYWORD)
  PG_KEYWORD("explain", EXPLAIN, UNRESERVED_KEYWORD)
+ PG_KEYWORD("expression", EXPRESSION, UNRESERVED_KEYWORD)
  PG_KEYWORD("extension", EXTENSION, UNRESERVED_KEYWORD)
  PG_KEYWORD("external", EXTERNAL, UNRESERVED_KEYWORD)
  PG_KEYWORD("extract", EXTRACT, COL_NAME_KEYWORD)
diff -dcrpN postgresql.orig/src/include/parser/parse_utilcmd.h postgresql/src/include/parser/parse_utilcmd.h
*** postgresql.orig/src/include/parser/parse_utilcmd.h	2011-01-04 15:13:16.163549374 +0100
--- postgresql/src/include/parser/parse_utilcmd.h	2011-04-28 14:21:14.792172725 +0200
*************** extern void transformRuleStmt(RuleStmt *
*** 25,28 ****
--- 25,33 ----
  				  List **actions, Node **whereClause);
  extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
  
+ extern ExtraStatStmt *transformExtraStatistics(ExtraStatStmt *stmt,
+ 						const char *queryString);
+ 
+ extern bool set_location_unknown_walker(Node *node, void *context);
+ 
  #endif   /* PARSE_UTILCMD_H */
diff -dcrpN postgresql.orig/src/include/utils/lsyscache.h postgresql/src/include/utils/lsyscache.h
*** postgresql.orig/src/include/utils/lsyscache.h	2011-04-11 15:36:27.256805539 +0200
--- postgresql/src/include/utils/lsyscache.h	2011-04-28 14:21:14.793172658 +0200
***************
*** 16,21 ****
--- 16,22 ----
  #include "access/attnum.h"
  #include "access/htup.h"
  #include "nodes/pg_list.h"
+ #include "utils/selfuncs.h"
  
  /* I/O function selector for get_type_io_data */
  typedef enum IOFuncSelector
*************** extern Oid	getBaseType(Oid typid);
*** 131,137 ****
  extern Oid	getBaseTypeAndTypmod(Oid typid, int32 *typmod);
  extern int32 get_typavgwidth(Oid typid, int32 typmod);
  extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
! extern bool get_attstatsslot(HeapTuple statstuple,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
--- 132,139 ----
  extern Oid	getBaseTypeAndTypmod(Oid typid, int32 *typmod);
  extern int32 get_typavgwidth(Oid typid, int32 typmod);
  extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
! 
! extern bool get_attstatsslot(HeapTuple statstuple, StatType stat_type,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
diff -dcrpN postgresql.orig/src/include/utils/selfuncs.h postgresql/src/include/utils/selfuncs.h
*** postgresql.orig/src/include/utils/selfuncs.h	2011-04-13 10:11:05.060214051 +0200
--- postgresql/src/include/utils/selfuncs.h	2011-04-28 14:21:14.795172522 +0200
***************
*** 62,75 ****
  			p = 1.0; \
  	} while (0)
  
  
  /* Return data from examine_variable and friends */
  typedef struct VariableStatData
  {
  	Node	   *var;			/* the Var or expression tree */
  	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
! 	HeapTuple	statsTuple;		/* pg_statistic tuple, or NULL if none */
! 	/* NB: if statsTuple!=NULL, it must be freed when caller is done */
  	void		(*freefunc) (HeapTuple tuple);	/* how to free statsTuple */
  	Oid			vartype;		/* exposed type of expression */
  	Oid			atttype;		/* type to pass to get_attstatsslot */
--- 62,81 ----
  			p = 1.0; \
  	} while (0)
  
+ typedef enum StatType {
+ 	STAT_VARIABLE,
+ 	STAT_EXPRESSION
+ } StatType;
  
  /* Return data from examine_variable and friends */
  typedef struct VariableStatData
  {
  	Node	   *var;			/* the Var or expression tree */
  	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
! 	StatType	stats_type;
! 	HeapTuple	statsTuple;		/* pg_statistic or pg_statistic3 tuple depending on stats_type
! 						 * or NULL if none */
! 	/* NB: if statsTuple!=NULL || stats3Tuple!=NULL, it must be freed when caller is done */
  	void		(*freefunc) (HeapTuple tuple);	/* how to free statsTuple */
  	Oid			vartype;		/* exposed type of expression */
  	Oid			atttype;		/* type to pass to get_attstatsslot */
diff -dcrpN postgresql.orig/src/include/utils/syscache.h postgresql/src/include/utils/syscache.h
*** postgresql.orig/src/include/utils/syscache.h	2011-02-10 10:36:32.352678334 +0100
--- postgresql/src/include/utils/syscache.h	2011-04-28 14:21:14.796172454 +0200
*************** enum SysCacheIdentifier
*** 73,78 ****
--- 73,79 ----
  	RELNAMENSP,
  	RELOID,
  	RULERELNAME,
+ 	STAT3RELEXPRINH,
  	STATRELATTINH,
  	TABLESPACEOID,
  	TSCONFIGMAP,
diff -dcrpN postgresql.orig/src/test/regress/expected/sanity_check.out postgresql/src/test/regress/expected/sanity_check.out
*** postgresql.orig/src/test/regress/expected/sanity_check.out	2011-02-10 10:36:32.374676822 +0100
--- postgresql/src/test/regress/expected/sanity_check.out	2011-04-28 14:21:14.797172386 +0200
*************** SELECT relname, relhasindex
*** 121,126 ****
--- 121,128 ----
   pg_shdepend             | t
   pg_shdescription        | t
   pg_statistic            | t
+  pg_statistic2           | t
+  pg_statistic3           | t
   pg_tablespace           | t
   pg_trigger              | t
   pg_ts_config            | t
*************** SELECT relname, relhasindex
*** 157,163 ****
   timetz_tbl              | f
   tinterval_tbl           | f
   varchar_tbl             | f
! (146 rows)
  
  --
  -- another sanity check: every system catalog that has OIDs should have
--- 159,165 ----
   timetz_tbl              | f
   tinterval_tbl           | f
   varchar_tbl             | f
! (148 rows)
  
  --
  -- another sanity check: every system catalog that has OIDs should have
#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Boszormenyi Zoltan (#1)
Re: TEXT vs PG_NODE_TREE in system columns (cross column and expression statistics patch)

Boszormenyi Zoltan <zb@cybertec.at> writes:

My question is that why pg_node_tree is unusable as
syscache attribute? I attempted to alias it as text in the patch
but I get the following error if I try to use it by setting
USE_SYSCACHE_FOR_SEARCH to 1 in selfuncs.c.
Directly using the underlying pg_statistic3 doesn't cause an error.

I'm not sure what you're running into, but it doesn't matter because the
design would be unworkable anyway. Expression text representations
could be extremely long, too long to be usable as index keys. I don't
believe either of the proposed indexes on the new catalogs are workable,
actually, and the catalog definitions themselves seem a bit outre.
Why are you setting it up so that stats on expressions and cross-column
stats are mutually exclusive?

The idea that's used currently is that we only compute stats on
expressions that are indexed, so the OID/attnum of the index column
can be used as a reference in pg_statistic. I don't see a strong
need to deviate from that approach.

regards, tom lane

#3Alvaro Herrera
alvherre@commandprompt.com
In reply to: Boszormenyi Zoltan (#1)
Re: TEXT vs PG_NODE_TREE in system columns (cross column and expression statistics patch)

Excerpts from Boszormenyi Zoltan's message of jue abr 28 11:03:56 -0300 2011:

Hi,

attached is the WIP patch for cross-column statistics and
extra expression statistics.

My question is that why pg_node_tree is unusable as
syscache attribute? I attempted to alias it as text in the patch
but I get the following error if I try to use it by setting
USE_SYSCACHE_FOR_SEARCH to 1 in selfuncs.c.
Directly using the underlying pg_statistic3 doesn't cause an error.

Two comments:
1. it seems that expression stats are mostly separate from cross-column
stats; does it really make sense to submit the two in the same patch?

2. there are almost no code comments anywhere

3. (bonus) if you're going to copy/paste pg_attribute.h verbatim into
the new files, please remove the bits you currently have in "#if 0".
(Not to mention the fact that the new catalogs seem rather poorly
named).

--
Álvaro Herrera <alvherre@commandprompt.com>
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

#4Alvaro Herrera
alvherre@commandprompt.com
In reply to: Boszormenyi Zoltan (#1)
Re: TEXT vs PG_NODE_TREE in system columns (cross column and expression statistics patch)

Excerpts from Boszormenyi Zoltan's message of jue abr 28 11:03:56 -0300 2011:

My question is that why pg_node_tree is unusable as
syscache attribute? I attempted to alias it as text in the patch
but I get the following error if I try to use it by setting
USE_SYSCACHE_FOR_SEARCH to 1 in selfuncs.c.
Directly using the underlying pg_statistic3 doesn't cause an error.

zozo=# select * from t1 where i+1 = 5;
ERROR: could not determine which collation to use for string comparison
HINT: Use the COLLATE clause to set the collation explicitly.

Maybe the pg_node_tree problem is a bug with the collation feature. If
you could reproduce it in unpatched master, I'm sure it'd find a quick
death.

--
Álvaro Herrera <alvherre@commandprompt.com>
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

#5Tom Lane
tgl@sss.pgh.pa.us
In reply to: Alvaro Herrera (#4)
Re: TEXT vs PG_NODE_TREE in system columns (cross column and expression statistics patch)

Alvaro Herrera <alvherre@commandprompt.com> writes:

Excerpts from Boszormenyi Zoltan's message of jue abr 28 11:03:56 -0300 2011:

ERROR: could not determine which collation to use for string comparison
HINT: Use the COLLATE clause to set the collation explicitly.

Maybe the pg_node_tree problem is a bug with the collation feature. If
you could reproduce it in unpatched master, I'm sure it'd find a quick
death.

Actually, I rather imagine it comes from this choice in catcache.c:

/* Currently, there are no catcaches on collation-aware data types */
cache->cc_skey[i].sk_collation = InvalidOid;

I'd be more worried about that if I thought it made any sense to use
a pg_node_tree column as an index key, but I don't ...

regards, tom lane

#6Boszormenyi Zoltan
zb@cybertec.at
In reply to: Alvaro Herrera (#3)
2 attachment(s)
New WIP patch for cross column statistics Re: TEXT vs PG_NODE_TREE in system columns (cross column and expression statistics patch)

Hi,

2011-04-28 17:20 keltezéssel, Alvaro Herrera írta:

Excerpts from Boszormenyi Zoltan's message of jue abr 28 11:03:56 -0300 2011:

Hi,

attached is the WIP patch for cross-column statistics and
extra expression statistics.

My question is that why pg_node_tree is unusable as
syscache attribute? I attempted to alias it as text in the patch
but I get the following error if I try to use it by setting
USE_SYSCACHE_FOR_SEARCH to 1 in selfuncs.c.
Directly using the underlying pg_statistic3 doesn't cause an error.

Two comments:
1. it seems that expression stats are mostly separate from cross-column
stats; does it really make sense to submit the two in the same patch?

2. there are almost no code comments anywhere

3. (bonus) if you're going to copy/paste pg_attribute.h verbatim into
the new files, please remove the bits you currently have in "#if 0".
(Not to mention the fact that the new catalogs seem rather poorly
named).

OK, we went to a different route this time. Here is what we came
up with. Attached are two patches.

attnum-int2vector.patch implements:

- int2vector support routines and catalog entries for them
- pg_statistic is modified so "staattnum int2" it converted to
"staattnums int2vector". RemoveStatistics() is modified to take
an array of AttrNumber and the length of it.
- pg_attribute.attstattarget is moved to pg_statistic.statarget,
pg_statistic gains a new "stavalid" bool field. Two support routines
are added: AddStatistics() and InvalidateStatistics(). Entries
in pg_statistic for table columns are created upon table creation
and ALTER TABLE ADD COLUMN and maintained for the lifetime
of the column. Exceptions are system tables: calling AddStatistics()
for them during initdb is a Catch-22 when pg_statistic doesn't yet
exist. For these, ANALYZE creates the pg_statistic record just
as before. ALTER TABLE ALTER COLUMN SET DATA TYPE
only invalidates the record by setting "stavalid" to false.
- Factor out common code for getting the statistics tuple into a
new function called validate_statistics().

cross-col-syntax.patch builds on the first patch and implements:

CREATE CROSS COLUMN STATISTICS ON TABLE tabname (col, ...)
[ WITH ( statistics_target ) ] ;

DROP CROSS COLUMN STATISTICS ON TABLE tabname (col, ...) ;

CREATE CROSS COLUMN STATISTICS ON INDEX idxname
[ WITH ( statistics_target ) ] ;

DROP CROSS COLUMN STATISTICS ON INDEX idxname ;

and puts new records into pg_statistic with array_length(staattnums, 1) > 1.
Note: this patch should record dependencies on the respective table or
index and the fields but doesn't.

The data structure for storing the N-dimension histogram is not yet decided.

Comments?

Best regards,
Zoltán böszörményi

--
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/

Attachments:

attnum-int2vector.patchtext/plain; name=attnum-int2vector.patchDownload
diff -dcrpN postgresql.orig/src/backend/access/common/tupdesc.c postgresql.4/src/backend/access/common/tupdesc.c
*** postgresql.orig/src/backend/access/common/tupdesc.c	2011-07-18 15:42:00.008379772 +0200
--- postgresql.4/src/backend/access/common/tupdesc.c	2011-08-02 11:49:16.452381480 +0200
*************** equalTupleDescs(TupleDesc tupdesc1, Tupl
*** 337,344 ****
  			return false;
  		if (attr1->atttypid != attr2->atttypid)
  			return false;
- 		if (attr1->attstattarget != attr2->attstattarget)
- 			return false;
  		if (attr1->attlen != attr2->attlen)
  			return false;
  		if (attr1->attndims != attr2->attndims)
--- 337,342 ----
*************** TupleDescInitEntry(TupleDesc desc,
*** 471,477 ****
  	else
  		MemSet(NameStr(att->attname), 0, NAMEDATALEN);
  
- 	att->attstattarget = -1;
  	att->attcacheoff = -1;
  	att->atttypmod = typmod;
  
--- 469,474 ----
diff -dcrpN postgresql.orig/src/backend/access/nbtree/nbtcompare.c postgresql.4/src/backend/access/nbtree/nbtcompare.c
*** postgresql.orig/src/backend/access/nbtree/nbtcompare.c	2011-01-04 15:13:15.816567224 +0100
--- postgresql.4/src/backend/access/nbtree/nbtcompare.c	2011-07-27 15:13:09.756534651 +0200
*************** btoidvectorcmp(PG_FUNCTION_ARGS)
*** 220,225 ****
--- 220,249 ----
  }
  
  Datum
+ btint2vectorcmp(PG_FUNCTION_ARGS)
+ {
+ 	int2vector  *a = (int2vector *) PG_GETARG_POINTER(0);
+ 	int2vector  *b = (int2vector *) PG_GETARG_POINTER(1);
+ 	int			i;
+ 
+ 	/* We arbitrarily choose to sort first by vector length */
+ 	if (a->dim1 != b->dim1)
+ 		PG_RETURN_INT32(a->dim1 - b->dim1);
+ 
+ 	for (i = 0; i < a->dim1; i++)
+ 	{
+ 		if (a->values[i] != b->values[i])
+ 		{
+ 			if (a->values[i] > b->values[i])
+ 				PG_RETURN_INT32(1);
+ 			else
+ 				PG_RETURN_INT32(-1);
+ 		}
+ 	}
+ 	PG_RETURN_INT32(0);
+ }
+ 
+ Datum
  btcharcmp(PG_FUNCTION_ARGS)
  {
  	char		a = PG_GETARG_CHAR(0);
diff -dcrpN postgresql.orig/src/backend/bootstrap/bootstrap.c postgresql.4/src/backend/bootstrap/bootstrap.c
*** postgresql.orig/src/backend/bootstrap/bootstrap.c	2011-07-18 15:42:00.015379264 +0200
--- postgresql.4/src/backend/bootstrap/bootstrap.c	2011-08-02 11:49:16.481379350 +0200
*************** DefineAttr(char *name, char *type, int a
*** 736,742 ****
  			attrtypes[attnum]->attndims = 0;
  	}
  
- 	attrtypes[attnum]->attstattarget = -1;
  	attrtypes[attnum]->attcacheoff = -1;
  	attrtypes[attnum]->atttypmod = -1;
  	attrtypes[attnum]->attislocal = true;
--- 736,741 ----
diff -dcrpN postgresql.orig/src/backend/catalog/heap.c postgresql.4/src/backend/catalog/heap.c
*** postgresql.orig/src/backend/catalog/heap.c	2011-07-24 18:16:45.258679387 +0200
--- postgresql.4/src/backend/catalog/heap.c	2011-08-02 11:59:09.568791478 +0200
*************** static List *insert_ordered_unique_oid(L
*** 132,168 ****
   */
  
  static FormData_pg_attribute a1 = {
! 	0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
  	SelfItemPointerAttributeNumber, 0, -1, -1,
  	false, 'p', 's', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a2 = {
! 	0, {"oid"}, OIDOID, 0, sizeof(Oid),
  	ObjectIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a3 = {
! 	0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
  	MinTransactionIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a4 = {
! 	0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
  	MinCommandIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a5 = {
! 	0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
  	MaxTransactionIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a6 = {
! 	0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
  	MaxCommandIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
--- 132,168 ----
   */
  
  static FormData_pg_attribute a1 = {
! 	0, {"ctid"}, TIDOID, sizeof(ItemPointerData),
  	SelfItemPointerAttributeNumber, 0, -1, -1,
  	false, 'p', 's', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a2 = {
! 	0, {"oid"}, OIDOID, sizeof(Oid),
  	ObjectIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a3 = {
! 	0, {"xmin"}, XIDOID, sizeof(TransactionId),
  	MinTransactionIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a4 = {
! 	0, {"cmin"}, CIDOID, sizeof(CommandId),
  	MinCommandIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a5 = {
! 	0, {"xmax"}, XIDOID, sizeof(TransactionId),
  	MaxTransactionIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
  
  static FormData_pg_attribute a6 = {
! 	0, {"cmax"}, CIDOID, sizeof(CommandId),
  	MaxCommandIdAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
*************** static FormData_pg_attribute a6 = {
*** 174,180 ****
   * used in SQL.
   */
  static FormData_pg_attribute a7 = {
! 	0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
  	TableOidAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
--- 174,180 ----
   * used in SQL.
   */
  static FormData_pg_attribute a7 = {
! 	0, {"tableoid"}, OIDOID, sizeof(Oid),
  	TableOidAttributeNumber, 0, -1, -1,
  	true, 'p', 'i', true, false, false, true, 0
  };
*************** InsertPgAttributeTuple(Relation pg_attri
*** 601,607 ****
  	values[Anum_pg_attribute_attrelid - 1] = ObjectIdGetDatum(new_attribute->attrelid);
  	values[Anum_pg_attribute_attname - 1] = NameGetDatum(&new_attribute->attname);
  	values[Anum_pg_attribute_atttypid - 1] = ObjectIdGetDatum(new_attribute->atttypid);
- 	values[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(new_attribute->attstattarget);
  	values[Anum_pg_attribute_attlen - 1] = Int16GetDatum(new_attribute->attlen);
  	values[Anum_pg_attribute_attnum - 1] = Int16GetDatum(new_attribute->attnum);
  	values[Anum_pg_attribute_attndims - 1] = Int32GetDatum(new_attribute->attndims);
--- 601,606 ----
*************** AddNewAttributeTuples(Oid new_rel_oid,
*** 672,683 ****
  		attr = tupdesc->attrs[i];
  		/* Fill in the correct relation OID */
  		attr->attrelid = new_rel_oid;
! 		/* Make sure these are OK, too */
! 		attr->attstattarget = -1;
  		attr->attcacheoff = -1;
  
  		InsertPgAttributeTuple(rel, attr, indstate);
  
  		/* Add dependency info */
  		myself.classId = RelationRelationId;
  		myself.objectId = new_rel_oid;
--- 671,683 ----
  		attr = tupdesc->attrs[i];
  		/* Fill in the correct relation OID */
  		attr->attrelid = new_rel_oid;
! 		/* Make sure this is OK, too */
  		attr->attcacheoff = -1;
  
  		InsertPgAttributeTuple(rel, attr, indstate);
  
+ 		AddStatistics(new_rel_oid, &attr->attnum, 1, (oidinhcount > 0), -1);
+ 
  		/* Add dependency info */
  		myself.classId = RelationRelationId;
  		myself.objectId = new_rel_oid;
*************** RemoveAttributeById(Oid relid, AttrNumbe
*** 1491,1499 ****
  		/* Remove any NOT NULL constraint the column may have */
  		attStruct->attnotnull = false;
  
- 		/* We don't want to keep stats for it anymore */
- 		attStruct->attstattarget = 0;
- 
  		/*
  		 * Change the column name to something that isn't likely to conflict
  		 */
--- 1491,1496 ----
*************** RemoveAttributeById(Oid relid, AttrNumbe
*** 1515,1522 ****
  
  	heap_close(attr_rel, RowExclusiveLock);
  
  	if (attnum > 0)
! 		RemoveStatistics(relid, attnum);
  
  	relation_close(rel, NoLock);
  }
--- 1512,1520 ----
  
  	heap_close(attr_rel, RowExclusiveLock);
  
+ 	/* Only drop pg_statistic entries for non system columns. */
  	if (attnum > 0)
! 		RemoveStatistics(relid, &attnum, 1);
  
  	relation_close(rel, NoLock);
  }
*************** heap_drop_with_catalog(Oid relid)
*** 1740,1746 ****
  	/*
  	 * delete statistics
  	 */
! 	RemoveStatistics(relid, 0);
  
  	/*
  	 * delete attribute tuples
--- 1738,1744 ----
  	/*
  	 * delete statistics
  	 */
! 	RemoveStatistics(relid, NULL, 0);
  
  	/*
  	 * delete attribute tuples
*************** cookConstraint(ParseState *pstate,
*** 2521,2539 ****
  
  
  /*
!  * RemoveStatistics --- remove entries in pg_statistic for a rel or column
   *
!  * If attnum is zero, remove all entries for rel; else remove only the one(s)
!  * for that column.
   */
  void
! RemoveStatistics(Oid relid, AttrNumber attnum)
  {
  	Relation	pgstatistic;
  	SysScanDesc scan;
  	ScanKeyData key[2];
  	int			nkeys;
  	HeapTuple	tuple;
  
  	pgstatistic = heap_open(StatisticRelationId, RowExclusiveLock);
  
--- 2519,2735 ----
  
  
  /*
!  * AddStatistics --- add an entry in pg_statistic
   *
!  * attnums		- an ordered array of AttrNumbers
!  * n_attnum		- number of elements in the array
!  * statistics_target	- the sampling size for this statistics
!  *
!  * Entries in pg_statistic are used by the planner to collect selectivity values.
!  * This function is called when a new relation is created or a new column is added
!  * to a relation. It is therefore ensured that every column has an entry during the
!  * lifetime of the relation since its creation. There is one exception from under
!  * this rule: thus function is a no-op during bootstrapping to avoid a catch-22
!  * situation where a pg_statistic entry would be created when pg_statistic itself
!  * doesn't exist yet. pg_statistic entries for system tables will be created by
!  * ANALYZE as before. The entry is created as invalid (stavalid == false) and
!  * the histogram columns are NULLs. This will also be fixed by ANALYZE.
   */
  void
! AddStatistics(Oid relid, AttrNumber *attnums, int n_attnums, bool inherited, int statistics_target)
! {
! 	Relation	rel;
! 	ScanKeyData	scanKey[2];
! 	SysScanDesc	scan;
! 	int2vector *attnumvector;
! 	HeapTuple	tuple;
! 	TupleDesc	tupDesc;
! 	Datum		values[Natts_pg_statistic];
! 	bool		nulls[Natts_pg_statistic];
! 	int		i, j;
! 
! 	if (IsBootstrapProcessingMode())
! 		return;
! 
! 	Assert(attnums != NULL);
! 	Assert(n_attnums > 0);
! 
! 	attnumvector = buildint2vector(attnums, n_attnums);
! 
! 	rel = heap_open(StatisticRelationId, RowExclusiveLock);
! 
! 	ScanKeyInit(&scanKey[0],
! 					Anum_pg_statistic_starelid,
! 					BTEqualStrategyNumber, F_OIDEQ,
! 					ObjectIdGetDatum(relid));
! 	ScanKeyInit(&scanKey[1],
! 					Anum_pg_statistic_staattnums,
! 					BTEqualStrategyNumber, F_ARRAY_EQ,
! 					PointerGetDatum(attnumvector));
! 
! 	scan = systable_beginscan(rel, StatisticRelidAttnumsInhIndexId, true,
! 									SnapshotNow, 2, scanKey);
! 
! 	tuple = systable_getnext(scan);
! 	if (HeapTupleIsValid(tuple))
! 	{
! 		systable_endscan(scan);
! 		elog(ERROR, "pg_statistic entry already exists for this table and set of columns");
! 	}
! 
! 	systable_endscan(scan);
! 
! 	for (i = 0; i < Natts_pg_statistic; i++)
! 		nulls[i] = true;
! 
! 	i = 0;
! 	values[i] = ObjectIdGetDatum(relid);		nulls[i++] = false;	/* starelid */
! 	values[i] = BoolGetDatum(inherited);		nulls[i++] = false;	/* stainherit */
! 	values[i] = BoolGetDatum(false);		nulls[i++] = false;	/* stavalid */
! 	values[i] = Int32GetDatum(statistics_target);	nulls[i++] = false;	/* statarget */
! 	values[i] = Float4GetDatum(0);			nulls[i++] = false;	/* stanullfrac */
! 	values[i] = Int32GetDatum(0);			nulls[i++] = false;	/* stawidth */
! 	values[i] = Float4GetDatum(0);			nulls[i++] = false;	/* stadistinct */
! 	for (j = 0; j < STATISTIC_NUM_SLOTS; j++)
! 	{
! 		values[i] = Int16GetDatum(0);		nulls[i++] = false;	/* stakindN */
! 	}
! 	for (j = 0; j < STATISTIC_NUM_SLOTS; j++)
! 	{
! 		values[i] = ObjectIdGetDatum(0);	nulls[i++] = false;	/* staopN */
! 	}
! 	values[i] = PointerGetDatum(attnumvector);	nulls[i++] = false;	/* stainherit */
! 
! 	tupDesc = RelationGetDescr(rel);
! 
! 	tuple = heap_form_tuple(tupDesc, values, nulls);
! 
! 	simple_heap_insert(rel, tuple);
! 
! 	CatalogUpdateIndexes(rel, tuple);
! 
! 	pfree(attnumvector);
! 
! 	relation_close(rel, RowExclusiveLock);
! }
! 
! typedef struct invalidate_stats {
! 	HeapTuple	tuple;
! 	struct invalidate_stats *next;
! } invalidate_stats;
! 
! /*
!  * InvalidateStatistics --- invalidate all pg_statistic entries of this attnum
!  *
!  * We need to collect copies of tuples that need invalidating in order not to
!  * conflict with the system table scan.
!  */
! void
! InvalidateStatistics(Oid relid, AttrNumber attnum)
! {
! 	Relation		rel;
! 	ScanKeyData		scanKey;
! 	SysScanDesc		scan;
! 	HeapTuple		tuple;
! 	TupleDesc		tupDesc;
! 	Form_pg_statistic	stattuple;
! 	invalidate_stats   *tupptr;
! 	invalidate_stats   *tupptr_next;
! 	int			i;
! 
! 	if (IsBootstrapProcessingMode())
! 		return;
! 
! 	Assert(attnums != NULL);
! 	Assert(n_attnums > 0);
! 
! 	rel = heap_open(StatisticRelationId, RowExclusiveLock);
! 	tupDesc = RelationGetDescr(rel);
! 
! 	ScanKeyInit(&scanKey,
! 					Anum_pg_statistic_starelid,
! 					BTEqualStrategyNumber, F_OIDEQ,
! 					ObjectIdGetDatum(relid));
! 
! 	scan = systable_beginscan(rel, StatisticRelidAttnumsInhIndexId, true,
! 									SnapshotNow, 1, &scanKey);
! 
! 
! 	tupptr = tupptr_next = NULL;
! 	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
! 	{
! 		bool	isnull;
! 		Datum	attnvec;
! 		int2vector *attnumvector;
! 
! 		attnvec = heap_getattr(tuple, Anum_pg_statistic_staattnums, tupDesc, &isnull);
! 
! 		Assert(!isnull);
! 
! 		attnumvector = (int2vector *) DatumGetPointer(attnvec);
! 
! 		for (i = 0; i < attnumvector->dim1; i++)
! 		{
! 			if (attnumvector->values[i] == attnum)
! 			{
! 				invalidate_stats *tmp;
! 
! 
! 				tmp = palloc(sizeof(invalidate_stats));
! 				tmp->tuple = heap_copytuple(tuple);
! 				tmp->next = NULL;
! 
! 				if (tupptr == NULL)
! 					tupptr = tupptr_next = tmp;
! 				else
! 				{
! 					tupptr_next->next = tmp;
! 					tupptr_next = tmp;
! 				}
! 
! 				break; /* find next tuple */
! 			}
! 		}
! 	}
! 
! 	systable_endscan(scan);
! 
! 	while (tupptr)
! 	{
! 		stattuple = (Form_pg_statistic) GETSTRUCT(tupptr->tuple);
! 
! 		stattuple->stavalid = false;
! 
! 		simple_heap_update(rel, &tupptr->tuple->t_self, tupptr->tuple);
! 
! 		CatalogUpdateIndexes(rel, tupptr->tuple);
! 
! 		tupptr_next = tupptr->next;
! 
! 		heap_freetuple(tupptr->tuple);
! 		pfree(tupptr);
! 
! 		tupptr = tupptr_next;
! 	}
! 
! 	relation_close(rel, RowExclusiveLock);
! }
! 
! /*
!  * RemoveStatistics --- remove entries in pg_statistic for a rel's set of columns
!  *
!  * If attnums is NULL, remove all entries for rel; else remove only the one
!  * for that set of column(s).
!  */
! void
! RemoveStatistics(Oid relid, AttrNumber *attnums, int n_attnums)
  {
  	Relation	pgstatistic;
  	SysScanDesc scan;
  	ScanKeyData key[2];
  	int			nkeys;
  	HeapTuple	tuple;
+ 	int2vector *attnumvector = NULL;
  
  	pgstatistic = heap_open(StatisticRelationId, RowExclusiveLock);
  
*************** RemoveStatistics(Oid relid, AttrNumber a
*** 2542,2559 ****
  				BTEqualStrategyNumber, F_OIDEQ,
  				ObjectIdGetDatum(relid));
  
! 	if (attnum == 0)
  		nkeys = 1;
  	else
  	{
  		ScanKeyInit(&key[1],
! 					Anum_pg_statistic_staattnum,
! 					BTEqualStrategyNumber, F_INT2EQ,
! 					Int16GetDatum(attnum));
  		nkeys = 2;
  	}
  
! 	scan = systable_beginscan(pgstatistic, StatisticRelidAttnumInhIndexId, true,
  							  SnapshotNow, nkeys, key);
  
  	/* we must loop even when attnum != 0, in case of inherited stats */
--- 2738,2756 ----
  				BTEqualStrategyNumber, F_OIDEQ,
  				ObjectIdGetDatum(relid));
  
! 	if (attnums == NULL)
  		nkeys = 1;
  	else
  	{
+ 		attnumvector = buildint2vector(attnums, n_attnums);
  		ScanKeyInit(&key[1],
! 					Anum_pg_statistic_staattnums,
! 					BTEqualStrategyNumber, F_INT2VECTOREQ,
! 					PointerGetDatum(attnumvector));
  		nkeys = 2;
  	}
  
! 	scan = systable_beginscan(pgstatistic, StatisticRelidAttnumsInhIndexId, true,
  							  SnapshotNow, nkeys, key);
  
  	/* we must loop even when attnum != 0, in case of inherited stats */
*************** RemoveStatistics(Oid relid, AttrNumber a
*** 2562,2567 ****
--- 2759,2767 ----
  
  	systable_endscan(scan);
  
+ 	if (attnumvector)
+ 		pfree(attnumvector);
+ 
  	heap_close(pgstatistic, RowExclusiveLock);
  }
  
diff -dcrpN postgresql.orig/src/backend/catalog/index.c postgresql.4/src/backend/catalog/index.c
*** postgresql.orig/src/backend/catalog/index.c	2011-07-24 18:16:45.259679336 +0200
--- postgresql.4/src/backend/catalog/index.c	2011-08-02 11:49:16.512377071 +0200
*************** ConstructTupleDescriptor(Relation heapRe
*** 347,353 ****
  			 */
  			to->attnum = i + 1;
  
- 			to->attstattarget = -1;
  			to->attcacheoff = -1;
  			to->attnotnull = false;
  			to->atthasdef = false;
--- 347,352 ----
*************** ConstructTupleDescriptor(Relation heapRe
*** 385,391 ****
  			to->attbyval = typeTup->typbyval;
  			to->attstorage = typeTup->typstorage;
  			to->attalign = typeTup->typalign;
- 			to->attstattarget = -1;
  			to->attcacheoff = -1;
  			to->atttypmod = -1;
  			to->attislocal = true;
--- 384,389 ----
*************** index_drop(Oid indexId)
*** 1357,1363 ****
  	 * them.
  	 */
  	if (hasexprs)
! 		RemoveStatistics(indexId, 0);
  
  	/*
  	 * fix ATTRIBUTE relation
--- 1355,1361 ----
  	 * them.
  	 */
  	if (hasexprs)
! 		RemoveStatistics(indexId, NULL, 0);
  
  	/*
  	 * fix ATTRIBUTE relation
diff -dcrpN postgresql.orig/src/backend/catalog/system_views.sql postgresql.4/src/backend/catalog/system_views.sql
*** postgresql.orig/src/backend/catalog/system_views.sql	2011-07-24 18:16:45.262679185 +0200
--- postgresql.4/src/backend/catalog/system_views.sql	2011-07-27 15:33:44.392554069 +0200
*************** CREATE VIEW pg_stats AS
*** 141,147 ****
              WHEN stakind4 = 3 THEN stanumbers4[1]
          END AS correlation
      FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
!          JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
           LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
      WHERE NOT attisdropped AND has_column_privilege(c.oid, a.attnum, 'select');
  
--- 141,147 ----
              WHEN stakind4 = 3 THEN stanumbers4[1]
          END AS correlation
      FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
!          JOIN pg_attribute a ON (c.oid = attrelid AND array_length(s.staattnums, 1) = 1 AND attnum = s.staattnums[0])
           LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
      WHERE NOT attisdropped AND has_column_privilege(c.oid, a.attnum, 'select');
  
diff -dcrpN postgresql.orig/src/backend/commands/analyze.c postgresql.4/src/backend/commands/analyze.c
*** postgresql.orig/src/backend/commands/analyze.c	2011-06-20 10:11:35.729661282 +0200
--- postgresql.4/src/backend/commands/analyze.c	2011-08-02 11:51:06.071322632 +0200
***************
*** 42,47 ****
--- 42,48 ----
  #include "storage/procarray.h"
  #include "utils/acl.h"
  #include "utils/attoptcache.h"
+ #include "utils/builtins.h"
  #include "utils/datum.h"
  #include "utils/guc.h"
  #include "utils/lsyscache.h"
*************** static void compute_index_stats(Relation
*** 93,99 ****
  					AnlIndexData *indexdata, int nindexes,
  					HeapTuple *rows, int numrows,
  					MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum,
  				  Node *index_expr);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
  					int targrows, double *totalrows, double *totaldeadrows);
--- 94,100 ----
  					AnlIndexData *indexdata, int nindexes,
  					HeapTuple *rows, int numrows,
  					MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, AttrNumber attnum,
  				  Node *index_expr);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
  					int targrows, double *totalrows, double *totaldeadrows);
*************** compute_index_stats(Relation onerel, dou
*** 792,797 ****
--- 793,827 ----
  }
  
  /*
+  * statistics_target -- returns pg_statistic.statarget
+  */
+ static int4
+ statistics_target(Oid relid, AttrNumber *attnums, int n_attnums, bool inherited)
+ {
+ 	int2vector	   *attnumvector;
+ 	HeapTuple		tuple;
+ 	int4			statarget = -1; /* default */
+ 
+ 	attnumvector = buildint2vector(attnums, n_attnums);
+ 	tuple = SearchSysCache3(STATRELATTINH,
+ 								ObjectIdGetDatum(relid),
+ 								PointerGetDatum(attnumvector),
+ 								BoolGetDatum(inherited));
+ 	if (HeapTupleIsValid(tuple))
+ 	{
+ 		Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(tuple);
+ 
+ 		if (stats->stavalid)
+ 			statarget = stats->statarget;
+ 
+ 		ReleaseSysCache(tuple);
+ 	}
+ 
+ 	return statarget;
+ }
+ 
+ 
+ /*
   * examine_attribute -- pre-analysis of a single column
   *
   * Determine whether the column is analyzable; if so, create and initialize
*************** compute_index_stats(Relation onerel, dou
*** 801,810 ****
   * and index_expr is the expression tree representing the column's data.
   */
  static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
  {
  	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
  	HeapTuple	typtuple;
  	VacAttrStats *stats;
  	int			i;
  	bool		ok;
--- 831,841 ----
   * and index_expr is the expression tree representing the column's data.
   */
  static VacAttrStats *
! examine_attribute(Relation onerel, AttrNumber attnum, Node *index_expr)
  {
  	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
  	HeapTuple	typtuple;
+ 	int4		statarget;
  	VacAttrStats *stats;
  	int			i;
  	bool		ok;
*************** examine_attribute(Relation onerel, int a
*** 814,820 ****
  		return NULL;
  
  	/* Don't analyze column if user has specified not to */
! 	if (attr->attstattarget == 0)
  		return NULL;
  
  	/*
--- 845,852 ----
  		return NULL;
  
  	/* Don't analyze column if user has specified not to */
! 	statarget = statistics_target(onerel->rd_id, &attnum, 1, onerel->rd_att->attrs[attnum - 1]->attinhcount > 0);
! 	if (statarget == 0)
  		return NULL;
  
  	/*
*************** examine_attribute(Relation onerel, int a
*** 823,828 ****
--- 855,861 ----
  	 */
  	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
  	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
+ 	stats->statarget = stats->oldtarget = statarget;
  	memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
  
  	/*
*************** update_attstats(Oid relid, bool inh, int
*** 1573,1578 ****
--- 1606,1612 ----
  		int			i,
  					k,
  					n;
+ 		int2vector *attnumvector = NULL;
  		Datum		values[Natts_pg_statistic];
  		bool		nulls[Natts_pg_statistic];
  		bool		replaces[Natts_pg_statistic];
*************** update_attstats(Oid relid, bool inh, int
*** 1591,1598 ****
  		}
  
  		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
- 		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
  		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
  		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
  		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
  		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
--- 1625,1633 ----
  		}
  
  		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
  		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
+ 		values[Anum_pg_statistic_stavalid] = BoolGetDatum(true);
+ 		values[Anum_pg_statistic_statarget] = Int32GetDatum(stats->oldtarget);
  		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
  		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
  		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
*************** update_attstats(Oid relid, bool inh, int
*** 1606,1611 ****
--- 1641,1650 ----
  		{
  			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
  		}
+ 
+ 		attnumvector = buildint2vector(&(stats->attr->attnum), 1);
+ 		values[Anum_pg_statistic_staattnums - 1] = PointerGetDatum(attnumvector);	/* staattnums */
+ 
  		i = Anum_pg_statistic_stanumbers1 - 1;
  		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
  		{
*************** update_attstats(Oid relid, bool inh, int
*** 1655,1661 ****
  		/* Is there already a pg_statistic tuple for this attribute? */
  		oldtup = SearchSysCache3(STATRELATTINH,
  								 ObjectIdGetDatum(relid),
! 								 Int16GetDatum(stats->attr->attnum),
  								 BoolGetDatum(inh));
  
  		if (HeapTupleIsValid(oldtup))
--- 1694,1700 ----
  		/* Is there already a pg_statistic tuple for this attribute? */
  		oldtup = SearchSysCache3(STATRELATTINH,
  								 ObjectIdGetDatum(relid),
! 								 PointerGetDatum(attnumvector),
  								 BoolGetDatum(inh));
  
  		if (HeapTupleIsValid(oldtup))
*************** update_attstats(Oid relid, bool inh, int
*** 1676,1681 ****
--- 1715,1722 ----
  			simple_heap_insert(sd, stup);
  		}
  
+ 		pfree(attnumvector);
+ 
  		/* update indexes too */
  		CatalogUpdateIndexes(sd, stup);
  
*************** static int	compare_mcvs(const void *a, c
*** 1791,1805 ****
  static bool
  std_typanalyze(VacAttrStats *stats)
  {
- 	Form_pg_attribute attr = stats->attr;
  	Oid			ltopr;
  	Oid			eqopr;
  	StdAnalyzeData *mystats;
  
! 	/* If the attstattarget column is negative, use the default value */
  	/* NB: it is okay to scribble on stats->attr since it's a copy */
! 	if (attr->attstattarget < 0)
! 		attr->attstattarget = default_statistics_target;
  
  	/* Look for default "<" and "=" operators for column's type */
  	get_sort_group_operators(stats->attrtypid,
--- 1832,1845 ----
  static bool
  std_typanalyze(VacAttrStats *stats)
  {
  	Oid			ltopr;
  	Oid			eqopr;
  	StdAnalyzeData *mystats;
  
! 	/* If the statarget column is negative, use the default value */
  	/* NB: it is okay to scribble on stats->attr since it's a copy */
! 	if (stats->statarget < 0)
! 		stats->statarget = default_statistics_target;
  
  	/* Look for default "<" and "=" operators for column's type */
  	get_sort_group_operators(stats->attrtypid,
*************** std_typanalyze(VacAttrStats *stats)
*** 1844,1857 ****
  		 * know it at this point.
  		 *--------------------
  		 */
! 		stats->minrows = 300 * attr->attstattarget;
  	}
  	else
  	{
  		/* Can't do much but the minimal stuff */
  		stats->compute_stats = compute_minimal_stats;
  		/* Might as well use the same minrows as above */
! 		stats->minrows = 300 * attr->attstattarget;
  	}
  
  	return true;
--- 1884,1897 ----
  		 * know it at this point.
  		 *--------------------
  		 */
! 		stats->minrows = 300 * stats->statarget;
  	}
  	else
  	{
  		/* Can't do much but the minimal stuff */
  		stats->compute_stats = compute_minimal_stats;
  		/* Might as well use the same minrows as above */
! 		stats->minrows = 300 * stats->statarget;
  	}
  
  	return true;
*************** compute_minimal_stats(VacAttrStatsP stat
*** 1896,1902 ****
  	TrackItem  *track;
  	int			track_cnt,
  				track_max;
! 	int			num_mcv = stats->attr->attstattarget;
  	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
  
  	/*
--- 1936,1942 ----
  	TrackItem  *track;
  	int			track_cnt,
  				track_max;
! 	int			num_mcv = stats->statarget;
  	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
  
  	/*
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2223,2230 ****
  	int		   *tupnoLink;
  	ScalarMCVItem *track;
  	int			track_cnt = 0;
! 	int			num_mcv = stats->attr->attstattarget;
! 	int			num_bins = stats->attr->attstattarget;
  	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
  
  	values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
--- 2263,2270 ----
  	int		   *tupnoLink;
  	ScalarMCVItem *track;
  	int			track_cnt = 0;
! 	int			num_mcv = stats->statarget;
! 	int			num_bins = stats->statarget;
  	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
  
  	values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
diff -dcrpN postgresql.orig/src/backend/commands/tablecmds.c postgresql.4/src/backend/commands/tablecmds.c
*** postgresql.orig/src/backend/commands/tablecmds.c	2011-07-24 18:16:45.267678934 +0200
--- postgresql.4/src/backend/commands/tablecmds.c	2011-08-02 11:49:16.568372952 +0200
***************
*** 35,40 ****
--- 35,41 ----
  #include "catalog/pg_inherits_fn.h"
  #include "catalog/pg_namespace.h"
  #include "catalog/pg_opclass.h"
+ #include "catalog/pg_statistic.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_trigger.h"
  #include "catalog/pg_type.h"
*************** ATExecAddColumn(List **wqueue, AlteredTa
*** 4319,4325 ****
  	attribute.attrelid = myrelid;
  	namestrcpy(&(attribute.attname), colDef->colname);
  	attribute.atttypid = typeOid;
- 	attribute.attstattarget = (newattnum > 0) ? -1 : 0;
  	attribute.attlen = tform->typlen;
  	attribute.attcacheoff = -1;
  	attribute.atttypmod = typmod;
--- 4320,4325 ----
*************** ATExecAddColumn(List **wqueue, AlteredTa
*** 4481,4486 ****
--- 4481,4488 ----
  	add_column_datatype_dependency(myrelid, newattnum, attribute.atttypid);
  	add_column_collation_dependency(myrelid, newattnum, attribute.attcollation);
  
+ 	AddStatistics(myrelid, &attribute.attnum, 1, attribute.attinhcount, (newattnum > 0) ? -1 : 0);
+ 
  	/*
  	 * Propagate to children as appropriate.  Unlike most other ALTER
  	 * routines, we have to do this one level of recursion at a time; we can't
*************** ATExecSetStatistics(Relation rel, const 
*** 4817,4824 ****
--- 4819,4832 ----
  {
  	int			newtarget;
  	Relation	attrelation;
+ 	Relation	statsrelation;
+ 	Oid		relid;
  	HeapTuple	tuple;
  	Form_pg_attribute attrtuple;
+ 	AttrNumber	attnum;
+ 	bool		inherited;
+ 	int2vector *attnumvector;
+ 	Form_pg_statistic stattuple;
  
  	Assert(IsA(newValue, Integer));
  	newtarget = intVal(newValue);
*************** ATExecSetStatistics(Relation rel, const 
*** 4844,4850 ****
  
  	attrelation = heap_open(AttributeRelationId, RowExclusiveLock);
  
! 	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
  
  	if (!HeapTupleIsValid(tuple))
  		ereport(ERROR,
--- 4852,4860 ----
  
  	attrelation = heap_open(AttributeRelationId, RowExclusiveLock);
  
! 	relid = RelationGetRelid(rel);
! 
! 	tuple = SearchSysCacheAttName(relid, colName);
  
  	if (!HeapTupleIsValid(tuple))
  		ereport(ERROR,
*************** ATExecSetStatistics(Relation rel, const 
*** 4859,4874 ****
  				 errmsg("cannot alter system column \"%s\"",
  						colName)));
  
! 	attrtuple->attstattarget = newtarget;
  
! 	simple_heap_update(attrelation, &tuple->t_self, tuple);
  
! 	/* keep system catalog indexes current */
! 	CatalogUpdateIndexes(attrelation, tuple);
  
! 	heap_freetuple(tuple);
  
! 	heap_close(attrelation, RowExclusiveLock);
  }
  
  static void
--- 4869,4909 ----
  				 errmsg("cannot alter system column \"%s\"",
  						colName)));
  
! 	attnum = attrtuple->attnum;
! 	inherited = (attrtuple->attinhcount > 0);
  
! 	ReleaseSysCache(tuple);
  
! 	heap_close(attrelation, RowExclusiveLock);
  
! 	statsrelation = heap_open(StatisticRelationId, RowExclusiveLock);
  
! 	attnumvector = buildint2vector(&attnum, 1);
! 
! 	tuple = SearchSysCacheCopy3(STATRELATTINH,
! 								ObjectIdGetDatum(relid),
! 								PointerGetDatum(attnumvector),
! 								BoolGetDatum(inherited));
! 
! 	pfree(attnumvector);
! 
! 	if (!HeapTupleIsValid(tuple))
! 		AddStatistics(relid, &attnum, 1, inherited, newtarget);
! 	else
! 	{
! 		stattuple = (Form_pg_statistic) GETSTRUCT(tuple);
! 
! 		stattuple->statarget = newtarget;
! 
! 		simple_heap_update(statsrelation, &tuple->t_self, tuple);
! 
! 		/* keep system catalog indexes current */
! 		CatalogUpdateIndexes(statsrelation, tuple);
! 
! 		heap_freetuple(tuple);
! 	}
! 
! 	heap_close(statsrelation, RowExclusiveLock);
  }
  
  static void
*************** ATExecAlterColumnType(AlteredTableInfo *
*** 7368,7376 ****
  	add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);
  
  	/*
! 	 * Drop any pg_statistic entry for the column, since it's now wrong type
  	 */
! 	RemoveStatistics(RelationGetRelid(rel), attnum);
  
  	/*
  	 * Update the default, if present, by brute force --- remove and re-add
--- 7403,7411 ----
  	add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);
  
  	/*
! 	 * Invalidate any pg_statistic entry for the column, since it's now wrong type
  	 */
! 	InvalidateStatistics(RelationGetRelid(rel), attnum);
  
  	/*
  	 * Update the default, if present, by brute force --- remove and re-add
diff -dcrpN postgresql.orig/src/backend/executor/nodeHash.c postgresql.4/src/backend/executor/nodeHash.c
*** postgresql.orig/src/backend/executor/nodeHash.c	2011-04-11 15:36:27.096816773 +0200
--- postgresql.4/src/backend/executor/nodeHash.c	2011-08-02 11:49:16.588371482 +0200
***************
*** 33,38 ****
--- 33,39 ----
  #include "executor/nodeHashjoin.h"
  #include "miscadmin.h"
  #include "parser/parse_expr.h"
+ #include "utils/builtins.h"
  #include "utils/dynahash.h"
  #include "utils/memutils.h"
  #include "utils/lsyscache.h"
*************** ExecHashBuildSkewHash(HashJoinTable hash
*** 1126,1131 ****
--- 1127,1133 ----
  	int			nvalues;
  	float4	   *numbers;
  	int			nnumbers;
+ 	int2vector *attnumvector;
  
  	/* Do nothing if planner didn't identify the outer relation's join key */
  	if (!OidIsValid(node->skewTable))
*************** ExecHashBuildSkewHash(HashJoinTable hash
*** 1137,1146 ****
  	/*
  	 * Try to find the MCV statistics for the outer relation's join key.
  	 */
  	statsTuple = SearchSysCache3(STATRELATTINH,
  								 ObjectIdGetDatum(node->skewTable),
! 								 Int16GetDatum(node->skewColumn),
  								 BoolGetDatum(node->skewInherit));
  	if (!HeapTupleIsValid(statsTuple))
  		return;
  
--- 1139,1161 ----
  	/*
  	 * Try to find the MCV statistics for the outer relation's join key.
  	 */
+ 	attnumvector = buildint2vector(&(node->skewColumn), 1);
  	statsTuple = SearchSysCache3(STATRELATTINH,
  								 ObjectIdGetDatum(node->skewTable),
! 								 PointerGetDatum(attnumvector),
  								 BoolGetDatum(node->skewInherit));
+ 	pfree(attnumvector);
+ 
+ 	/* check whether the stats entry is valid */
+ 	if (HeapTupleIsValid(statsTuple))
+ 	{
+ 		if (!((Form_pg_statistic)GETSTRUCT(statsTuple))->stavalid)
+ 		{
+ 			ReleaseSysCache(statsTuple);
+ 			statsTuple = NULL;
+ 		}
+ 	}
+ 
  	if (!HeapTupleIsValid(statsTuple))
  		return;
  
diff -dcrpN postgresql.orig/src/backend/tsearch/ts_typanalyze.c postgresql.4/src/backend/tsearch/ts_typanalyze.c
*** postgresql.orig/src/backend/tsearch/ts_typanalyze.c	2011-01-04 15:13:16.013557090 +0100
--- postgresql.4/src/backend/tsearch/ts_typanalyze.c	2011-08-02 11:49:16.626368688 +0200
*************** Datum
*** 55,70 ****
  ts_typanalyze(PG_FUNCTION_ARGS)
  {
  	VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
- 	Form_pg_attribute attr = stats->attr;
  
! 	/* If the attstattarget column is negative, use the default value */
! 	/* NB: it is okay to scribble on stats->attr since it's a copy */
! 	if (attr->attstattarget < 0)
! 		attr->attstattarget = default_statistics_target;
  
  	stats->compute_stats = compute_tsvector_stats;
  	/* see comment about the choice of minrows in commands/analyze.c */
! 	stats->minrows = 300 * attr->attstattarget;
  
  	PG_RETURN_BOOL(true);
  }
--- 55,69 ----
  ts_typanalyze(PG_FUNCTION_ARGS)
  {
  	VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
  
! 	/* If the statarget column is negative, use the default value */
! 	/* NB: it is okay to scribble on stats->statarget since it's a copy */
! 	if (stats->statarget < 0)
! 		stats->statarget = default_statistics_target;
  
  	stats->compute_stats = compute_tsvector_stats;
  	/* see comment about the choice of minrows in commands/analyze.c */
! 	stats->minrows = 300 * stats->statarget;
  
  	PG_RETURN_BOOL(true);
  }
*************** compute_tsvector_stats(VacAttrStats *sta
*** 167,173 ****
  	 * the number of individual lexeme values tracked in pg_statistic ought to
  	 * be more than the number of values for a simple scalar column.
  	 */
! 	num_mcelem = stats->attr->attstattarget * 10;
  
  	/*
  	 * We set bucket width equal to (num_mcelem + 10) / 0.007 as per the
--- 166,172 ----
  	 * the number of individual lexeme values tracked in pg_statistic ought to
  	 * be more than the number of values for a simple scalar column.
  	 */
! 	num_mcelem = stats->statarget * 10;
  
  	/*
  	 * We set bucket width equal to (num_mcelem + 10) / 0.007 as per the
diff -dcrpN postgresql.orig/src/backend/utils/adt/int.c postgresql.4/src/backend/utils/adt/int.c
*** postgresql.orig/src/backend/utils/adt/int.c	2011-06-20 10:11:35.739660477 +0200
--- postgresql.4/src/backend/utils/adt/int.c	2011-07-27 15:13:09.758534476 +0200
*************** int2vectorsend(PG_FUNCTION_ARGS)
*** 254,274 ****
  	return array_send(fcinfo);
  }
  
- /*
-  * We don't have a complete set of int2vector support routines,
-  * but we need int2vectoreq for catcache indexing.
-  */
  Datum
  int2vectoreq(PG_FUNCTION_ARGS)
  {
! 	int2vector *a = (int2vector *) PG_GETARG_POINTER(0);
! 	int2vector *b = (int2vector *) PG_GETARG_POINTER(1);
  
! 	if (a->dim1 != b->dim1)
! 		PG_RETURN_BOOL(false);
! 	PG_RETURN_BOOL(memcmp(a->values, b->values, a->dim1 * sizeof(int2)) == 0);
  }
  
  
  /*****************************************************************************
   *	 PUBLIC ROUTINES														 *
--- 254,306 ----
  	return array_send(fcinfo);
  }
  
  Datum
  int2vectoreq(PG_FUNCTION_ARGS)
  {
! 	int32		cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
  
! 	PG_RETURN_BOOL(cmp == 0);
! }
! 
! Datum
! int2vectorne(PG_FUNCTION_ARGS)
! {
! 	int32		cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
! 
! 	PG_RETURN_BOOL(cmp != 0);
! }
! 
! Datum
! int2vectorlt(PG_FUNCTION_ARGS)
! {
! 	int32		cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
! 
! 	PG_RETURN_BOOL(cmp < 0);
  }
  
+ Datum
+ int2vectorle(PG_FUNCTION_ARGS)
+ {
+ 	int32		cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
+ 
+ 	PG_RETURN_BOOL(cmp <= 0);
+ }
+ 
+ Datum
+ int2vectorge(PG_FUNCTION_ARGS)
+ {
+ 	int32		cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
+ 
+ 	PG_RETURN_BOOL(cmp >= 0);
+ }
+ 
+ Datum
+ int2vectorgt(PG_FUNCTION_ARGS)
+ {
+ 	int32		cmp = DatumGetInt32(btint2vectorcmp(fcinfo));
+ 
+ 	PG_RETURN_BOOL(cmp > 0);
+ }
  
  /*****************************************************************************
   *	 PUBLIC ROUTINES														 *
diff -dcrpN postgresql.orig/src/backend/utils/adt/selfuncs.c postgresql.4/src/backend/utils/adt/selfuncs.c
*** postgresql.orig/src/backend/utils/adt/selfuncs.c	2011-07-18 15:42:00.064375706 +0200
--- postgresql.4/src/backend/utils/adt/selfuncs.c	2011-08-02 11:49:16.651366852 +0200
*************** get_join_variables(PlannerInfo *root, Li
*** 4072,4077 ****
--- 4072,4105 ----
  }
  
  /*
+  * validate_statistics -- sets vardata->statsTuple only if the statistics is valid
+  */
+ void
+ validate_statistics(VariableStatData *vardata,
+ 			Oid relid, AttrNumber *attnums, int n_attnums, bool inherited)
+ {
+ 	int2vector *attnumvector = buildint2vector(attnums, n_attnums);
+ 	HeapTuple	tuple;
+ 
+ 	tuple = SearchSysCache3(STATRELATTINH,
+ 									ObjectIdGetDatum(relid),
+ 									PointerGetDatum(attnumvector),
+ 									  BoolGetDatum(inherited));
+ 	pfree(attnumvector);
+ 
+ 	if (HeapTupleIsValid(tuple))
+ 	{
+ 		if (((Form_pg_statistic) GETSTRUCT(tuple))->stavalid)
+ 		{
+ 			vardata->statsTuple = tuple;
+ 			vardata->freefunc = ReleaseSysCache;
+ 		}
+ 		else
+ 			ReleaseSysCache(tuple);
+ 	}
+ }
+ 
+ /*
   * examine_variable
   *		Try to look up statistical data about an expression.
   *		Fill in a VariableStatData struct to describe the expression.
*************** examine_variable(PlannerInfo *root, Node
*** 4150,4160 ****
  		}
  		else if (rte->rtekind == RTE_RELATION)
  		{
! 			vardata->statsTuple = SearchSysCache3(STATRELATTINH,
! 												ObjectIdGetDatum(rte->relid),
! 												Int16GetDatum(var->varattno),
! 												  BoolGetDatum(rte->inh));
! 			vardata->freefunc = ReleaseSysCache;
  		}
  		else
  		{
--- 4178,4184 ----
  		}
  		else if (rte->rtekind == RTE_RELATION)
  		{
! 			validate_statistics(vardata, rte->relid, &(var->varattno), 1, rte->inh);
  		}
  		else
  		{
*************** examine_variable(PlannerInfo *root, Node
*** 4289,4300 ****
  						}
  						else if (index->indpred == NIL)
  						{
! 							vardata->statsTuple =
! 								SearchSysCache3(STATRELATTINH,
! 										   ObjectIdGetDatum(index->indexoid),
! 												Int16GetDatum(pos + 1),
! 												BoolGetDatum(false));
! 							vardata->freefunc = ReleaseSysCache;
  						}
  						if (vardata->statsTuple)
  							break;
--- 4313,4321 ----
  						}
  						else if (index->indpred == NIL)
  						{
! 							int2		attnum = pos + 1;
! 
! 							validate_statistics(vardata, index->indexoid, &attnum, 1, false);
  						}
  						if (vardata->statsTuple)
  							break;
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6257,6269 ****
  				elog(ERROR, "no function provided to release variable stats with");
  		}
  		else
! 		{
! 			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
! 												 ObjectIdGetDatum(relid),
! 												 Int16GetDatum(colnum),
! 												 BoolGetDatum(rte->inh));
! 			vardata.freefunc = ReleaseSysCache;
! 		}
  	}
  	else
  	{
--- 6278,6284 ----
  				elog(ERROR, "no function provided to release variable stats with");
  		}
  		else
! 			validate_statistics(&vardata, relid, &colnum, 1, rte->inh);
  	}
  	else
  	{
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6283,6295 ****
  				elog(ERROR, "no function provided to release variable stats with");
  		}
  		else
! 		{
! 			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
! 												 ObjectIdGetDatum(relid),
! 												 Int16GetDatum(colnum),
! 												 BoolGetDatum(false));
! 			vardata.freefunc = ReleaseSysCache;
! 		}
  	}
  
  	if (HeapTupleIsValid(vardata.statsTuple))
--- 6298,6304 ----
  				elog(ERROR, "no function provided to release variable stats with");
  		}
  		else
! 			validate_statistics(&vardata, relid, &colnum, 1, false);
  	}
  
  	if (HeapTupleIsValid(vardata.statsTuple))
diff -dcrpN postgresql.orig/src/backend/utils/cache/lsyscache.c postgresql.4/src/backend/utils/cache/lsyscache.c
*** postgresql.orig/src/backend/utils/cache/lsyscache.c	2011-07-18 15:42:00.066375563 +0200
--- postgresql.4/src/backend/utils/cache/lsyscache.c	2011-08-02 11:49:16.684364424 +0200
*************** get_attavgwidth(Oid relid, AttrNumber at
*** 2632,2637 ****
--- 2632,2638 ----
  {
  	HeapTuple	tp;
  	int32		stawidth;
+ 	int2vector *attnumvector = NULL;
  
  	if (get_attavgwidth_hook)
  	{
*************** get_attavgwidth(Oid relid, AttrNumber at
*** 2639,2648 ****
  		if (stawidth > 0)
  			return stawidth;
  	}
  	tp = SearchSysCache3(STATRELATTINH,
  						 ObjectIdGetDatum(relid),
! 						 Int16GetDatum(attnum),
  						 BoolGetDatum(false));
  	if (HeapTupleIsValid(tp))
  	{
  		stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth;
--- 2640,2651 ----
  		if (stawidth > 0)
  			return stawidth;
  	}
+ 	attnumvector = buildint2vector(&attnum, 1);
  	tp = SearchSysCache3(STATRELATTINH,
  						 ObjectIdGetDatum(relid),
! 						 PointerGetDatum(attnumvector),
  						 BoolGetDatum(false));
+ 	pfree(attnumvector);
  	if (HeapTupleIsValid(tp))
  	{
  		stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth;
*************** get_attstatsslot(HeapTuple statstuple,
*** 2721,2728 ****
  		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stavalues1 + i,
  							  &isnull);
  		if (isnull)
! 			elog(ERROR, "stavalues is null");
  		statarray = DatumGetArrayTypeP(val);
  
  		/*
--- 2724,2732 ----
  		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stavalues1 + i,
  							  &isnull);
+ 		/* invalid stats record, i.e. analyze hasn't yet run for this column */
  		if (isnull)
! 			return false;
  		statarray = DatumGetArrayTypeP(val);
  
  		/*
*************** get_attstatsslot(HeapTuple statstuple,
*** 2775,2782 ****
  		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stanumbers1 + i,
  							  &isnull);
  		if (isnull)
! 			elog(ERROR, "stanumbers is null");
  		statarray = DatumGetArrayTypeP(val);
  
  		/*
--- 2779,2787 ----
  		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stanumbers1 + i,
  							  &isnull);
+ 		/* invalid stats record, i.e. analyze hasn't yet run for this column */
  		if (isnull)
! 			return false;
  		statarray = DatumGetArrayTypeP(val);
  
  		/*
diff -dcrpN postgresql.orig/src/backend/utils/cache/syscache.c postgresql.4/src/backend/utils/cache/syscache.c
*** postgresql.orig/src/backend/utils/cache/syscache.c	2011-06-20 10:11:35.741660316 +0200
--- postgresql.4/src/backend/utils/cache/syscache.c	2011-08-02 14:05:12.866629811 +0200
*************** static const struct cachedesc cacheinfo[
*** 588,598 ****
  		1024
  	},
  	{StatisticRelationId,		/* STATRELATTINH */
! 		StatisticRelidAttnumInhIndexId,
  		3,
  		{
  			Anum_pg_statistic_starelid,
! 			Anum_pg_statistic_staattnum,
  			Anum_pg_statistic_stainherit,
  			0
  		},
--- 588,598 ----
  		1024
  	},
  	{StatisticRelationId,		/* STATRELATTINH */
! 		StatisticRelidAttnumsInhIndexId,
  		3,
  		{
  			Anum_pg_statistic_starelid,
! 			Anum_pg_statistic_staattnums,
  			Anum_pg_statistic_stainherit,
  			0
  		},
diff -dcrpN postgresql.orig/src/include/catalog/heap.h postgresql.4/src/include/catalog/heap.h
*** postgresql.orig/src/include/catalog/heap.h	2011-07-24 18:16:45.286677978 +0200
--- postgresql.4/src/include/catalog/heap.h	2011-08-02 11:49:16.715362145 +0200
*************** extern void RemoveAttributeById(Oid reli
*** 107,113 ****
  extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
  				  DropBehavior behavior, bool complain);
  extern void RemoveAttrDefaultById(Oid attrdefId);
! extern void RemoveStatistics(Oid relid, AttrNumber attnum);
  
  extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
  						  bool relhasoids);
--- 107,118 ----
  extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
  				  DropBehavior behavior, bool complain);
  extern void RemoveAttrDefaultById(Oid attrdefId);
! extern void AddStatistics(Oid relid, AttrNumber *attnums,
! 						  int n_attnums,
! 						  bool inherited,
! 						  int statistics_target);
! extern void InvalidateStatistics(Oid relid, AttrNumber attnum);
! extern void RemoveStatistics(Oid relid, AttrNumber *attnums, int n_attnums);
  
  extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
  						  bool relhasoids);
diff -dcrpN postgresql.orig/src/include/catalog/indexing.h postgresql.4/src/include/catalog/indexing.h
*** postgresql.orig/src/include/catalog/indexing.h	2011-07-24 18:16:45.286677978 +0200
--- postgresql.4/src/include/catalog/indexing.h	2011-07-27 15:33:44.519543035 +0200
*************** DECLARE_INDEX(pg_shdepend_depender_index
*** 218,225 ****
  DECLARE_INDEX(pg_shdepend_reference_index, 1233, on pg_shdepend using btree(refclassid oid_ops, refobjid oid_ops));
  #define SharedDependReferenceIndexId	1233
  
! DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_inh_index, 2696, on pg_statistic using btree(starelid oid_ops, staattnum int2_ops, stainherit bool_ops));
! #define StatisticRelidAttnumInhIndexId	2696
  
  DECLARE_UNIQUE_INDEX(pg_tablespace_oid_index, 2697, on pg_tablespace using btree(oid oid_ops));
  #define TablespaceOidIndexId  2697
--- 218,225 ----
  DECLARE_INDEX(pg_shdepend_reference_index, 1233, on pg_shdepend using btree(refclassid oid_ops, refobjid oid_ops));
  #define SharedDependReferenceIndexId	1233
  
! DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_inh_index, 2696, on pg_statistic using btree(starelid oid_ops, staattnums int2vector_ops, stainherit bool_ops));
! #define StatisticRelidAttnumsInhIndexId	2696
  
  DECLARE_UNIQUE_INDEX(pg_tablespace_oid_index, 2697, on pg_tablespace using btree(oid oid_ops));
  #define TablespaceOidIndexId  2697
diff -dcrpN postgresql.orig/src/include/catalog/pg_amop.h postgresql.4/src/include/catalog/pg_amop.h
*** postgresql.orig/src/include/catalog/pg_amop.h	2011-04-11 15:36:27.235807013 +0200
--- postgresql.4/src/include/catalog/pg_amop.h	2011-07-27 15:13:09.785532127 +0200
*************** DATA(insert (	1991   30 30 4 s	648 403 0
*** 185,190 ****
--- 185,200 ----
  DATA(insert (	1991   30 30 5 s	646 403 0 ));
  
  /*
+  *	btree int2vector_ops
+  */
+ 
+ DATA(insert (	3097   22 22 1 s	199 403 0 ));
+ DATA(insert (	3097   22 22 2 s	322 403 0 ));
+ DATA(insert (	3097   22 22 3 s	386 403 0 ));
+ DATA(insert (	3097   22 22 4 s	323 403 0 ));
+ DATA(insert (	3097   22 22 5 s	276 403 0 ));
+ 
+ /*
   *	btree float_ops
   */
  
diff -dcrpN postgresql.orig/src/include/catalog/pg_amproc.h postgresql.4/src/include/catalog/pg_amproc.h
*** postgresql.orig/src/include/catalog/pg_amproc.h	2011-01-04 15:13:16.120551585 +0100
--- postgresql.4/src/include/catalog/pg_amproc.h	2011-07-27 15:13:09.793531431 +0200
*************** DATA(insert (	2233   703 703 1  380 ));
*** 123,128 ****
--- 123,129 ----
  DATA(insert (	2234   704 704 1  381 ));
  DATA(insert (	2789   27 27 1 2794 ));
  DATA(insert (	2968   2950 2950 1 2960 ));
+ DATA(insert (	3097   22 22 1 321 ));
  DATA(insert (	3522   3500 3500 1 3514 ));
  
  
diff -dcrpN postgresql.orig/src/include/catalog/pg_attribute.h postgresql.4/src/include/catalog/pg_attribute.h
*** postgresql.orig/src/include/catalog/pg_attribute.h	2011-02-10 10:36:32.321680466 +0100
--- postgresql.4/src/include/catalog/pg_attribute.h	2011-08-02 11:59:09.569791405 +0200
*************** CATALOG(pg_attribute,1249) BKI_BOOTSTRAP
*** 48,62 ****
  	Oid			atttypid;
  
  	/*
- 	 * attstattarget is the target number of statistics datapoints to collect
- 	 * during VACUUM ANALYZE of this column.  A zero here indicates that we do
- 	 * not wish to collect any stats about this column. A "-1" here indicates
- 	 * that no value has been explicitly set for this column, so ANALYZE
- 	 * should use the default setting.
- 	 */
- 	int4		attstattarget;
- 
- 	/*
  	 * attlen is a copy of the typlen field from pg_type for this attribute.
  	 * See atttypid comments above.
  	 */
--- 48,53 ----
*************** typedef FormData_pg_attribute *Form_pg_a
*** 179,205 ****
   * ----------------
   */
  
! #define Natts_pg_attribute				20
  #define Anum_pg_attribute_attrelid		1
  #define Anum_pg_attribute_attname		2
  #define Anum_pg_attribute_atttypid		3
! #define Anum_pg_attribute_attstattarget 4
! #define Anum_pg_attribute_attlen		5
! #define Anum_pg_attribute_attnum		6
! #define Anum_pg_attribute_attndims		7
! #define Anum_pg_attribute_attcacheoff	8
! #define Anum_pg_attribute_atttypmod		9
! #define Anum_pg_attribute_attbyval		10
! #define Anum_pg_attribute_attstorage	11
! #define Anum_pg_attribute_attalign		12
! #define Anum_pg_attribute_attnotnull	13
! #define Anum_pg_attribute_atthasdef		14
! #define Anum_pg_attribute_attisdropped	15
! #define Anum_pg_attribute_attislocal	16
! #define Anum_pg_attribute_attinhcount	17
! #define Anum_pg_attribute_attcollation	18
! #define Anum_pg_attribute_attacl		19
! #define Anum_pg_attribute_attoptions	20
  
  
  /* ----------------
--- 170,195 ----
   * ----------------
   */
  
! #define Natts_pg_attribute				19
  #define Anum_pg_attribute_attrelid		1
  #define Anum_pg_attribute_attname		2
  #define Anum_pg_attribute_atttypid		3
! #define Anum_pg_attribute_attlen		4
! #define Anum_pg_attribute_attnum		5
! #define Anum_pg_attribute_attndims		6
! #define Anum_pg_attribute_attcacheoff	7
! #define Anum_pg_attribute_atttypmod		8
! #define Anum_pg_attribute_attbyval		9
! #define Anum_pg_attribute_attstorage	10
! #define Anum_pg_attribute_attalign		11
! #define Anum_pg_attribute_attnotnull	12
! #define Anum_pg_attribute_atthasdef		13
! #define Anum_pg_attribute_attisdropped	14
! #define Anum_pg_attribute_attislocal	15
! #define Anum_pg_attribute_attinhcount	16
! #define Anum_pg_attribute_attcollation	17
! #define Anum_pg_attribute_attacl		18
! #define Anum_pg_attribute_attoptions	19
  
  
  /* ----------------
diff -dcrpN postgresql.orig/src/include/catalog/pg_class.h postgresql.4/src/include/catalog/pg_class.h
*** postgresql.orig/src/include/catalog/pg_class.h	2011-06-24 11:38:23.338906107 +0200
--- postgresql.4/src/include/catalog/pg_class.h	2011-08-02 14:03:43.862124993 +0200
*************** typedef FormData_pg_class *Form_pg_class
*** 132,138 ****
  /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
  DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ ));
  DESCR("");
! DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 _null_ _null_ ));
  DESCR("");
  DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
  DESCR("");
--- 132,138 ----
  /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
  DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ ));
  DESCR("");
! DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 19 0 f f f f f 3 _null_ _null_ ));
  DESCR("");
  DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
  DESCR("");
diff -dcrpN postgresql.orig/src/include/catalog/pg_opclass.h postgresql.4/src/include/catalog/pg_opclass.h
*** postgresql.orig/src/include/catalog/pg_opclass.h	2011-01-04 15:13:16.125551330 +0100
--- postgresql.4/src/include/catalog/pg_opclass.h	2011-07-27 15:13:09.811529864 +0200
*************** DATA(insert (	403		bpchar_pattern_ops	PG
*** 158,163 ****
--- 158,164 ----
  DATA(insert (	403		money_ops			PGNSP PGUID 2099  790 t 0 ));
  DATA(insert (	405		bool_ops			PGNSP PGUID 2222   16 t 0 ));
  DATA(insert (	405		bytea_ops			PGNSP PGUID 2223   17 t 0 ));
+ DATA(insert (	403		int2vector_ops		PGNSP PGUID 3097   22 t 0 ));
  DATA(insert (	405		int2vector_ops		PGNSP PGUID 2224   22 t 0 ));
  DATA(insert (	403		tid_ops				PGNSP PGUID 2789   27 t 0 ));
  DATA(insert (	405		xid_ops				PGNSP PGUID 2225   28 t 0 ));
diff -dcrpN postgresql.orig/src/include/catalog/pg_operator.h postgresql.4/src/include/catalog/pg_operator.h
*** postgresql.orig/src/include/catalog/pg_operator.h	2011-06-06 09:12:54.421675553 +0200
--- postgresql.4/src/include/catalog/pg_operator.h	2011-07-27 15:13:09.822528906 +0200
*************** DATA(insert OID =  98 ( "="		   PGNSP PG
*** 134,139 ****
--- 134,150 ----
  DESCR("equal");
  #define TextEqualOperator	98
  
+ DATA(insert OID = 114 (  "<>"	   PGNSP PGUID b f f	22	22	16 114 386 int2vectorne neqsel neqjoinsel ));
+ DESCR("not equal");
+ DATA(insert OID = 199 (  "<"	   PGNSP PGUID b f f	22	22	16 276 323 int2vectorlt scalarltsel scalarltjoinsel ));
+ DESCR("less than");
+ DATA(insert OID = 276 (  ">"	   PGNSP PGUID b f f	22	22	16 199 322 int2vectorgt scalargtsel scalargtjoinsel ));
+ DESCR("greater than");
+ DATA(insert OID = 322 (  "<="	   PGNSP PGUID b f f	22	22	16 323 276 int2vectorle scalarltsel scalarltjoinsel ));
+ DESCR("less than or equal");
+ DATA(insert OID = 323 (  ">="	   PGNSP PGUID b f f	22	22	16 322 199 int2vectorge scalargtsel scalargtjoinsel ));
+ DESCR("greater than or equal");
+ 
  DATA(insert OID = 349 (  "||"	   PGNSP PGUID b f f 2277 2283 2277 0 0 array_append   -	   -	 ));
  DESCR("append element onto end of array");
  DATA(insert OID = 374 (  "||"	   PGNSP PGUID b f f 2283 2277 2277 0 0 array_prepend  -	   -	 ));
*************** DATA(insert OID = 389 (  "!!"	   PGNSP P
*** 151,157 ****
  DESCR("deprecated, use ! instead");
  DATA(insert OID = 385 (  "="	   PGNSP PGUID b f t	29	29	16 385	 0 cideq eqsel eqjoinsel ));
  DESCR("equal");
! DATA(insert OID = 386 (  "="	   PGNSP PGUID b f t	22	22	16 386	 0 int2vectoreq eqsel eqjoinsel ));
  DESCR("equal");
  
  DATA(insert OID = 387 (  "="	   PGNSP PGUID b t f	27	27	16 387 402 tideq eqsel eqjoinsel ));
--- 162,168 ----
  DESCR("deprecated, use ! instead");
  DATA(insert OID = 385 (  "="	   PGNSP PGUID b f t	29	29	16 385	 0 cideq eqsel eqjoinsel ));
  DESCR("equal");
! DATA(insert OID = 386 (  "="	   PGNSP PGUID b t t	22	22	16 386 114 int2vectoreq eqsel eqjoinsel ));
  DESCR("equal");
  
  DATA(insert OID = 387 (  "="	   PGNSP PGUID b t f	27	27	16 387 402 tideq eqsel eqjoinsel ));
diff -dcrpN postgresql.orig/src/include/catalog/pg_opfamily.h postgresql.4/src/include/catalog/pg_opfamily.h
*** postgresql.orig/src/include/catalog/pg_opfamily.h	2011-01-04 15:13:16.126551278 +0100
--- postgresql.4/src/include/catalog/pg_opfamily.h	2011-07-27 15:13:09.836527688 +0200
*************** DATA(insert OID = 2099 (	403		money_ops	
*** 114,119 ****
--- 114,120 ----
  DATA(insert OID = 2222 (	405		bool_ops		PGNSP PGUID ));
  #define BOOL_HASH_FAM_OID 2222
  DATA(insert OID = 2223 (	405		bytea_ops		PGNSP PGUID ));
+ DATA(insert OID = 3097 (	403		int2vector_ops	PGNSP PGUID ));
  DATA(insert OID = 2224 (	405		int2vector_ops	PGNSP PGUID ));
  DATA(insert OID = 2789 (	403		tid_ops			PGNSP PGUID ));
  DATA(insert OID = 2225 (	405		xid_ops			PGNSP PGUID ));
diff -dcrpN postgresql.orig/src/include/catalog/pg_proc.h postgresql.4/src/include/catalog/pg_proc.h
*** postgresql.orig/src/include/catalog/pg_proc.h	2011-07-18 15:42:00.078374691 +0200
--- postgresql.4/src/include/catalog/pg_proc.h	2011-07-27 15:27:18.053344124 +0200
*************** DESCR("length");
*** 216,221 ****
--- 216,228 ----
  DATA(insert OID = 1258 (  textcat		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 25 "25 25" _null_ _null_ _null_ _null_ textcat _null_ _null_ _null_ ));
  
  DATA(insert OID =  84 (  boolne			   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "16 16" _null_ _null_ _null_ _null_ boolne _null_ _null_ _null_ ));
+ 
+ DATA(insert OID =  86 (  int2vectorne		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorne _null_ _null_ _null_ ));
+ DATA(insert OID =  87 (  int2vectorlt		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorlt _null_ _null_ _null_ ));
+ DATA(insert OID =  88 (  int2vectorle		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorle _null_ _null_ _null_ ));
+ DATA(insert OID =  90 (  int2vectorge		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorge _null_ _null_ _null_ ));
+ DATA(insert OID = 3122 (  int2vectorgt		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "22 22" _null_ _null_ _null_ _null_ int2vectorgt _null_ _null_ _null_ ));
+ 
  DATA(insert OID =  89 (  version		   PGNSP PGUID 12 1 0 0 0 f f f t f s 0 0 25 "" _null_ _null_ _null_ _null_ pgsql_version _null_ _null_ _null_ ));
  DESCR("PostgreSQL version string");
  
*************** DESCR("I/O");
*** 566,571 ****
--- 573,580 ----
  
  DATA(insert OID = 350 (  btint2cmp		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 23 "21 21" _null_ _null_ _null_ _null_ btint2cmp _null_ _null_ _null_ ));
  DESCR("less-equal-greater");
+ DATA(insert OID = 321 (  btint2vectorcmp	   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 23 "22 22" _null_ _null_ _null_ _null_ btint2vectorcmp _null_ _null_ _null_ ));
+ DESCR("less-equal-greater");
  DATA(insert OID = 351 (  btint4cmp		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 23 "23 23" _null_ _null_ _null_ _null_ btint4cmp _null_ _null_ _null_ ));
  DESCR("less-equal-greater");
  DATA(insert OID = 842 (  btint8cmp		   PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 23 "20 20" _null_ _null_ _null_ _null_ btint8cmp _null_ _null_ _null_ ));
diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic.h postgresql.4/src/include/catalog/pg_statistic.h
*** postgresql.orig/src/include/catalog/pg_statistic.h	2011-02-22 18:51:42.762512469 +0100
--- postgresql.4/src/include/catalog/pg_statistic.h	2011-08-02 11:49:16.758358984 +0200
***************
*** 40,50 ****
  
  CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS
  {
! 	/* These fields form the unique key for the entry: */
  	Oid			starelid;		/* relation containing attribute */
- 	int2		staattnum;		/* attribute (column) stats are for */
  	bool		stainherit;		/* true if inheritance children are included */
  
  	/* the fraction of the column's entries that are NULL: */
  	float4		stanullfrac;
  
--- 40,63 ----
  
  CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS
  {
! 	/*
! 	 * These fields (together with the staattnums field below in the variable fields)
! 	 * form the unique key for the entry:
! 	 */
  	Oid			starelid;		/* relation containing attribute */
  	bool		stainherit;		/* true if inheritance children are included */
  
+ 	/* this entry is valid */
+ 	bool		stavalid;
+ 
+ 	/* statarget is the target number of statistics datapoints to collect
+ 	 * during VACUUM ANALYZE of this column.  A zero here indicates that we do
+ 	 * not wish to collect any stats about this column. A "-1" here indicates
+ 	 * that no value has been explicitly set for this column, so ANALYZE
+ 	 * should use the default setting.
+ 	 */
+ 	int4		statarget;
+ 
  	/* the fraction of the column's entries that are NULL: */
  	float4		stanullfrac;
  
*************** CATALOG(pg_statistic,2619) BKI_WITHOUT_O
*** 110,115 ****
--- 123,129 ----
  	 * the full field access machinery (heap_getattr) for them.  We declare
  	 * them here for the catalog machinery.
  	 */
+ 	int2vector		staattnums;		/* attributes (columns) stats are for */
  
  	float4		stanumbers1[1];
  	float4		stanumbers2[1];
*************** typedef FormData_pg_statistic *Form_pg_s
*** 143,171 ****
   *		compiler constants for pg_statistic
   * ----------------
   */
! #define Natts_pg_statistic				22
  #define Anum_pg_statistic_starelid		1
! #define Anum_pg_statistic_staattnum		2
! #define Anum_pg_statistic_stainherit	3
! #define Anum_pg_statistic_stanullfrac	4
! #define Anum_pg_statistic_stawidth		5
! #define Anum_pg_statistic_stadistinct	6
! #define Anum_pg_statistic_stakind1		7
! #define Anum_pg_statistic_stakind2		8
! #define Anum_pg_statistic_stakind3		9
! #define Anum_pg_statistic_stakind4		10
! #define Anum_pg_statistic_staop1		11
! #define Anum_pg_statistic_staop2		12
! #define Anum_pg_statistic_staop3		13
! #define Anum_pg_statistic_staop4		14
! #define Anum_pg_statistic_stanumbers1	15
! #define Anum_pg_statistic_stanumbers2	16
! #define Anum_pg_statistic_stanumbers3	17
! #define Anum_pg_statistic_stanumbers4	18
! #define Anum_pg_statistic_stavalues1	19
! #define Anum_pg_statistic_stavalues2	20
! #define Anum_pg_statistic_stavalues3	21
! #define Anum_pg_statistic_stavalues4	22
  
  /*
   * Currently, three statistical slot "kinds" are defined: most common values,
--- 157,187 ----
   *		compiler constants for pg_statistic
   * ----------------
   */
! #define Natts_pg_statistic				24
  #define Anum_pg_statistic_starelid		1
! #define Anum_pg_statistic_stainherit		2
! #define Anum_pg_statistic_stavalid		3
! #define Anum_pg_statistic_statarget		4
! #define Anum_pg_statistic_stanullfrac		5
! #define Anum_pg_statistic_stawidth		6
! #define Anum_pg_statistic_stadistinct		7
! #define Anum_pg_statistic_stakind1		8
! #define Anum_pg_statistic_stakind2		9
! #define Anum_pg_statistic_stakind3		10
! #define Anum_pg_statistic_stakind4		11
! #define Anum_pg_statistic_staop1		12
! #define Anum_pg_statistic_staop2		13
! #define Anum_pg_statistic_staop3		14
! #define Anum_pg_statistic_staop4		15
! #define Anum_pg_statistic_staattnums		16
! #define Anum_pg_statistic_stanumbers1	17
! #define Anum_pg_statistic_stanumbers2	18
! #define Anum_pg_statistic_stanumbers3	19
! #define Anum_pg_statistic_stanumbers4	20
! #define Anum_pg_statistic_stavalues1	21
! #define Anum_pg_statistic_stavalues2	22
! #define Anum_pg_statistic_stavalues3	23
! #define Anum_pg_statistic_stavalues4	24
  
  /*
   * Currently, three statistical slot "kinds" are defined: most common values,
diff -dcrpN postgresql.orig/src/include/commands/vacuum.h postgresql.4/src/include/commands/vacuum.h
*** postgresql.orig/src/include/commands/vacuum.h	2011-06-02 10:21:24.006634564 +0200
--- postgresql.4/src/include/commands/vacuum.h	2011-08-02 11:49:16.768358249 +0200
*************** typedef struct VacAttrStats
*** 74,79 ****
--- 74,83 ----
  	 * information about the datatype being fed to the typanalyze function.
  	 */
  	Form_pg_attribute attr;		/* copy of pg_attribute row for column */
+ 	int4		statarget;	/* effective statistics target */
+ 	int4		oldtarget;	/* statistics target in pg_statistic
+ 					   this must be kept so the -1 to denote
+ 					   default_statistics_target is not overwritten */
  	Oid			attrtypid;		/* type of data being analyzed */
  	int32		attrtypmod;		/* typmod of data being analyzed */
  	Form_pg_type attrtype;		/* copy of pg_type row for attrtypid */
diff -dcrpN postgresql.orig/src/include/utils/builtins.h postgresql.4/src/include/utils/builtins.h
*** postgresql.orig/src/include/utils/builtins.h	2011-06-24 11:38:23.342905815 +0200
--- postgresql.4/src/include/utils/builtins.h	2011-07-27 15:13:09.893522727 +0200
*************** extern Datum int2vectorout(PG_FUNCTION_A
*** 174,179 ****
--- 174,184 ----
  extern Datum int2vectorrecv(PG_FUNCTION_ARGS);
  extern Datum int2vectorsend(PG_FUNCTION_ARGS);
  extern Datum int2vectoreq(PG_FUNCTION_ARGS);
+ extern Datum int2vectorne(PG_FUNCTION_ARGS);
+ extern Datum int2vectorlt(PG_FUNCTION_ARGS);
+ extern Datum int2vectorle(PG_FUNCTION_ARGS);
+ extern Datum int2vectorgt(PG_FUNCTION_ARGS);
+ extern Datum int2vectorge(PG_FUNCTION_ARGS);
  extern Datum int4in(PG_FUNCTION_ARGS);
  extern Datum int4out(PG_FUNCTION_ARGS);
  extern Datum int4recv(PG_FUNCTION_ARGS);
*************** extern void pg_lltoa(int64 ll, char *a);
*** 283,288 ****
--- 288,294 ----
   */
  extern Datum btboolcmp(PG_FUNCTION_ARGS);
  extern Datum btint2cmp(PG_FUNCTION_ARGS);
+ extern Datum btint2vectorcmp(PG_FUNCTION_ARGS);
  extern Datum btint4cmp(PG_FUNCTION_ARGS);
  extern Datum btint8cmp(PG_FUNCTION_ARGS);
  extern Datum btfloat4cmp(PG_FUNCTION_ARGS);
diff -dcrpN postgresql.orig/src/include/utils/selfuncs.h postgresql.4/src/include/utils/selfuncs.h
*** postgresql.orig/src/include/utils/selfuncs.h	2011-06-10 11:06:01.495860021 +0200
--- postgresql.4/src/include/utils/selfuncs.h	2011-08-02 11:49:16.788356778 +0200
*************** typedef bool (*get_index_stats_hook_type
*** 110,115 ****
--- 110,119 ----
  												  VariableStatData *vardata);
  extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
  
+ extern void validate_statistics(VariableStatData *vardata,
+ 						 Oid relid,
+ 						 AttrNumber *attnums, int n_attnums,
+ 						 bool inherited);
  extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
  				 VariableStatData *vardata);
  extern bool get_restriction_variable(PlannerInfo *root, List *args,
diff -dcrpN postgresql.orig/src/test/regress/expected/rules.out postgresql.4/src/test/regress/expected/rules.out
*** postgresql.orig/src/test/regress/expected/rules.out	2011-07-24 18:16:45.313676620 +0200
--- postgresql.4/src/test/regress/expected/rules.out	2011-07-27 15:33:44.546540690 +0200
*************** SELECT viewname, definition FROM pg_view
*** 1317,1323 ****
   pg_statio_user_indexes          | SELECT pg_statio_all_indexes.relid, pg_statio_all_indexes.indexrelid, pg_statio_all_indexes.schemaname, pg_statio_all_indexes.relname, pg_statio_all_indexes.indexrelname, pg_statio_all_indexes.idx_blks_read, pg_statio_all_indexes.idx_blks_hit FROM pg_statio_all_indexes WHERE ((pg_statio_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_indexes.schemaname !~ '^pg_toast'::text));
   pg_statio_user_sequences        | SELECT pg_statio_all_sequences.relid, pg_statio_all_sequences.schemaname, pg_statio_all_sequences.relname, pg_statio_all_sequences.blks_read, pg_statio_all_sequences.blks_hit FROM pg_statio_all_sequences WHERE ((pg_statio_all_sequences.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_sequences.schemaname !~ '^pg_toast'::text));
   pg_statio_user_tables           | SELECT pg_statio_all_tables.relid, pg_statio_all_tables.schemaname, pg_statio_all_tables.relname, pg_statio_all_tables.heap_blks_read, pg_statio_all_tables.heap_blks_hit, pg_statio_all_tables.idx_blks_read, pg_statio_all_tables.idx_blks_hit, pg_statio_all_tables.toast_blks_read, pg_statio_all_tables.toast_blks_hit, pg_statio_all_tables.tidx_blks_read, pg_statio_all_tables.tidx_blks_hit FROM pg_statio_all_tables WHERE ((pg_statio_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_tables.schemaname !~ '^pg_toast'::text));
!  pg_stats                        | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stavalues1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stavalues2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stavalues3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stavalues4 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stanumbers1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stanumbers2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stanumbers3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stanumbers4 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] ELSE NULL::real END AS correlation FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text));
   pg_tables                       | SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, t.spcname AS tablespace, c.relhasindex AS hasindexes, c.relhasrules AS hasrules, c.relhastriggers AS hastriggers FROM ((pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace))) WHERE (c.relkind = 'r'::"char");
   pg_timezone_abbrevs             | SELECT pg_timezone_abbrevs.abbrev, pg_timezone_abbrevs.utc_offset, pg_timezone_abbrevs.is_dst FROM pg_timezone_abbrevs() pg_timezone_abbrevs(abbrev, utc_offset, is_dst);
   pg_timezone_names               | SELECT pg_timezone_names.name, pg_timezone_names.abbrev, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst);
--- 1317,1323 ----
   pg_statio_user_indexes          | SELECT pg_statio_all_indexes.relid, pg_statio_all_indexes.indexrelid, pg_statio_all_indexes.schemaname, pg_statio_all_indexes.relname, pg_statio_all_indexes.indexrelname, pg_statio_all_indexes.idx_blks_read, pg_statio_all_indexes.idx_blks_hit FROM pg_statio_all_indexes WHERE ((pg_statio_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_indexes.schemaname !~ '^pg_toast'::text));
   pg_statio_user_sequences        | SELECT pg_statio_all_sequences.relid, pg_statio_all_sequences.schemaname, pg_statio_all_sequences.relname, pg_statio_all_sequences.blks_read, pg_statio_all_sequences.blks_hit FROM pg_statio_all_sequences WHERE ((pg_statio_all_sequences.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_sequences.schemaname !~ '^pg_toast'::text));
   pg_statio_user_tables           | SELECT pg_statio_all_tables.relid, pg_statio_all_tables.schemaname, pg_statio_all_tables.relname, pg_statio_all_tables.heap_blks_read, pg_statio_all_tables.heap_blks_hit, pg_statio_all_tables.idx_blks_read, pg_statio_all_tables.idx_blks_hit, pg_statio_all_tables.toast_blks_read, pg_statio_all_tables.toast_blks_hit, pg_statio_all_tables.tidx_blks_read, pg_statio_all_tables.tidx_blks_hit FROM pg_statio_all_tables WHERE ((pg_statio_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_tables.schemaname !~ '^pg_toast'::text));
!  pg_stats                        | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stavalues1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stavalues2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stavalues3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stavalues4 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stanumbers1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stanumbers2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stanumbers3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stanumbers4 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] ELSE NULL::real END AS correlation FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON ((((c.oid = a.attrelid) AND (array_length(s.staattnums, 1) = 1)) AND (a.attnum = s.staattnums[0])))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text));
   pg_tables                       | SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, t.spcname AS tablespace, c.relhasindex AS hasindexes, c.relhasrules AS hasrules, c.relhastriggers AS hastriggers FROM ((pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace))) WHERE (c.relkind = 'r'::"char");
   pg_timezone_abbrevs             | SELECT pg_timezone_abbrevs.abbrev, pg_timezone_abbrevs.utc_offset, pg_timezone_abbrevs.is_dst FROM pg_timezone_abbrevs() pg_timezone_abbrevs(abbrev, utc_offset, is_dst);
   pg_timezone_names               | SELECT pg_timezone_names.name, pg_timezone_names.abbrev, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst);
cross-col-syntax.patchtext/plain; name=cross-col-syntax.patchDownload
diff -dcrpN postgresql.4/src/backend/commands/analyze.c postgresql.5/src/backend/commands/analyze.c
*** postgresql.4/src/backend/commands/analyze.c	2011-08-02 11:51:06.071322632 +0200
--- postgresql.5/src/backend/commands/analyze.c	2011-08-02 14:59:37.136374568 +0200
***************
*** 21,26 ****
--- 21,27 ----
  #include "access/tupconvert.h"
  #include "access/tuptoaster.h"
  #include "access/xact.h"
+ #include "catalog/heap.h"
  #include "catalog/index.h"
  #include "catalog/indexing.h"
  #include "catalog/namespace.h"
***************
*** 28,33 ****
--- 29,35 ----
  #include "catalog/pg_inherits_fn.h"
  #include "catalog/pg_namespace.h"
  #include "commands/dbcommands.h"
+ #include "commands/defrem.h"
  #include "commands/vacuum.h"
  #include "executor/executor.h"
  #include "miscadmin.h"
*************** compare_mcvs(const void *a, const void *
*** 2779,2781 ****
--- 2781,2852 ----
  
  	return da - db;
  }
+ 
+ /*
+  * ExtraColStat
+  *     Add or remove one extra entry in pg_statistics
+  */
+ void ExtraStatistics(ExtraStatStmt *stmt)
+ {
+ 	Oid		relId;
+ 	int		len, i, j;
+ 	bool		differ = false;
+ 	AttrNumber	   *attnums;
+ 	AttrNumber	   *sorted_attnums;
+ 	ListCell	   *l;
+ 
+ 	relId = RangeVarGetRelid(stmt->relation, AccessExclusiveLock, false, false);
+ 
+ 	len = list_length(stmt->columns);
+ 	if (len < 2)
+ 		elog(ERROR, "cross column statistics need at least two columns");
+ 
+ 	attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ 	sorted_attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ 
+ 	i = 0;
+ 	foreach(l, stmt->columns)
+ 	{
+ 		Node	   *node = (Node *) lfirst(l);
+ 		Var	   *var;
+ 
+ 		if (!IsA(node, Var))
+ 			elog(ERROR, "not a column reference");
+ 
+ 		var = (Var *) node;
+ 
+ 		if (var->varattno == 0)
+ 			elog(ERROR, "row expansion via \"*\" is not supported here");
+ 
+ 		sorted_attnums[i] = attnums[i] = var->varattno;
+ 
+ 		i++;
+ 	}
+ 
+ 	for (i = 0;  i < len - 1; i++)
+ 		for (j = i+1; j < len; j++)
+ 			if (sorted_attnums[i] > sorted_attnums[j])
+ 			{
+ 				AttrNumber	tmp = sorted_attnums[i];
+ 
+ 				sorted_attnums[i] = sorted_attnums[j];
+ 				sorted_attnums[j] = tmp;
+ 			}
+ 
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		if (!differ && attnums[i] != sorted_attnums[i])
+ 			differ = true;
+ 
+ 		if ((i < len - 1) && sorted_attnums[i] == sorted_attnums[i+1])
+ 			elog(ERROR, "column list must contain every column exactly once");
+ 
+ 	}
+ 	if (differ)
+ 		elog(WARNING, "the column list was reordered in the order of table attributes");
+ 
+ 	if (stmt->create)
+ 		AddStatistics(relId, sorted_attnums, len, false, stmt->statistics_target);
+ 	else
+ 		RemoveStatistics(relId, sorted_attnums, len);
+ }
diff -dcrpN postgresql.4/src/backend/nodes/copyfuncs.c postgresql.5/src/backend/nodes/copyfuncs.c
*** postgresql.4/src/backend/nodes/copyfuncs.c	2011-07-24 18:16:45.269678833 +0200
--- postgresql.5/src/backend/nodes/copyfuncs.c	2011-08-02 14:07:24.223043799 +0200
*************** _copyCreateForeignTableStmt(CreateForeig
*** 3459,3464 ****
--- 3459,3477 ----
  	return newnode;
  }
  
+ static ExtraStatStmt *
+ _copyExtraStatStmt(ExtraStatStmt *from)
+ {
+ 	ExtraStatStmt *newnode = makeNode(ExtraStatStmt);
+ 
+ 	COPY_SCALAR_FIELD(create);
+ 	newnode->relation = _copyRangeVar(from->relation);
+ 	COPY_NODE_FIELD(columns);
+ 	COPY_SCALAR_FIELD(statistics_target);
+ 
+ 	return newnode;
+ }
+ 
  static CreateTrigStmt *
  _copyCreateTrigStmt(CreateTrigStmt *from)
  {
*************** copyObject(void *from)
*** 4378,4383 ****
--- 4391,4399 ----
  		case T_CreateForeignTableStmt:
  			retval = _copyCreateForeignTableStmt(from);
  			break;
+ 		case T_ExtraStatStmt:
+ 			retval = _copyExtraStatStmt(from);
+ 			break;
  		case T_CreateTrigStmt:
  			retval = _copyCreateTrigStmt(from);
  			break;
diff -dcrpN postgresql.4/src/backend/nodes/equalfuncs.c postgresql.5/src/backend/nodes/equalfuncs.c
*** postgresql.4/src/backend/nodes/equalfuncs.c	2011-07-24 18:16:45.269678833 +0200
--- postgresql.5/src/backend/nodes/equalfuncs.c	2011-08-02 14:07:24.246042121 +0200
*************** _equalCreateForeignTableStmt(CreateForei
*** 1796,1801 ****
--- 1796,1813 ----
  }
  
  static bool
+ _equalExtraStatStmt(ExtraStatStmt *a, ExtraStatStmt *b)
+ {
+ 	COMPARE_SCALAR_FIELD(create);
+ 	if (!_equalRangeVar(a->relation, b->relation))
+ 		return FALSE;
+ 	COMPARE_NODE_FIELD(columns);
+ 	COMPARE_SCALAR_FIELD(statistics_target);
+ 
+ 	return true;
+ }
+ 
+ static bool
  _equalCreateTrigStmt(CreateTrigStmt *a, CreateTrigStmt *b)
  {
  	COMPARE_STRING_FIELD(trigname);
*************** equal(void *a, void *b)
*** 2931,2936 ****
--- 2943,2951 ----
  		case T_CreateForeignTableStmt:
  			retval = _equalCreateForeignTableStmt(a, b);
  			break;
+ 		case T_ExtraStatStmt:
+ 			retval = _equalExtraStatStmt(a, b);
+ 			break;
  		case T_CreateTrigStmt:
  			retval = _equalCreateTrigStmt(a, b);
  			break;
diff -dcrpN postgresql.4/src/backend/parser/gram.y postgresql.5/src/backend/parser/gram.y
*** postgresql.4/src/backend/parser/gram.y	2011-07-24 18:16:45.272678682 +0200
--- postgresql.5/src/backend/parser/gram.y	2011-08-02 14:07:24.282039495 +0200
*************** static void processCASbits(int cas_bits,
*** 214,220 ****
  		DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
  		DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt
  		DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
! 		DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt
  		GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
  		LockStmt NotifyStmt ExplainableStmt PreparableStmt
  		CreateFunctionStmt AlterFunctionStmt ReindexStmt RemoveAggrStmt
--- 214,220 ----
  		DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
  		DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt
  		DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
! 		DropForeignServerStmt DropUserMappingStmt ExplainStmt ExtraStatStmt FetchStmt
  		GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
  		LockStmt NotifyStmt ExplainableStmt PreparableStmt
  		CreateFunctionStmt AlterFunctionStmt ReindexStmt RemoveAggrStmt
*************** static void processCASbits(int cas_bits,
*** 246,252 ****
  				transaction_mode_item
  				create_extension_opt_item alter_extension_opt_item
  
! %type <ival>	opt_lock lock_type cast_context
  %type <ival>	vacuum_option_list vacuum_option_elem
  %type <boolean>	opt_force opt_or_replace
  				opt_grant_grant_option opt_grant_admin_option
--- 246,252 ----
  				transaction_mode_item
  				create_extension_opt_item alter_extension_opt_item
  
! %type <ival>	opt_lock lock_type cast_context opt_stattarget
  %type <ival>	vacuum_option_list vacuum_option_elem
  %type <boolean>	opt_force opt_or_replace
  				opt_grant_grant_option opt_grant_admin_option
*************** static void processCASbits(int cas_bits,
*** 325,330 ****
--- 325,332 ----
  %type <list>	opt_fdw_options fdw_options
  %type <defelt>	fdw_option
  
+ %type <list>	cc_column_list
+ 
  %type <range>	OptTempTableName
  %type <into>	into_clause create_as_target
  
*************** stmt :
*** 756,761 ****
--- 758,764 ----
  			| DropdbStmt
  			| ExecuteStmt
  			| ExplainStmt
+ 			| ExtraStatStmt
  			| FetchStmt
  			| GrantStmt
  			| GrantRoleStmt
*************** schema_stmt:
*** 1200,1205 ****
--- 1203,1276 ----
  
  /*****************************************************************************
   *
+  * Add / drop extra statistics
+  *
+  *****************************************************************************/
+ 
+ ExtraStatStmt:
+ 			CREATE CROSS COLUMN STATISTICS ON TABLE qualified_name '(' cc_column_list ')' opt_stattarget
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 
+ 					n->relkind = 'r';
+ 					n->create = true;
+ 					n->relation = $7;
+ 					n->columns = $9;
+ 					n->statistics_target = $11;
+ 					$$ = (Node *)n;
+ 				}
+ 			| DROP CROSS COLUMN STATISTICS ON TABLE qualified_name '(' cc_column_list ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 
+ 					n->relkind = 'r';
+ 					n->create = false;
+ 					n->relation = $7;
+ 					n->columns = $9;
+ 					$$ = (Node *)n;
+ 				}
+ 			| CREATE CROSS COLUMN STATISTICS ON INDEX qualified_name opt_stattarget
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 
+ 					n->relkind = 'i';
+ 					n->create = true;
+ 					n->relation = $7;
+ 					n->columns = NIL;
+ 					n->statistics_target = $8;
+ 					$$ = (Node *)n;
+ 				}
+ 			| DROP CROSS COLUMN STATISTICS ON INDEX qualified_name
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 
+ 					n->relkind = 'i';
+ 					n->create = false;
+ 					n->relation = $7;
+ 					n->columns = NIL;
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ cc_column_list:
+ 			columnref
+ 				{
+ 					$$ = list_make1($1);
+ 				}
+ 			| cc_column_list ',' columnref
+ 				{
+ 					$$ = lappend($1, $3);
+ 				}
+ 		;
+ 
+ opt_stattarget:
+ 			WITH '(' Iconst ')'			{ $$ = $3; }
+ 			| /* EMPTY */				{ $$ = -1; }
+ 		;
+ 
+ 
+ /*****************************************************************************
+  *
   * Set PG internal variable
   *	  SET name TO 'var_value'
   * Include SQL92 syntax (thomas 1997-10-22):
diff -dcrpN postgresql.4/src/backend/parser/parse_utilcmd.c postgresql.5/src/backend/parser/parse_utilcmd.c
*** postgresql.4/src/backend/parser/parse_utilcmd.c	2011-07-18 15:42:00.045377085 +0200
--- postgresql.5/src/backend/parser/parse_utilcmd.c	2011-08-02 15:00:29.005542805 +0200
*************** setSchemaName(char *context_schema, char
*** 2710,2712 ****
--- 2710,2804 ----
  						"different from the one being created (%s)",
  						*stmt_schema_name, context_schema)));
  }
+ 
+ /*
+  * transformExtraStatistics
+  *             Transform the column list or the expression into a form
+  *             usable by the executor.
+  */
+ ExtraStatStmt *
+ transformExtraStatistics(ExtraStatStmt *stmt, const char *queryString)
+ {
+ 	ParseState	   *pstate;
+ 	RangeTblEntry	   *rte;
+ 	ExtraStatStmt	   *newstmt;
+ 	List		   *columns = NIL;
+ 	ListCell	   *cell;
+ 	Oid			relId;
+ 	HeapTuple		tuple;
+ 	HeapTuple		attuple;
+ 	Form_pg_class		classptr;
+ 	Form_pg_index		indexptr;
+ 	Form_pg_attribute	attptr;
+ 	AttrNumber		i;
+ 
+ 	switch (stmt->relkind)
+ 	{
+ 		case 'r':
+ 			pstate = make_parsestate(NULL);
+ 			pstate->p_sourcetext = queryString;
+ 
+ 			rte = addRangeTableEntry(pstate, stmt->relation, NULL, false, true);
+ 			addRTEtoQuery(pstate, rte, true, true, true);
+ 
+ 			foreach(cell, stmt->columns)
+ 			{
+ 				Node *col = lfirst(cell);
+ 
+ 				columns = lappend(columns, transformExpr(pstate, col));
+ 			}
+ 
+ 			break;
+ 
+ 		case 'i':
+ 			relId = RangeVarGetRelid(stmt->relation, ShareLock, false, false);
+ 
+ 			tuple = SearchSysCache1(RELOID, relId);
+ 			classptr = (Form_pg_class) GETSTRUCT(tuple);
+ 
+ 			if (classptr->relkind != 'i')
+ 				elog(ERROR, "not an index");
+ 
+ 			ReleaseSysCache(tuple);
+ 
+ 			tuple = SearchSysCache1(INDEXRELID, relId);
+ 			indexptr = (Form_pg_index) GETSTRUCT(tuple);
+ 
+ 			if (indexptr->indnatts < 2)
+ 			{
+ 				ReleaseSysCache(tuple);
+ 
+ 				elog(ERROR, "cross column statistics are only usable on multi-column indexes");
+ 			}
+ 
+ 			for (i = 1; i <= indexptr->indnatts; i++)
+ 			{
+ 				attuple = SearchSysCache2(ATTNUM, relId, i);
+ 				if (!HeapTupleIsValid(attuple))
+ 					elog(ERROR, "pg_attribute row not found for index");
+ 
+ 				attptr = (Form_pg_attribute) GETSTRUCT(attuple);
+ 
+ 				columns = lappend(columns, makeVar(0, i,
+ 										attptr->atttypid,
+ 										attptr->atttypmod,
+ 										InvalidOid, 0));
+ 
+ 				ReleaseSysCache(attuple);
+ 			}
+ 
+ 			ReleaseSysCache(tuple);
+ 			break;
+ 
+ 		default:
+ 			elog(ERROR, "invalid relkind");
+ 	}
+ 
+ 	newstmt = makeNode(ExtraStatStmt);
+ 	newstmt->relkind = stmt->relkind;
+ 	newstmt->create = stmt->create;
+ 	newstmt->relation = copyObject(stmt->relation);
+ 	newstmt->columns = columns;
+ 
+ 	return newstmt;
+ }
diff -dcrpN postgresql.4/src/backend/tcop/utility.c postgresql.5/src/backend/tcop/utility.c
*** postgresql.4/src/backend/tcop/utility.c	2011-07-24 18:16:45.276678481 +0200
--- postgresql.5/src/backend/tcop/utility.c	2011-08-02 14:07:24.319036796 +0200
*************** check_xact_readonly(Node *parsetree)
*** 237,242 ****
--- 237,243 ----
  		case T_AlterTableSpaceOptionsStmt:
  		case T_CreateForeignTableStmt:
  		case T_SecLabelStmt:
+ 		case T_ExtraStatStmt:
  			PreventCommandIfReadOnly(CreateCommandTag(parsetree));
  			break;
  		default:
*************** standard_ProcessUtility(Node *parsetree,
*** 581,586 ****
--- 582,595 ----
  			}
  			break;
  
+ 		case T_ExtraStatStmt:
+ 			{
+ 				ExtraStatStmt *newstmt = transformExtraStatistics((ExtraStatStmt *)parsetree, queryString);
+ 
+ 				ExtraStatistics(newstmt);
+ 			}
+ 			break;
+ 
  		case T_CreateTableSpaceStmt:
  			PreventTransactionChain(isTopLevel, "CREATE TABLESPACE");
  			CreateTableSpace((CreateTableSpaceStmt *) parsetree);
*************** CreateCommandTag(Node *parsetree)
*** 1744,1749 ****
--- 1753,1769 ----
  			tag = "CREATE FOREIGN TABLE";
  			break;
  
+ 		case T_ExtraStatStmt:
+ 			{
+ 				ExtraStatStmt *stmt = (ExtraStatStmt *)parsetree;
+ 
+ 				if (stmt->create)
+ 					tag = "CREATE CROSS COLUMN STATISTICS";
+ 				else
+ 					tag = "DROP CROSS COLUMN STATISTICS";
+ 			}
+ 			break;
+ 
  		case T_DropStmt:
  			switch (((DropStmt *) parsetree)->removeType)
  			{
diff -dcrpN postgresql.4/src/include/commands/defrem.h postgresql.5/src/include/commands/defrem.h
*** postgresql.4/src/include/commands/defrem.h	2011-07-24 18:16:45.287677928 +0200
--- postgresql.5/src/include/commands/defrem.h	2011-08-02 14:07:24.332035848 +0200
*************** extern void RemoveAggregate(RemoveFuncSt
*** 93,98 ****
--- 93,101 ----
  extern void RenameAggregate(List *name, List *args, const char *newname);
  extern void AlterAggregateOwner(List *name, List *args, Oid newOwnerId);
  
+ /* commands/analyze.c */
+ extern void ExtraStatistics(ExtraStatStmt *stmt);
+ 
  /* commands/opclasscmds.c */
  extern void DefineOpClass(CreateOpClassStmt *stmt);
  extern void DefineOpFamily(CreateOpFamilyStmt *stmt);
diff -dcrpN postgresql.4/src/include/nodes/nodes.h postgresql.5/src/include/nodes/nodes.h
*** postgresql.4/src/include/nodes/nodes.h	2011-03-22 17:53:48.045903422 +0100
--- postgresql.5/src/include/nodes/nodes.h	2011-08-02 14:07:24.340035264 +0200
*************** typedef enum NodeTag
*** 362,367 ****
--- 362,368 ----
  	T_CreateExtensionStmt,
  	T_AlterExtensionStmt,
  	T_AlterExtensionContentsStmt,
+ 	T_ExtraStatStmt,
  
  	/*
  	 * TAGS FOR PARSE TREE NODES (parsenodes.h)
diff -dcrpN postgresql.4/src/include/nodes/parsenodes.h postgresql.5/src/include/nodes/parsenodes.h
*** postgresql.4/src/include/nodes/parsenodes.h	2011-07-24 18:16:45.287677928 +0200
--- postgresql.5/src/include/nodes/parsenodes.h	2011-08-02 14:07:24.351034462 +0200
*************** typedef enum DropBehavior
*** 1160,1165 ****
--- 1160,1179 ----
  } DropBehavior;
  
  /* ----------------------
+  *     Create Cross Column Statistics
+  * ----------------------
+  */
+ typedef struct ExtraStatStmt
+ {
+ 	NodeTag		type;
+ 	char		relkind;
+ 	bool		create;
+ 	RangeVar   *relation;
+ 	List	   *columns;
+ 	int		statistics_target;
+ } ExtraStatStmt;
+ 
+ /* ----------------------
   *	Alter Table
   * ----------------------
   */
diff -dcrpN postgresql.4/src/include/parser/parse_utilcmd.h postgresql.5/src/include/parser/parse_utilcmd.h
*** postgresql.4/src/include/parser/parse_utilcmd.h	2011-01-04 15:13:16.163549374 +0100
--- postgresql.5/src/include/parser/parse_utilcmd.h	2011-08-02 14:07:24.365033441 +0200
*************** extern void transformRuleStmt(RuleStmt *
*** 25,28 ****
--- 25,31 ----
  				  List **actions, Node **whereClause);
  extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
  
+ extern ExtraStatStmt *transformExtraStatistics(ExtraStatStmt *stmt,
+ 						const char *queryString);
+ 
  #endif   /* PARSE_UTILCMD_H */