SQLF Optimization question

Started by Werner Echezuriaover 18 years ago2 messages
#1Werner Echezuria
wercool@gmail.com
2 attachment(s)

Hello, (First of all, sorry for my english),

I'm in a project that involves including sqlf sentences to make postgres
understand the fuzzy querys, i mean, make people to create fuzzy
predicates, these are words like fat, tall, young, old, in an sql sentence.
I guess the first step is complete. The parser know when an user writes
"CREATE FUZZY PREDICATE <name> ON <domain> AS <fuzzy set>".

For instance, a person can create the young word as a fuzzy predicate like
this: "CREATE FUZZY PREDICATE young ON 0..120 AS (0,0,25,50)", that means,
in a domain of 0..120 a young person is between 25 and 50. The fuzzy set has
the follows: (beg,core1,core2,end), (begin of the set, first core, second
core, end of the set).

The sentence creates a row in a catalog named "pg_fuzzypred", and we're good
till then.

But when I write "Select * from students where age=young", brings me a
message telling me the node isn't exist. My teacher who is very involved in
the project tells me that postgres has to do the derivation principle,
before it gets to the executor (what sounds logical), in order to the
optimizer calculates all costs properly.

It's in this point where i need help, ¿What do i have to modify or create to
make postgres undestand the derivation principle, where it's the point in
parser tree goes to query tree and do the optimization?

I send a file, this has all the modification that my teacher and i have make
to postgres (It has some words in spanish, but i guess it's understandable),
and a file with the errors.

Thanks.

Attachments:

PostgreSQLf 14-05-07.txttext/plain; charset=ANSI_X3.4-1968; name="PostgreSQLf 14-05-07.txt"Download

*) Agregaciones en /src/backend/parser/keywords.c:
	linea 156: 	{"fuzzy", FUZZY},
	linea 267: 	{"predicate", PREDICATE},
	l��nea 177:      {"infinite", INFINITE},


*) Agregaciones en /src/backend/parser/gram.y:
	linea 154:	CreateFuzzyPredStmt
	linea 381: 	FUZZY
	l��nea 386:      INFINITE
	linea 410:	PREDICATE
	linea 540:	| CreateFuzzyPredStmt
	linea 604:
	/*****************************************************************************
	 *
	 * Create a new Fuzzy Predicate :)
	 *
	 *****************************************************************************/

	CreateFuzzyPredStmt:
		CREATE FUZZY PREDICATE ColId ON Iconst'.''.'Iconst AS '('Iconst','Iconst','Iconst','Iconst')'
		{
			CreateFuzzyPredStmt *n = makeNode(CreateFuzzyPredStmt);
			n->pred=$4;
			n->begd=$6;
			n->endd=$9;
			n->minfp=$12;
			n->core1=$14;
			n->core2=$16;
			n->maxfp=$18;
			n->typefp=1;
			$$ = (Node *)n;
		}
			| 	
		CREATE FUZZY PREDICATE ColId ON Iconst'.''.'Iconst AS '('INFINITE','INFINITE','Iconst','Iconst')'
		{
			CreateFuzzyPredStmt *n = makeNode(CreateFuzzyPredStmt);
			n->pred=$4;
			n->begd=$6;
			n->endd=$9;
			n->core2=$16;
			n->maxfp=$18;
			n->typefp=2;
			$$ = (Node *)n;
		}
			| 
		CREATE FUZZY PREDICATE ColId ON Iconst'.''.'Iconst AS '('Iconst','Iconst','INFINITE','INFINITE')'
		{
			CreateFuzzyPredStmt *n = makeNode(CreateFuzzyPredStmt);
			n->pred=$4;
			n->begd=$6;
			n->endd=$9;
			n->minfp=$13;
			n->core1=$15;
			n->typefp=3;
			$$ = (Node *)n;
		}
	;


*) Agregaciones en /src/include/nodes/nodes.h:
	linea 296:	T_CreateFuzzyPredStmt,
	linea 299:    	T_A_FuzzyPred,

*) Agregaciones en /src/include/nodes/parsenodes.h:
	linea 252:
/*
 * A_FuzzPred- a Fuzzy Predicate expression
 */
typedef struct A_FuzzyPred
{
	NodeTag		type;
	char		*pred;
	int		minfp;
	int		core1;
	int		core2;
	int		maxfp;
	int		typefp;
} A_FuzzyPred;

	linea 1282:
/* ----------------------
 *		{Create} FUZZY PREDICATE Statement
 * ----------------------
 */
typedef struct CreateFuzzyPredStmt
{
	NodeTag		type;

	int		begd;	/* begin domain  */
	int		endd;	/* end domain */
	int		minfp;	/* min trapezoid */
	int		core1;	/* core 1 trapezoid */
	int		core2;	/* core 2 trapezoid */
	int		maxfp;	/* max trapezoid */
	int		typefp;	/* type=1 (trapezoid), 2 (at least), 3 (at most)*/	
} CreateFuzzyPredStmt;


*) Agregaciones en /src/backend/tcop/utility.c
linea 46:	#include "commands/fuzzypred.h"

l��nea 321:	case T_CreateFuzzyPredStmt:

l��nea 1019:
			/*
			 * ******************************** FUZZY statements ****
			 */
		case T_CreateFuzzyPredStmt:
			CreateFuzzyPredicate((CreateFuzzyPredStmt *) parsetree);             
			break;

l��nea 1733:
		case T_CreateFuzzyPredStmt:
			tag = "CREATE FUZZY PREDICATE";
			break;
l��nea 2108:
		case T_CreateFuzzyPredStmt:
			lev = LOGSTMT_DDL;
			break;


*) Se crea un nuevo archivo: /src/include/catalog/pg_fuzzypred.h que contiene:

#ifndef PG_FUZZYP_H
#define PG_FUZZYP_H

/* ----------------
 *		postgres.h contains the system type definitions and the
 *		CATALOG(), BKI_BOOTSTRAP and DATA() sugar words so this file
 *		can be read by both genbki.sh and the C compiler.
 * ----------------
 */

/* ----------------
 *		pg_fuzzypred definition.  cpp turns this into
 *		typedef struct FormData_pg_fuzzypred
 * ----------------
 */
#define RelationFuzzyPredId	2859

CATALOG(pg_fuzzypred,2859) BKI_BOOTSTRAP BKI_WITHOUT_OIDS
{
	NameData	predname;	/* predicate name */
	int2		predbegd;	/* begin domain  */
	int2		predendd;	/* end domain */
	int2		predminfp;	/* min trapezoid */
	int2		predcore1;	/* core1 trapezoid */
	int2		predcore2;	/* core2 trapezoid */
	int2		predmaxfp;	/* max trapezoid */
	int2		predtypefp;	/* type=1 (trapezoid), 2 (at least), 3 (at most) */ 
} FormData_pg_fuzzypred;


/* ----------------
 *		Form_pg_fuzzypred corresponds to a pointer to a tuple with
 *		the format of pg_fuzzypred relation.
 * ----------------
 */
typedef FormData_pg_fuzzypred *Form_pg_fuzzypred;

/* ----------------
 *		compiler constants for pg_fuzzypred
 * ----------------
 */

#define Natts_pg_fuzzypred			8
#define Anum_pg_fuzzypred_predname		1
#define Anum_pg_fuzzypred_predbegd		2
#define Anum_pg_fuzzypred_predendd		3
#define Anum_pg_fuzzypred_predminfp		4
#define Anum_pg_fuzzypred_predcore1		5
#define Anum_pg_fuzzypred_predcore2		6
#define Anum_pg_fuzzypred_predmaxfp		7
#define Anum_pg_fuzzypred_predtypefp		8


#endif   /* PG_FUZZYP_H */

*) Agregaciones en /src/backend/catalog/Makefile.in
l��nea 39:
	pg_fuzzypred.h \

*) Agregaciones en /src/include/catalog/pg_class.h
l��nea 143:
DATA(insert OID = 2859 (  pg_fuzzypred		PGNSP 87 PGUID 0 2859 0 0 0 0 0 f f r 8 0 0 0 0 0 f f f f 3 _null_ _null_ ));
DESCR("");

*) Agregaciones en /src/include/catalog/pg_attribute.h
l��nea 446:
#define Schema_pg_fuzzypred \
{ 2857, {"predname"},   19, -1, NAMEDATALEN, 1, 0, -1, -1, false, 'p', 'i', true, false, false, true, 0 }, \
{ 2859, {"predbegd"},	21, -1,	2, 15, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }, \
{ 2859, {"predendd"},	21, -1,	2, 16, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }, \
{ 2859, {"predminfp"},  21, -1,	2, 17, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }, \
{ 2859, {"predcore1"},	21, -1,	2, 18, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }, \
{ 2859, {"predcore2"},	21, -1,	2, 19, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }, \
{ 2859, {"predmaxfp"},	21, -1,	2, 19, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }, \
{ 2859, {"predtypefp"},	21, -1,	2, 20, 0, -1, -1, true, 'p', 's', true, false, false, true, 0 }
DATA(insert ( 2859 predname   		19 -1 NAMEDATALEN	1 0 -1 -1 f p i t f f t 0));
DATA(insert ( 2859 predbegd		21 -1 2   2 0 -1 -1 t p s t f f t 0));
DATA(insert ( 2859 predendd		21 -1 2   3 0 -1 -1 t p s t f f t 0));
DATA(insert ( 2859 predminfp		21 -1 2   4 0 -1 -1 t p s t f f t 0));
DATA(insert ( 2859 predcore1		21 -1 2   5 0 -1 -1 t p s t f f t 0));
DATA(insert ( 2859 predcore2		21 -1 2   6 0 -1 -1 t p s t f f t 0));
DATA(insert ( 2859 predmaxfp		21 -1 2   7 0 -1 -1 t p s t f f t 0));
DATA(insert ( 2859 predtypefp		21 -1 2   8 0 -1 -1 t p s t f f t 0));

*) Nuevo archivo en /include/commands/fuzzypred.h:
	/*-------------------------------------------------------------------------
	 *
	 * fuzzypred.h
	 *	  Commands for manipulating fuzzy predicates.
	 *
	 *
	 * $PostgreSQL: pgsql/src/include/commands/fuzzypred.h
	 *
	 *-------------------------------------------------------------------------
	 */
	#ifndef FUZZYPRED_H
	#define FUZZYPRED_H
	
	#include "nodes/parsenodes.h"
	
	extern void CreateFuzzyPredicate(CreateFuzzyPredStmt *stmt);
	
	#endif   /* FUZZYPRED_H */
	
	
*) Nuevo archivo en /backend/commands/fuzzypred.c:
/*-------------------------------------------------------------------------
 *
 * fuzzypred.c
 *	  Commands for manipulating fuzzy predicates.
 *
 *
 * $PostgreSQL: pgsql/src/backend/commands/fuzzypred.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_fuzzypred.h"
#include "commands/fuzzypred.h"
#include "storage/lock.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/xact.h"
#include "commands/comment.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"


//These are the several kinds of fuzzy predicates the user can create
#define NO_INFINITE 1
#define INFINITE_BEG 2
#define INFINITE_LAST 3

/*
 * CREATE FUZZY PREDICATE
 */
void
CreateFuzzyPredicate(CreateFuzzyPredStmt *stmt)
{
	Relation	pg_fuzzypred_rel;
	TupleDesc	pg_fuzzypred_dsc;
	HeapTuple	tuple;
	Datum		new_record[Natts_pg_fuzzypred];
	char		new_record_nulls[Natts_pg_fuzzypred];
	Oid		fuzzypredid;
	char		*namefp;
	int2		begd=0;
	int2		endd=0;
	int2		minfp=0;
	int2		core1=0;
	int2		core2=0;
	int2		maxfp=0;
	int 		typefp=0;
	ScanKeyData 	key;
	SysScanDesc 	fpscan;

	namefp=stmt->pred;
	switch (stmt->typefp)
	{
		case NO_INFINITE:
			typefp=NO_INFINITE;
			begd=stmt->begd;
			endd=stmt->endd;
			minfp=stmt->minfp;
			core1=stmt->core1;
			core2=stmt->core2;
			maxfp=stmt->maxfp;
			break;

		case INFINITE_BEG:
			typefp=INFINITE_BEG;
			begd=stmt->begd;
			endd=stmt->endd;
			core2=stmt->core2;
			maxfp=stmt->maxfp;
			break;

		case INFINITE_LAST:
			typefp=INFINITE_LAST;
			begd=stmt->begd;
			endd=stmt->endd;
			minfp=stmt->minfp;
			core1=stmt->core1;
			break;
	
	}
	/*
	 * Check the pg_fuzzypred relation to be certain the fuzzy predicate doesn't already
	 * exist.
	 */
	pg_fuzzypred_rel = heap_open(RelationFuzzyPredId, RowExclusiveLock); 
	pg_fuzzypred_dsc = RelationGetDescr(pg_fuzzypred_rel);
	
	ScanKeyInit(&key,	Anum_pg_fuzzypred_predname,
				BTEqualStrategyNumber, F_NAMEEQ,
				PointerGetDatum(namefp));
	fpscan = systable_beginscan(pg_fuzzypred_rel, 0, true,
								SnapshotNow, 1, &key);
	while (HeapTupleIsValid(tuple = systable_getnext(fpscan)))
	{
		Form_pg_fuzzypred  pg_fuzzypred = (Form_pg_fuzzypred) GETSTRUCT(tuple);

		if (namestrcmp(&(pg_fuzzypred->predname), namefp) == 0)

			ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("Fuzzy Predicate \"%s\" for relation \"%s\" already exists",namefp, RelationGetRelationName(pg_fuzzypred_rel))));
	}
	systable_endscan(fpscan);

	MemSet(new_record, 0, sizeof(new_record));
	MemSet(new_record_nulls, ' ', sizeof(new_record_nulls));

	new_record[Anum_pg_fuzzypred_predname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->pred));


	new_record[Anum_pg_fuzzypred_predbegd - 1] = begd;
	new_record[Anum_pg_fuzzypred_predendd - 1] = endd;
	new_record[Anum_pg_fuzzypred_predminfp - 1] = minfp;
	new_record[Anum_pg_fuzzypred_predcore1 - 1] = core1;
	new_record[Anum_pg_fuzzypred_predcore2 - 1] = core2;
	new_record[Anum_pg_fuzzypred_predmaxfp - 1] = maxfp;
	new_record[Anum_pg_fuzzypred_predtypefp - 1] = typefp;
	
	tuple = heap_formtuple(pg_fuzzypred_dsc, new_record, new_record_nulls);

	/*
	 * Insert new record in the pg_fuzzypred table
	 */
	fuzzypredid = simple_heap_insert(pg_fuzzypred_rel, tuple);
	CatalogUpdateIndexes(pg_fuzzypred_rel, tuple);

	/*
	 * Close pg_fuzzypred, but keep lock till commit (this is important to
	 * prevent any risk of deadlock failure while updating flat file)
	 */
	heap_close(pg_fuzzypred_rel, NoLock);
}
	
*) Agregaciones en /src/backend/commands/Makefile
	linea 21:
		fuzzypred.o

*) Agregaciones en /src/backend/utils/cache/relcache.c
	linea 44:
		#include "catalog/pg_fuzzypred.h"
 	linea 83:
		static FormData_pg_attribute Desc_pg_fuzzypred[Natts_pg_fuzzypred] = {Schema_pg_fuzzypred};

*) Agregaciones en /src/backend/parser/parse_expr.c
	linea 17:
		#include "access/genam.h"
		#include "access/heapam.h"
		#include "catalog/heap.h"
		#include "catalog/pg_fuzzypred.h"

	linea 23:
		#include "commands/fuzzypred.h"
	linea 27:
		#include "nodes/print.h"
	linea 40:
		#include "utils/fmgroids.h"

	linea 250:
		case T_A_FuzzyPred:
		
	linea 342:
		// Variables for Fuzzy Predicates 
		Relation	pg_fuzzypred_rel;
		HeapTuple	tuple;
		ScanKeyData 	key;
		SysScanDesc 	fpscan;
		bool		isfp;
		A_FuzzyPred    *n = makeNode(A_FuzzyPred);

	linea 412:
		{
		else
			{
		// Is a Fuzzy Predicate?  
			isfp = false;
			pg_fuzzypred_rel = heap_open(RelationFuzzyPredId,RowExclusiveLock); 
			ScanKeyInit(&key,Anum_pg_fuzzypred_predname,
				BTEqualStrategyNumber, F_NAMEEQ,
				PointerGetDatum(name));
			fpscan = systable_beginscan(pg_fuzzypred_rel, 0, true,
				SnapshotNow, 1, &key);
			while (HeapTupleIsValid(tuple = systable_getnext(fpscan)))
			{
			Form_pg_fuzzypred  pg_fuzzypred = (Form_pg_fuzzypred) GETSTRUCT(tuple);
			if (namestrcmp(&(pg_fuzzypred->predname), name) == 0)
				{
				n->type=T_A_FuzzyPred;
				n->pred=name;
				n->minfp=pg_fuzzypred->predminfp;
				n->core1=pg_fuzzypred->predcore1;
				n->core2=pg_fuzzypred->predcore2;
				n->maxfp=pg_fuzzypred->predmaxfp;
				n->typefp=pg_fuzzypred->predtypefp;
				isfp = true;
				break;
				}
			}
			systable_endscan(fpscan);
			heap_close(pg_fuzzypred_rel, NoLock);
			if (isfp) {
				node = (Node *) n;
				pstate->hasFuzzy = true;
				pstate->numFuzzy++;
			}
				ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" does not exist",
							name),
					 parser_errposition(pstate, cref->location)));
			}
		}
		break;

	linea 1573:
		case T_A_FuzzyPred:
			type = UNKNOWNOID;
			break;


*) Agregaciones en /src/backend/nodes/outfuncs.c
	linea 26:
		#include "nodes/parsenodes.h"

	linea 1526:
		WRITE_BOOL_FIELD(hasFuzzyPred);
		WRITE_INT_FIELD(numFuzzyPred);

	linea 1834:
		_outFuzzyPred(StringInfo str, A_FuzzyPred *node)
		{
			WRITE_NODE_TYPE("Nodo A_Fuzzy_Predicate");
			WRITE_STRING_FIELD(pred);
			WRITE_INT_FIELD(minfp);
			WRITE_INT_FIELD(core1);
			WRITE_INT_FIELD(core2);
			WRITE_INT_FIELD(maxfp);
			WRITE_INT_FIELD(typefp);
		}

	linea 2203:
			case T_A_FuzzyPred:
				_outFuzzyPred(str, obj);
				break;

*) Agregaciones en /src/backend/nodes/print.c
	linea 540:
		case T_A_FuzzyPred:
			return "FUZZY PREDICATE";

*) Agregaciones en /src/backend/utils/cache/syscache.c
	linea 42:
		#include "catalog/pg_fuzzypred.h"

*) Agregaciones en /src/backend/utils/cache/lsyscache.c
	linea 28:
		#include "catalog/pg_fuzzypred.h"

*) Agregaciones en /src/backend/include/catalog/pg_type.h
DATA(insert OID = 801 (  fuzzypred PGNSP PGUID -1 f b t \054 0	 0 unknownin unknownout unknownrecv unknownsend - c p f 0 -1 0 _null_ _null_ ));
DESCR("Fuzzy Predicate");

*) Agregaciones en /src/backend/parser/parse_coerce.c
	linea 362:
	// A Fuzzy Predicate Node
	if (inputTypeId == UNKNOWNOID)
	{		
		return node;
	}

*) Agregaciones en /src/backend/optimizer/utils/clause.c
	linea 3176:
		case T_A_FuzzyPred:
	linea 3638:
		case T_A_FuzzyPred:

*) Agregaciones en /src/backend/nodes/copyfuncs.c
	linea 2706:
		static A_FuzzyPred *
		_copyAFuzzyPred(A_FuzzyPred *from)
		{
			A_FuzzyPred *newnode = makeNode(A_FuzzyPred);
				COPY_STRING_FIELD(pred);
				COPY_SCALAR_FIELD(minfp);
				COPY_SCALAR_FIELD(core1);
				COPY_SCALAR_FIELD(core2);
				COPY_SCALAR_FIELD(maxfp);
				COPY_SCALAR_FIELD(typefp);
			return newnode;
		}
	linea 3356:
		case T_A_FuzzyPred:
			retval = _copyAFuzzyPred(from);
			break;
	
*) Agregaciones en /src/backend/include/parser/parse_node.h
	linea 83:
		bool		p_hasFuzzyPred;
		int		p_numFuzzyPred;	

*) Agregaciones en /src/backend/include/nodes/parsenodes.h
	linea 102:
		bool		hasFuzzyPred;	/* has Fuzzy Predicates */
		int		numFuzzyPred;	/* How many Fuzzy predicates */

*) Agregaciones en /src/backend/parser/analyze.c
	linea 522, 862, 2161, 2358, 2582, 2886:
		qry->hasFuzzyPred = pstate->p_hasFuzzyPred;
		qry->numFuzzyPred = pstate->p_numFuzzyPred;

*) Agregaciones en /src/backend/optimizer/plan/planner.c
	LINEA 211:
		if (parse->hasFuzzyPred) {
		printf("*** Expandiendo Fuzzy Pred para colocarlo como booleano en qual ***\n");
		}
Resultado.txttext/plain; charset=ANSI_X3.4-1968; name=Resultado.txtDownload
#2Martijn van Oosterhout
kleptog@svana.org
In reply to: Werner Echezuria (#1)
Re: SQLF Optimization question

On Thu, May 31, 2007 at 09:58:27AM -0400, Werner Echezuria wrote:

Hello, (First of all, sorry for my english),

I'm in a project that involves including sqlf sentences to make postgres
understand the fuzzy querys, i mean, make people to create fuzzy
predicates, these are words like fat, tall, young, old, in an sql sentence.
I guess the first step is complete. The parser know when an user writes
"CREATE FUZZY PREDICATE <name> ON <domain> AS <fuzzy set>".

For instance, a person can create the young word as a fuzzy predicate like
this: "CREATE FUZZY PREDICATE young ON 0..120 AS (0,0,25,50)", that means,
in a domain of 0..120 a young person is between 25 and 50. The fuzzy set has
the follows: (beg,core1,core2,end), (begin of the set, first core, second
core, end of the set).

The sentence creates a row in a catalog named "pg_fuzzypred", and we're good
till then.

But when I write "Select * from students where age=young", brings me a
message telling me the node isn't exist. My teacher who is very involved in
the project tells me that postgres has to do the derivation principle,
before it gets to the executor (what sounds logical), in order to the
optimizer calculates all costs properly.

Ok, I didn't totally follow the patch, but it seems to be you need to,
somewhere, get postgres to see an unadorned word as a fuzzy predicate.
Somewhere involving IDENT probably.

However, it seems to me you're opening yourself up to problems, what if
someone has a column name the same as your fuzzy predicate? I suggest
you also add syntax for the matching, something like:

Select * from students where age FUZZYMATCH young;

This means that you don't interfere with existing grammer, but you have
your own bit which you can completely control.

Hope this helps,
--
Martijn van Oosterhout <kleptog@svana.org> http://svana.org/kleptog/

Show quoted text

From each according to his ability. To each according to his ability to litigate.