UNIQUE predicate
Hi all,
The attached patch implements the SQL92 UNIQUE predicate. I've written
some regression tests (as well as adding a few for subselects in FROM
clauses). I'll update the documentation if/when this patch is accepted.
Cheers,
Neil
--
Neil Conway <neilconway@rogers.com>
PGP Key ID: DB3C29FC
Attachments:
unique_pred-4.patchtext/plain; charset=us-asciiDownload
Index: src/backend/executor/nodeSubplan.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/executor/nodeSubplan.c,v
retrieving revision 1.33
diff -c -r1.33 nodeSubplan.c
*** src/backend/executor/nodeSubplan.c 20 Jun 2002 20:29:28 -0000 1.33
--- src/backend/executor/nodeSubplan.c 3 Jul 2002 16:09:10 -0000
***************
*** 21,29 ****
--- 21,31 ----
#include "access/heapam.h"
#include "executor/executor.h"
+ #include "executor/nodeGroup.h"
#include "executor/nodeSubplan.h"
#include "tcop/pquery.h"
+ static bool ExecUniquePred(List *tuples, TupleDesc tdesc, MemoryContext cxt);
/* ----------------------------------------------------------------
* ExecSubPlan(node)
***************
*** 41,46 ****
--- 43,50 ----
TupleTableSlot *slot;
Datum result;
bool found = false; /* TRUE if got at least one subplan tuple */
+ List *tuples = NIL; /* used to collect tuples for UNIQUE pred */
+ TupleDesc saved_td = NULL;
List *lst;
/*
***************
*** 113,118 ****
--- 117,141 ----
break;
}
+ if (subLinkType == UNIQUE_SUBLINK)
+ {
+ /*
+ * Stick the tuple at the front of the list: more efficient
+ * than lappend(), and we don't care about the ordering
+ */
+ tuples = lcons(heap_copytuple(tup), tuples);
+
+ /*
+ * We need a TupleDesc to calculate equality, so we save the
+ * first one we see. This assumes that the TupleDesc for one
+ * tuple is valid for use with the other tuples of the result
+ * set.
+ */
+ if (!saved_td)
+ saved_td = tdesc;
+ continue;
+ }
+
if (subLinkType == EXPR_SUBLINK)
{
/* cannot allow multiple input tuples for EXPR sublink */
***************
*** 255,260 ****
--- 278,287 ----
}
}
+ if (subLinkType == UNIQUE_SUBLINK)
+ result = BoolGetDatum(ExecUniquePred(tuples, saved_td,
+ econtext->ecxt_per_tuple_memory));
+
if (!found)
{
/*
***************
*** 269,277 ****
--- 296,384 ----
}
}
+ foreach (lst, tuples)
+ {
+ HeapTuple tup = lfirst(lst);
+ Assert(HeapTupleIsValid(tup));
+ heap_freetuple(tup);
+ }
+ freeList(tuples);
+
MemoryContextSwitchTo(oldcontext);
return result;
+ }
+
+ /*
+ * Evaluates a UNIQUE predicate. Note that this is distinct from a UNIQUE
+ * index/constraint, in that it only applies to a subquery. The spec is:
+ * (SQL99, section 8.10)
+ *
+ * <unique predicate> ::= UNIQUE <table subquery>
+ *
+ * 1) Let T be the result of the <table subquery>.
+ *
+ * 2) If there are no two rows in T such that the value of each column
+ * in one row is non-null and is equal to the value of the cor-
+ * responding column in the other row according to Subclause 8.2,
+ * "<comparison predicate>", then the result of the <unique predi-
+ * cate> is true; otherwise, the result of the <unique predicate>
+ * is false.
+ *
+ * Equality operators are executed in CurrentMemoryContext, so the caller
+ * is responsible for changing into the appropriate short-term context
+ * (which is reset, when we call execTuplesMatch() ).
+ *
+ * This reuses some of the code in nodeGroup.c
+ */
+ static bool
+ ExecUniquePred(List *tuples, TupleDesc tdesc, MemoryContext cxt)
+ {
+ FmgrInfo *eqfunctions;
+ AttrNumber *attnums;
+ bool retval = true;
+ List *l,
+ *l2;
+ int i;
+
+ /* Fast path: an empty result set cannot have any duplicates. */
+ if (tuples == NIL)
+ return true;
+
+ /* cxt is reset by execTuplesMatch(), we can't use it ourselves */
+ Assert(cxt != CurrentMemoryContext);
+
+ attnums = palloc(sizeof(AttrNumber) * tdesc->natts);
+
+ for (i = 0; i < tdesc->natts; i++)
+ attnums[i] = tdesc->attrs[i]->attnum;
+
+ eqfunctions = execTuplesMatchPrepare(tdesc, tdesc->natts, attnums);
+
+ /*
+ * Do a simple exhaustive search of the result set, checking for
+ * duplicates. There is likely a faster way to do this.
+ */
+ foreach (l, tuples)
+ {
+ HeapTuple tuple1 = lfirst(l);
+ foreach (l2, lnext(l))
+ {
+ HeapTuple tuple2 = lfirst(l2);
+ if (execTuplesMatch(tuple1, tuple2, tdesc, tdesc->natts,
+ attnums, eqfunctions, cxt))
+ {
+ retval = false;
+ goto finished; /* easiest way to break out of 2 loops */
+ }
+ }
+ }
+
+ finished:
+ pfree(eqfunctions);
+ pfree(attnums);
+
+ return retval;
}
/* ----------------------------------------------------------------
Index: src/backend/parser/gram.y
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/parser/gram.y,v
retrieving revision 2.334
diff -c -r2.334 gram.y
*** src/backend/parser/gram.y 22 Jun 2002 02:04:45 -0000 2.334
--- src/backend/parser/gram.y 3 Jul 2002 16:09:11 -0000
***************
*** 5515,5520 ****
--- 5515,5530 ----
n->subselect = $4;
$$ = (Node *)n;
}
+ | UNIQUE select_with_parens
+ {
+ SubLink *n = makeNode(SubLink);
+ n->subLinkType = UNIQUE_SUBLINK;
+ n->useor = FALSE;
+ n->lefthand = NIL;
+ n->oper = NIL;
+ n->subselect = $2;
+ $$ = (Node *) n;
+ }
| row_expr
{ $$ = $1; }
;
Index: src/backend/parser/parse_expr.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/parser/parse_expr.c,v
retrieving revision 1.119
diff -c -r1.119 parse_expr.c
*** src/backend/parser/parse_expr.c 20 Jun 2002 20:29:32 -0000 1.119
--- src/backend/parser/parse_expr.c 3 Jul 2002 16:09:11 -0000
***************
*** 313,322 ****
elog(ERROR, "Bad query in subselect");
sublink->subselect = (Node *) qtree;
! if (sublink->subLinkType == EXISTS_SUBLINK)
{
/*
! * EXISTS needs no lefthand or combining operator.
* These fields should be NIL already, but make sure.
*/
sublink->lefthand = NIL;
--- 313,323 ----
elog(ERROR, "Bad query in subselect");
sublink->subselect = (Node *) qtree;
! if (sublink->subLinkType == EXISTS_SUBLINK ||
! sublink->subLinkType == UNIQUE_SUBLINK)
{
/*
! * EXISTS and UNIQUE need no lefthand or combining operator.
* These fields should be NIL already, but make sure.
*/
sublink->lefthand = NIL;
Index: src/include/nodes/primnodes.h
===================================================================
RCS file: /var/lib/cvs/pgsql/src/include/nodes/primnodes.h,v
retrieving revision 1.64
diff -c -r1.64 primnodes.h
*** src/include/nodes/primnodes.h 20 Jun 2002 20:29:51 -0000 1.64
--- src/include/nodes/primnodes.h 3 Jul 2002 16:09:13 -0000
***************
*** 312,317 ****
--- 312,318 ----
* cases also the combining operator(s) just above it. The subLinkType
* indicates the form of the expression represented:
* EXISTS_SUBLINK EXISTS(SELECT ...)
+ * UNIQUE_SUBLINK UNIQUE(SELECT ...)
* ALL_SUBLINK (lefthand) op ALL (SELECT ...)
* ANY_SUBLINK (lefthand) op ANY (SELECT ...)
* MULTIEXPR_SUBLINK (lefthand) op (SELECT ...)
***************
*** 357,370 ****
*/
typedef enum SubLinkType
{
! EXISTS_SUBLINK, ALL_SUBLINK, ANY_SUBLINK, MULTIEXPR_SUBLINK, EXPR_SUBLINK
} SubLinkType;
typedef struct SubLink
{
NodeTag type;
! SubLinkType subLinkType; /* EXISTS, ALL, ANY, MULTIEXPR, EXPR */
bool useor; /* TRUE to combine column results with
* "OR" not "AND" */
List *lefthand; /* list of outer-query expressions on the
--- 358,376 ----
*/
typedef enum SubLinkType
{
! EXISTS_SUBLINK,
! UNIQUE_SUBLINK,
! ALL_SUBLINK,
! ANY_SUBLINK,
! MULTIEXPR_SUBLINK,
! EXPR_SUBLINK
} SubLinkType;
typedef struct SubLink
{
NodeTag type;
! SubLinkType subLinkType;
bool useor; /* TRUE to combine column results with
* "OR" not "AND" */
List *lefthand; /* list of outer-query expressions on the
Index: src/test/regress/expected/subselect.out
===================================================================
RCS file: /var/lib/cvs/pgsql/src/test/regress/expected/subselect.out,v
retrieving revision 1.3
diff -c -r1.3 subselect.out
*** src/test/regress/expected/subselect.out 23 Mar 2000 07:42:13 -0000 1.3
--- src/test/regress/expected/subselect.out 3 Jul 2002 16:09:14 -0000
***************
*** 141,146 ****
--- 141,214 ----
| 3
(5 rows)
+ -- Subselects in FROM clause
+ SELECT * FROM (SELECT * FROM subselect_tbl) first_test;
+ f1 | f2 | f3
+ ----+----+----
+ 1 | 2 | 3
+ 2 | 3 | 4
+ 3 | 4 | 5
+ 1 | 1 | 1
+ 2 | 2 | 2
+ 3 | 3 | 3
+ 6 | 7 | 8
+ 8 | 9 |
+ (8 rows)
+
+ SELECT * FROM (SELECT * FROM subselect_tbl WHERE f1 = 2) first_test WHERE f1 <> 2;
+ f1 | f2 | f3
+ ----+----+----
+ (0 rows)
+
+ -- Subselects using UNIQUE
+ SELECT * FROM subselect_tbl WHERE UNIQUE (SELECT f3 FROM subselect_tbl);
+ f1 | f2 | f3
+ ----+----+----
+ (0 rows)
+
+ SELECT * FROM subselect_tbl WHERE UNIQUE (SELECT random());
+ f1 | f2 | f3
+ ----+----+----
+ 1 | 2 | 3
+ 2 | 3 | 4
+ 3 | 4 | 5
+ 1 | 1 | 1
+ 2 | 2 | 2
+ 3 | 3 | 3
+ 6 | 7 | 8
+ 8 | 9 |
+ (8 rows)
+
+ SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM subselect_tbl upper
+ WHERE UNIQUE (SELECT upper.f1 + f2 FROM subselect_tbl
+ WHERE f2 = CAST(f3 AS integer));
+ Correlated Field | Second Field
+ ------------------+--------------
+ 1 | 3
+ 2 | 4
+ 3 | 5
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 6 | 8
+ 8 |
+ (8 rows)
+
+ SELECT * FROM subselect_tbl WHERE UNIQUE
+ (SELECT f1, f1 * 2 AS f2 FROM int4_tbl);
+ f1 | f2 | f3
+ ----+----+----
+ 1 | 2 | 3
+ 2 | 3 | 4
+ 3 | 4 | 5
+ 1 | 1 | 1
+ 2 | 2 | 2
+ 3 | 3 | 3
+ 6 | 7 | 8
+ 8 | 9 |
+ (8 rows)
+
--
-- Use some existing tables in the regression test
--
Index: src/test/regress/sql/subselect.sql
===================================================================
RCS file: /var/lib/cvs/pgsql/src/test/regress/sql/subselect.sql,v
retrieving revision 1.3
diff -c -r1.3 subselect.sql
*** src/test/regress/sql/subselect.sql 23 Mar 2000 07:42:12 -0000 1.3
--- src/test/regress/sql/subselect.sql 3 Jul 2002 16:09:14 -0000
***************
*** 65,70 ****
--- 65,90 ----
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL);
+ -- Subselects in FROM clause
+
+ SELECT * FROM (SELECT * FROM subselect_tbl) first_test;
+
+ SELECT * FROM (SELECT * FROM subselect_tbl WHERE f1 = 2) first_test WHERE f1 <> 2;
+
+ -- Subselects using UNIQUE
+
+ SELECT * FROM subselect_tbl WHERE UNIQUE (SELECT f3 FROM subselect_tbl);
+
+ SELECT * FROM subselect_tbl WHERE UNIQUE (SELECT random());
+
+ SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM subselect_tbl upper
+ WHERE UNIQUE (SELECT upper.f1 + f2 FROM subselect_tbl
+ WHERE f2 = CAST(f3 AS integer));
+
+ SELECT * FROM subselect_tbl WHERE UNIQUE
+ (SELECT f1, f1 * 2 AS f2 FROM int4_tbl);
+
--
-- Use some existing tables in the regression test
--
Hi,
I've attached the changes I've made to pg_attribute.h - I can't see what's
wrong but whenever I do an initdb it fails:
initdb -D /home/chriskl/local/data
The files belonging to this database system will be owned by user "chriskl".
This user must also own the server process.
The database cluster will be initialized with locale C.
creating directory /home/chriskl/local/data... ok
creating directory /home/chriskl/local/data/base... ok
creating directory /home/chriskl/local/data/global... ok
creating directory /home/chriskl/local/data/pg_xlog... ok
creating directory /home/chriskl/local/data/pg_clog... ok
creating template1 database in /home/chriskl/local/data/base/1...
initdb failed.
Removing /home/chriskl/local/data.
Chris
Attachments:
attisdropped.txttext/plain; name=attisdropped.txtDownload
Index: pg_attribute.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_attribute.h,v
retrieving revision 1.93
diff -c -r1.93 pg_attribute.h
*** pg_attribute.h 2002/06/20 20:29:44 1.93
--- pg_attribute.h 2002/07/04 02:08:29
***************
*** 142,147 ****
--- 142,150 ----
/* Has DEFAULT value or not */
bool atthasdef;
+
+ /* Is dropped or not */
+ bool attisdropped;
} FormData_pg_attribute;
/*
***************
*** 150,156 ****
* because of alignment padding at the end of the struct.)
*/
#define ATTRIBUTE_TUPLE_SIZE \
! (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
/* ----------------
* Form_pg_attribute corresponds to a pointer to a tuple with
--- 153,159 ----
* because of alignment padding at the end of the struct.)
*/
#define ATTRIBUTE_TUPLE_SIZE \
! (offsetof(FormData_pg_attribute,attisdropped) + sizeof(bool))
/* ----------------
* Form_pg_attribute corresponds to a pointer to a tuple with
***************
*** 164,170 ****
* ----------------
*/
! #define Natts_pg_attribute 15
#define Anum_pg_attribute_attrelid 1
#define Anum_pg_attribute_attname 2
#define Anum_pg_attribute_atttypid 3
--- 167,173 ----
* ----------------
*/
! #define Natts_pg_attribute 16
#define Anum_pg_attribute_attrelid 1
#define Anum_pg_attribute_attname 2
#define Anum_pg_attribute_atttypid 3
***************
*** 180,185 ****
--- 183,189 ----
#define Anum_pg_attribute_attalign 13
#define Anum_pg_attribute_attnotnull 14
#define Anum_pg_attribute_atthasdef 15
+ #define Anum_pg_attribute_attisdropped 16
***************
*** 398,405 ****
{ 1249, {"attstorage"}, 18, 0, 1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
{ 1249, {"attisset"}, 16, 0, 1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
{ 1249, {"attalign"}, 18, 0, 1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
! { 1249, {"attnotnull"}, 16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
! { 1249, {"atthasdef"}, 16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
DATA(insert ( 1249 attrelid 26 DEFAULT_ATTSTATTARGET 4 1 0 -1 -1 t p f i f f));
DATA(insert ( 1249 attname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 2 0 -1 -1 f p f i f f));
--- 402,410 ----
{ 1249, {"attstorage"}, 18, 0, 1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
{ 1249, {"attisset"}, 16, 0, 1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
{ 1249, {"attalign"}, 18, 0, 1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
! { 1249, {"attnotnull"}, 16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
! { 1249, {"atthasdef"}, 16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
! { 1249, {"attisdropped"}, 16, 0, 1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }
DATA(insert ( 1249 attrelid 26 DEFAULT_ATTSTATTARGET 4 1 0 -1 -1 t p f i f f));
DATA(insert ( 1249 attname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 2 0 -1 -1 f p f i f f));
***************
*** 416,421 ****
--- 421,427 ----
DATA(insert ( 1249 attalign 18 0 1 13 0 -1 -1 t p f c f f));
DATA(insert ( 1249 attnotnull 16 0 1 14 0 -1 -1 t p f c f f));
DATA(insert ( 1249 atthasdef 16 0 1 15 0 -1 -1 t p f c f f));
+ DATA(insert ( 1249 attisdropped 16 0 1 16 0 -1 -1 t p f c f f));
DATA(insert ( 1249 ctid 27 0 6 -1 0 -1 -1 f p f i f f));
/* no OIDs in pg_attribute */
DATA(insert ( 1249 xmin 28 0 4 -3 0 -1 -1 t p f i f f));
"Christopher Kings-Lynne" <chriskl@familyhealth.com.au> writes:
I've attached the changes I've made to pg_attribute.h - I can't see what's
wrong but whenever I do an initdb it fails:
Did you change the relnatts entry in pg_class.h for pg_attribute?
More generally, run initdb with -d or -v or whatever its debug-output
switch is, and look at the last few lines to see the actual error.
(Caution: this may produce megabytes of output.)
regards, tom lane
Seems we may not need isdropped, so I will hold on evaluating this.
---------------------------------------------------------------------------
Christopher Kings-Lynne wrote:
Hi,
I've attached the changes I've made to pg_attribute.h - I can't see what's
wrong but whenever I do an initdb it fails:initdb -D /home/chriskl/local/data
The files belonging to this database system will be owned by user "chriskl".
This user must also own the server process.The database cluster will be initialized with locale C.
creating directory /home/chriskl/local/data... ok
creating directory /home/chriskl/local/data/base... ok
creating directory /home/chriskl/local/data/global... ok
creating directory /home/chriskl/local/data/pg_xlog... ok
creating directory /home/chriskl/local/data/pg_clog... ok
creating template1 database in /home/chriskl/local/data/base/1...
initdb failed.
Removing /home/chriskl/local/data.Chris
[ Attachment, skipping... ]
---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026
nconway@klamath.dyndns.org (Neil Conway) writes:
The attached patch implements the SQL92 UNIQUE predicate.
The implementation seems to be well short of usefulness in a production
setting, for two reasons: (1) you're accumulating all the tuples into
memory --- what if they don't fit? (2) the comparison step is O(N^2),
which renders the first point rather moot ... a test case large enough
to risk memory exhaustion will not complete in your lifetime.
I think a useful implementation will require work in the planner to
convert the UNIQUE predicate into a SORT/UNIQUE plan structure (somewhat
like the way DISTINCT is implemented, but we just want a boolean
result).
regards, tom lane
On Sat, Jul 06, 2002 at 05:32:53PM -0400, Tom Lane wrote:
nconway@klamath.dyndns.org (Neil Conway) writes:
The attached patch implements the SQL92 UNIQUE predicate.
The implementation seems to be well short of usefulness in a production
setting, for two reasons: (1) you're accumulating all the tuples into
memory --- what if they don't fit? (2) the comparison step is O(N^2),
which renders the first point rather moot ... a test case large enough
to risk memory exhaustion will not complete in your lifetime.
That's true -- I probably should have noted in the original email that
my implementation was pretty much "the simplest thing that works".
I think a useful implementation will require work in the planner to
convert the UNIQUE predicate into a SORT/UNIQUE plan structure (somewhat
like the way DISTINCT is implemented, but we just want a boolean
result).
Hmmm... that's certainly possible, but I'm not sure the feature is
important enough to justify that much effort.
Cheers,
Neil
--
Neil Conway <neilconway@rogers.com>
PGP Key ID: DB3C29FC
Neil Conway wrote:
On Sat, Jul 06, 2002 at 05:32:53PM -0400, Tom Lane wrote:
nconway@klamath.dyndns.org (Neil Conway) writes:
The attached patch implements the SQL92 UNIQUE predicate.
The implementation seems to be well short of usefulness in a production
setting, for two reasons: (1) you're accumulating all the tuples into
memory --- what if they don't fit? (2) the comparison step is O(N^2),
which renders the first point rather moot ... a test case large enough
to risk memory exhaustion will not complete in your lifetime.That's true -- I probably should have noted in the original email that
my implementation was pretty much "the simplest thing that works".I think a useful implementation will require work in the planner to
convert the UNIQUE predicate into a SORT/UNIQUE plan structure (somewhat
like the way DISTINCT is implemented, but we just want a boolean
result).Hmmm... that's certainly possible, but I'm not sure the feature is
important enough to justify that much effort.
I am going to agree with Tom on this one. We do foreign key triggers in
memory, but having a entire query result in memory to perform UNIQUE
seems really stretching the resources of the machine.
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026