[PATCH] Allow anonymous rowtypes in function return column definition

Started by Elvis Pranskevichusabout 7 years ago6 messages
1 attachment(s)

Currently, the following query

SELECT q.b = row(2)
FROM unnest(ARRAY[row(1, row(2))]) AS q(a int, b record);

would fail with

ERROR: column "b" has pseudo-type record

This is due to CheckAttributeNamesTypes() being used on a function
coldeflist as if it was a real relation definition. But in the context
of a query there seems to be no harm in allowing this, as other ways of
manipulating anonymous rowtypes work well, e.g.:

SELECT (ARRAY[ROW(1, ROW(2))])[1];

Elvis

Attachments:

0001-Allow-anonymous-rowtypes-in-function-return-column-d.patchtext/x-patch; charset=UTF-8; name=0001-Allow-anonymous-rowtypes-in-function-return-column-d.patchDownload
From 873ecd6b31abc28c787f398d78ba2511c6e712a2 Mon Sep 17 00:00:00 2001
From: Elvis Pranskevichus <elvis@magic.io>
Date: Thu, 6 Dec 2018 17:16:28 -0500
Subject: [PATCH] Allow anonymous rowtypes in function return column definition

Currently, the following query

    SELECT q.b = row(2)
    FROM unnest(ARRAY[row(1, row(2))]) AS q(a int, b record);

would fail with

    ERROR:  column "b" has pseudo-type record

This is due to CheckAttributeNamesTypes() being used on a function
coldeflist as if it was a real relation definition.  But in the context
of a query there seems to be no harm in allowing this, as other ways of
manipulating anonymous rowtypes work well, e.g.:

    SELECT (ARRAY[ROW(1, ROW(2))])[1];
---
 src/backend/catalog/heap.c             | 23 +++++++++++++++--------
 src/backend/catalog/index.c            |  2 +-
 src/backend/commands/tablecmds.c       |  4 ++--
 src/backend/parser/parse_relation.c    |  2 +-
 src/include/catalog/heap.h             |  6 ++++--
 src/test/regress/expected/rowtypes.out |  7 +++++++
 src/test/regress/sql/rowtypes.sql      |  2 ++
 7 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 8c52a1543d..ab9cb600f1 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -412,7 +412,8 @@ heap_create(const char *relname,
  */
 void
 CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-						 bool allow_system_table_mods)
+						 bool allow_system_table_mods,
+						 bool allow_anonymous_records)
 {
 	int			i;
 	int			j;
@@ -471,7 +472,8 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 						   TupleDescAttr(tupdesc, i)->atttypid,
 						   TupleDescAttr(tupdesc, i)->attcollation,
 						   NIL, /* assume we're creating a new rowtype */
-						   allow_system_table_mods);
+						   allow_system_table_mods,
+						   allow_anonymous_records);
 	}
 }
 
@@ -494,7 +496,8 @@ void
 CheckAttributeType(const char *attname,
 				   Oid atttypid, Oid attcollation,
 				   List *containing_rowtypes,
-				   bool allow_system_table_mods)
+				   bool allow_system_table_mods,
+				   bool allow_anonymous_records)
 {
 	char		att_typtype = get_typtype(atttypid);
 	Oid			att_typelem;
@@ -507,7 +510,8 @@ CheckAttributeType(const char *attname,
 		 * catalogs (this allows creating pg_statistic and cloning it during
 		 * VACUUM FULL)
 		 */
-		if (atttypid != ANYARRAYOID || !allow_system_table_mods)
+		if (!((atttypid == ANYARRAYOID && allow_system_table_mods) ||
+				(atttypid == RECORDOID && allow_anonymous_records)))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
 					 errmsg("column \"%s\" has pseudo-type %s",
@@ -520,7 +524,8 @@ CheckAttributeType(const char *attname,
 		 */
 		CheckAttributeType(attname, getBaseType(atttypid), attcollation,
 						   containing_rowtypes,
-						   allow_system_table_mods);
+						   allow_system_table_mods,
+						   allow_anonymous_records);
 	}
 	else if (att_typtype == TYPTYPE_COMPOSITE)
 	{
@@ -558,7 +563,8 @@ CheckAttributeType(const char *attname,
 			CheckAttributeType(NameStr(attr->attname),
 							   attr->atttypid, attr->attcollation,
 							   containing_rowtypes,
-							   allow_system_table_mods);
+							   allow_system_table_mods,
+							   allow_anonymous_records);
 		}
 
 		relation_close(relation, AccessShareLock);
@@ -572,7 +578,8 @@ CheckAttributeType(const char *attname,
 		 */
 		CheckAttributeType(attname, att_typelem, attcollation,
 						   containing_rowtypes,
-						   allow_system_table_mods);
+						   allow_system_table_mods,
+						   allow_anonymous_records);
 	}
 
 	/*
@@ -1063,7 +1070,7 @@ heap_create_with_catalog(const char *relname,
 	 */
 	Assert(IsNormalProcessingMode() || IsBootstrapProcessingMode());
 
-	CheckAttributeNamesTypes(tupdesc, relkind, allow_system_table_mods);
+	CheckAttributeNamesTypes(tupdesc, relkind, allow_system_table_mods, false);
 
 	/*
 	 * This would fail later on anyway, if the relation already exists.  But
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 44625a507b..ac5a285be7 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -435,7 +435,7 @@ ConstructTupleDescriptor(Relation heapRelation,
 			 */
 			CheckAttributeType(NameStr(to->attname),
 							   to->atttypid, to->attcollation,
-							   NIL, false);
+							   NIL, false, false);
 		}
 
 		/*
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 357c73073d..0a19209519 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5488,7 +5488,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colDef->colname, typeOid, collOid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   false);
+					   false, false);
 
 	/* construct new attribute's pg_attribute entry */
 	attribute.attrelid = myrelid;
@@ -9126,7 +9126,7 @@ ATPrepAlterColumnType(List **wqueue,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colName, targettype, targetcollid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   false);
+					   false, false);
 
 	if (tab->relkind == RELKIND_RELATION ||
 		tab->relkind == RELKIND_PARTITIONED_TABLE)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index bf5df26009..0f224e7718 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1567,7 +1567,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 			 * Ensure that the coldeflist defines a legal set of names (no
 			 * duplicates) and datatypes (no pseudo-types, for instance).
 			 */
-			CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE, false);
+			CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE, false, true);
 		}
 		else
 			ereport(ERROR,
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index c5e40ff017..9ee4824af8 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -133,12 +133,14 @@ extern Form_pg_attribute SystemAttributeByName(const char *attname,
 					  bool relhasoids);
 
 extern void CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-						 bool allow_system_table_mods);
+						 bool allow_system_table_mods,
+						 bool allow_anonymous_records);
 
 extern void CheckAttributeType(const char *attname,
 				   Oid atttypid, Oid attcollation,
 				   List *containing_rowtypes,
-				   bool allow_system_table_mods);
+				   bool allow_system_table_mods,
+				   bool allow_anonymous_records);
 
 /* pg_partitioned_table catalog manipulation functions */
 extern void StorePartitionKey(Relation rel,
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index 30053d07df..0a98725d98 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -668,6 +668,13 @@ select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
 (1 row)
 
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
+-- Test anonymous rowtype in coldeflist
+SELECT q.b = row(2) FROM unnest(ARRAY[row(1, row(2))]) AS q(a int, b record);
+ ?column? 
+----------
+ t
+(1 row)
+
 --
 -- Test case derived from bug #5716: check multiple uses of a rowtype result
 --
diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql
index faf2e108d6..08d320426a 100644
--- a/src/test/regress/sql/rowtypes.sql
+++ b/src/test/regress/sql/rowtypes.sql
@@ -261,6 +261,8 @@ select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
 
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
 
+-- Test anonymous rowtype in coldeflist
+SELECT q.b = row(2) FROM unnest(ARRAY[row(1, row(2))]) AS q(a int, b record);
 
 --
 -- Test case derived from bug #5716: check multiple uses of a rowtype result
-- 
2.19.2

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Elvis Pranskevichus (#1)
Re: [PATCH] Allow anonymous rowtypes in function return column definition

Elvis Pranskevichus <el@prans.net> writes:

Currently, the following query
SELECT q.b = row(2)
FROM unnest(ARRAY[row(1, row(2))]) AS q(a int, b record);
would fail with
ERROR: column "b" has pseudo-type record
This is due to CheckAttributeNamesTypes() being used on a function
coldeflist as if it was a real relation definition. But in the context
of a query there seems to be no harm in allowing this,

Hmm, I'm not entirely convinced of that. Looking at the conditions
checked by CheckAttributeType, the first question that pops to mind
is whether allowing "record" doesn't break our defenses against
a rowtype recursively containing itself. Perhaps not --- allowing
an anonymous record to contain another one isn't really recursion,
because since they're both anonymous they can't be the "same" type.
But it could do with more thought than I've given it just now,
and with a comment explaining the reasoning.

(Speaking of which, you did not bother updating the comment immediately
adjacent to the code you changed in CheckAttributeType, even though
this change makes it incomplete/not very sensible.)

I also wonder why we'd allow RECORD but not RECORDARRAY. More
generally, why not any polymorphic type? There's probably a
good reason why not, but having a clear explanation why we're
treating RECORD differently from other polymorphics would go
a long way towards convincing me that this is safe. Again
this is just handwaving, but such an argument might rest on the
fact that a record value is self-identifying as long as you know
it's a record, whereas a random Datum is not a self-identifying
member of the type class "anyelement", for instance.

I feel a bit uncomfortable about defining the new flag to
CheckAttributeNamesTypes as "allow_anonymous_records"; that
seems pretty short-sighted and single-purpose, especially in
view of there being no obvious reason why it shouldn't accept
RECORDARRAY too. Maybe with a clearer explanation of the
issues above, we could define that flag in a more on-point way.

BTW, it strikes me that maybe the reason ANYARRAY isn't insane
to allow in pg_statistic has to do with arrays also being
self-identifying members of that type class, and so possibly
we could get to a place where we're unifying that hack with
this feature addition. I don't insist on doing that as part of
this patch, but as long as we're trying to think through these
issues, it's something to think about.

regards, tom lane

In reply to: Tom Lane (#2)
1 attachment(s)
Re: [PATCH] Allow anonymous rowtypes in function return column definition

On Sunday, January 13, 2019 4:57:48 PM EST Tom Lane wrote:

I also wonder why we'd allow RECORD but not RECORDARRAY.

That's by omission. There's no reason to refuse RECORDARRAY here for
the same reason why RECORD is accepted.

More generally, why not any polymorphic type? [...] the
fact that a record value is self-identifying as long as you know
it's a record, whereas a random Datum is not a self-identifying
member of the type class "anyelement", for instance.

Yes that's the reason. We allow "record" in coldeflist because it
only happens near a RangeFunction, and set-returning functions always do
"BlessTupleDesc", which means that RECORDOID data would always have an
associated TupleDesc and can be processed as a regular composite type.
Recursion is not an issue, since every instance would have a separate
TupleDesc even if the shape is the same.

I feel a bit uncomfortable about defining the new flag to
CheckAttributeNamesTypes as "allow_anonymous_records"; that
seems pretty short-sighted and single-purpose, especially in
view of there being no obvious reason why it shouldn't accept
RECORDARRAY too. Maybe with a clearer explanation of the
issues above, we could define that flag in a more on-point way.

I renamed it to "in_coldeflist", which seems like a clearer indication
that we are validating that, and not a regular table definition.

BTW, it strikes me that maybe the reason ANYARRAY isn't insane
to allow in pg_statistic has to do with arrays also being
self-identifying members of that type class

That's true. Array data encode the OID of their element type, but that
only allows sending the data out, as subscripting or casting anyarray is
not allowed. There also seems to be no guarantee that the actual type
of the array doesn't change from row to row in such a scenario. Given
that, I think it would be best to keep anyarray restricted to the system
catalogs.

I attached the updated patch.

Elvis

Attachments:

Allow-anonymous-rowtypes-in-coldeflist-v2.patchtext/x-patch; charset=UTF-8; name=Allow-anonymous-rowtypes-in-coldeflist-v2.patchDownload
From 2841fd41db106ae09b68d3b5cf29901e98e2446a Mon Sep 17 00:00:00 2001
From: Elvis Pranskevichus <elvis@magic.io>
Date: Thu, 6 Dec 2018 17:16:28 -0500
Subject: [PATCH] Allow anonymous rowtypes in function return column definition

Currently, the following query

    SELECT q.b = row(2)
    FROM unnest(ARRAY[row(1, row(2))]) AS q(a int, b record);

would fail with

    ERROR:  column "b" has pseudo-type record

This is due to CheckAttributeNamesTypes() being used on a function
coldeflist as if it was a real relation definition.  But in the context
of a query there seems to be no harm in allowing this, as other ways of
manipulating anonymous rowtypes work well, e.g.:

    SELECT (ARRAY[ROW(1, ROW(2))])[1];
---
 src/backend/catalog/heap.c             | 42 ++++++++++++++++++--------
 src/backend/catalog/index.c            |  2 +-
 src/backend/commands/tablecmds.c       |  4 +--
 src/backend/parser/parse_relation.c    |  5 +--
 src/include/catalog/heap.h             |  6 ++--
 src/test/regress/expected/rowtypes.out | 10 ++++++
 src/test/regress/sql/rowtypes.sql      |  5 +++
 7 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index cc865de627..103b4fb96a 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -451,7 +451,8 @@ heap_create(const char *relname,
  */
 void
 CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-						 bool allow_system_table_mods)
+						 bool allow_system_table_mods,
+						 bool in_coldeflist)
 {
 	int			i;
 	int			j;
@@ -509,7 +510,8 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 						   TupleDescAttr(tupdesc, i)->atttypid,
 						   TupleDescAttr(tupdesc, i)->attcollation,
 						   NIL, /* assume we're creating a new rowtype */
-						   allow_system_table_mods);
+						   allow_system_table_mods,
+						   in_coldeflist);
 	}
 }
 
@@ -532,7 +534,8 @@ void
 CheckAttributeType(const char *attname,
 				   Oid atttypid, Oid attcollation,
 				   List *containing_rowtypes,
-				   bool allow_system_table_mods)
+				   bool allow_system_table_mods,
+				   bool in_coldeflist)
 {
 	char		att_typtype = get_typtype(atttypid);
 	Oid			att_typelem;
@@ -540,12 +543,24 @@ CheckAttributeType(const char *attname,
 	if (att_typtype == TYPTYPE_PSEUDO)
 	{
 		/*
-		 * Refuse any attempt to create a pseudo-type column, except for a
-		 * special hack for pg_statistic: allow ANYARRAY when modifying system
-		 * catalogs (this allows creating pg_statistic and cloning it during
-		 * VACUUM FULL)
+		 * Generally speaking, we don't allow pseudo-types in column
+		 * definitions, with the following exceptions:
+		 *
+		 * First, we allow the record pseudo-type (and its array type) in
+		 * the column definition of a set-returning function.  In this case
+		 * the record type is "blessed", i.e. there is an actual TupleDesc
+		 * associated with it and it can be treated as a normal composite
+		 * type.  Composite type recursion is not a danger here since
+		 * each instance of an anonymous rowtype in this context would have
+		 * a distinct TupleDesc.
+		 *
+		 * Secondly, we allow allow ANYARRAY when modifying system
+		 * catalogs (this allows creating pg_statistic and cloning
+		 * it during VACUUM FULL).
 		 */
-		if (atttypid != ANYARRAYOID || !allow_system_table_mods)
+		if (!((atttypid == ANYARRAYOID && allow_system_table_mods) ||
+				((atttypid == RECORDOID || atttypid == RECORDARRAYOID) &&
+					in_coldeflist)))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
 					 errmsg("column \"%s\" has pseudo-type %s",
@@ -558,7 +573,8 @@ CheckAttributeType(const char *attname,
 		 */
 		CheckAttributeType(attname, getBaseType(atttypid), attcollation,
 						   containing_rowtypes,
-						   allow_system_table_mods);
+						   allow_system_table_mods,
+						   in_coldeflist);
 	}
 	else if (att_typtype == TYPTYPE_COMPOSITE)
 	{
@@ -596,7 +612,8 @@ CheckAttributeType(const char *attname,
 			CheckAttributeType(NameStr(attr->attname),
 							   attr->atttypid, attr->attcollation,
 							   containing_rowtypes,
-							   allow_system_table_mods);
+							   allow_system_table_mods,
+							   in_coldeflist);
 		}
 
 		relation_close(relation, AccessShareLock);
@@ -610,7 +627,8 @@ CheckAttributeType(const char *attname,
 		 */
 		CheckAttributeType(attname, att_typelem, attcollation,
 						   containing_rowtypes,
-						   allow_system_table_mods);
+						   allow_system_table_mods,
+						   in_coldeflist);
 	}
 
 	/*
@@ -1076,7 +1094,7 @@ heap_create_with_catalog(const char *relname,
 	 */
 	Assert(IsNormalProcessingMode() || IsBootstrapProcessingMode());
 
-	CheckAttributeNamesTypes(tupdesc, relkind, allow_system_table_mods);
+	CheckAttributeNamesTypes(tupdesc, relkind, allow_system_table_mods, false);
 
 	/*
 	 * This would fail later on anyway, if the relation already exists.  But
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 225c078018..9a7f7aabbf 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -411,7 +411,7 @@ ConstructTupleDescriptor(Relation heapRelation,
 			 */
 			CheckAttributeType(NameStr(to->attname),
 							   to->atttypid, to->attcollation,
-							   NIL, false);
+							   NIL, false, false);
 		}
 
 		/*
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index ff76499137..8383b419aa 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5509,7 +5509,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colDef->colname, typeOid, collOid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   false);
+					   false, false);
 
 	/* construct new attribute's pg_attribute entry */
 	attribute.attrelid = myrelid;
@@ -9449,7 +9449,7 @@ ATPrepAlterColumnType(List **wqueue,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colName, targettype, targetcollid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   false);
+					   false, false);
 
 	if (tab->relkind == RELKIND_RELATION ||
 		tab->relkind == RELKIND_PARTITIONED_TABLE)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 09fbb588af..2106d5bcd7 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1590,9 +1590,10 @@ addRangeTableEntryForFunction(ParseState *pstate,
 
 			/*
 			 * Ensure that the coldeflist defines a legal set of names (no
-			 * duplicates) and datatypes (no pseudo-types, for instance).
+			 * duplicates) and datatypes (no pseudo-types, for instance, but
+			 * record, and record[] are allowed).
 			 */
-			CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE, false);
+			CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE, false, true);
 		}
 		else
 			ereport(ERROR,
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 625b7e5c43..962ca08eab 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -130,12 +130,14 @@ extern const FormData_pg_attribute *SystemAttributeDefinition(AttrNumber attno);
 extern const FormData_pg_attribute *SystemAttributeByName(const char *attname);
 
 extern void CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-						 bool allow_system_table_mods);
+						 bool allow_system_table_mods,
+						 bool allow_anonymous_records);
 
 extern void CheckAttributeType(const char *attname,
 				   Oid atttypid, Oid attcollation,
 				   List *containing_rowtypes,
-				   bool allow_system_table_mods);
+				   bool allow_system_table_mods,
+				   bool allow_anonymous_records);
 
 /* pg_partitioned_table catalog manipulation functions */
 extern void StorePartitionKey(Relation rel,
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index d6a1a3331e..aaeffbc2a2 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -668,6 +668,16 @@ select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
 (1 row)
 
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
+-- Test anonymous rowtype in coldeflist
+SELECT q.b = row(2), q.c = ARRAY[row(3)], q.d = row(row(4)) FROM
+    unnest(ARRAY[
+      row(1, row(2), ARRAY[row(3)], row(row(4)))
+    ]) AS q(a int, b record, c record[], d record);
+ ?column? | ?column? | ?column? 
+----------+----------+----------
+ t        | t        | t
+(1 row)
+
 --
 -- Test case derived from bug #5716: check multiple uses of a rowtype result
 --
diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql
index e6d389805c..5fe7a48788 100644
--- a/src/test/regress/sql/rowtypes.sql
+++ b/src/test/regress/sql/rowtypes.sql
@@ -261,6 +261,11 @@ select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
 
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
 
+-- Test anonymous rowtype in coldeflist
+SELECT q.b = row(2), q.c = ARRAY[row(3)], q.d = row(row(4)) FROM
+    unnest(ARRAY[
+      row(1, row(2), ARRAY[row(3)], row(row(4)))
+    ]) AS q(a int, b record, c record[], d record);
 
 --
 -- Test case derived from bug #5716: check multiple uses of a rowtype result
-- 
2.19.2

#4Tom Lane
tgl@sss.pgh.pa.us
In reply to: Elvis Pranskevichus (#3)
1 attachment(s)
Re: [PATCH] Allow anonymous rowtypes in function return column definition

Elvis Pranskevichus <el@prans.net> writes:

On Sunday, January 13, 2019 4:57:48 PM EST Tom Lane wrote:

I feel a bit uncomfortable about defining the new flag to
CheckAttributeNamesTypes as "allow_anonymous_records"; that
seems pretty short-sighted and single-purpose, especially in
view of there being no obvious reason why it shouldn't accept
RECORDARRAY too. Maybe with a clearer explanation of the
issues above, we could define that flag in a more on-point way.

I renamed it to "in_coldeflist", which seems like a clearer indication
that we are validating that, and not a regular table definition.

I still found this pretty disjointed. After a bit of thought I propose
the attached reformulation, which has the callers just tell the routines
which types to allow explicitly, with the justification comments living
at the call sites instead of within the routines.

One point that we hadn't really talked about is what happens when
CheckArrayTypes recurses. The existing logic is that it just passes
down the same restrictions it was told at the top level, and I kept
that here. Right now it hardly matters what we pass down, because
the base type of a domain or array can't be a pseudotype, and we
don't really expect to find one in a named composite type either.
The one case where it could matter is if someone tries to use
"pg_statistic" as a field's composite type: that would be allowed if
allow_system_table_mods and otherwise not. But it's not really
hard to imagine future additions where it would matter and it'd
be important to restrict things as we recurse down. I think this
formulation makes it easier to see what to do in such cases.

regards, tom lane

Attachments:

anonymous-rowtypes-in-coldeflist-v3.patchtext/x-diff; charset=us-ascii; name=anonymous-rowtypes-in-coldeflist-v3.patchDownload
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 910f651..06d18a1 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -445,11 +445,14 @@ heap_create(const char *relname,
  *		this is used to make certain the tuple descriptor contains a
  *		valid set of attribute names and datatypes.  a problem simply
  *		generates ereport(ERROR) which aborts the current transaction.
+ *
+ *		relkind is the relkind of the relation to be created.
+ *		flags controls which datatypes are allowed, cf CheckAttributeType.
  * --------------------------------
  */
 void
 CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-						 bool allow_system_table_mods)
+						 int flags)
 {
 	int			i;
 	int			j;
@@ -507,7 +510,7 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 						   TupleDescAttr(tupdesc, i)->atttypid,
 						   TupleDescAttr(tupdesc, i)->attcollation,
 						   NIL, /* assume we're creating a new rowtype */
-						   allow_system_table_mods);
+						   flags);
 	}
 }
 
@@ -524,13 +527,22 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
  * containing_rowtypes.  When checking a to-be-created rowtype, it's
  * sufficient to pass NIL, because there could not be any recursive reference
  * to a not-yet-existing rowtype.
+ *
+ * flags is a bitmask controlling which datatypes we allow.  For the most
+ * part, pseudo-types are disallowed as attribute types, but there are some
+ * exceptions: ANYARRAYOID, RECORDOID, and RECORDARRAYOID can be allowed
+ * in some cases.  (This works because values of those type classes are
+ * self-identifying to some extent.  However, RECORDOID and RECORDARRAYOID
+ * are reliably identifiable only within a session, since the identity info
+ * may use a typmod that is only locally assigned.  The caller is expected
+ * to know whether these cases are safe.)
  * --------------------------------
  */
 void
 CheckAttributeType(const char *attname,
 				   Oid atttypid, Oid attcollation,
 				   List *containing_rowtypes,
-				   bool allow_system_table_mods)
+				   int flags)
 {
 	char		att_typtype = get_typtype(atttypid);
 	Oid			att_typelem;
@@ -538,12 +550,18 @@ CheckAttributeType(const char *attname,
 	if (att_typtype == TYPTYPE_PSEUDO)
 	{
 		/*
-		 * Refuse any attempt to create a pseudo-type column, except for a
-		 * special hack for pg_statistic: allow ANYARRAY when modifying system
-		 * catalogs (this allows creating pg_statistic and cloning it during
-		 * VACUUM FULL)
+		 * We disallow pseudo-type columns, with the exception of ANYARRAY,
+		 * RECORD, and RECORD[] when the caller says that those are OK.
+		 *
+		 * We don't need to worry about recursive containment for RECORD and
+		 * RECORD[] because (a) no named composite type should be allowed to
+		 * contain those, and (b) two "anonymous" record types couldn't be
+		 * considered to be the same type, so infinite recursion isn't
+		 * possible.
 		 */
-		if (atttypid != ANYARRAYOID || !allow_system_table_mods)
+		if (!((atttypid == ANYARRAYOID && (flags & CHKATYPE_ANYARRAY)) ||
+			  (atttypid == RECORDOID && (flags & CHKATYPE_ANYRECORD)) ||
+			  (atttypid == RECORDARRAYOID && (flags & CHKATYPE_ANYRECORD))))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
 					 errmsg("column \"%s\" has pseudo-type %s",
@@ -556,7 +574,7 @@ CheckAttributeType(const char *attname,
 		 */
 		CheckAttributeType(attname, getBaseType(atttypid), attcollation,
 						   containing_rowtypes,
-						   allow_system_table_mods);
+						   flags);
 	}
 	else if (att_typtype == TYPTYPE_COMPOSITE)
 	{
@@ -594,7 +612,7 @@ CheckAttributeType(const char *attname,
 			CheckAttributeType(NameStr(attr->attname),
 							   attr->atttypid, attr->attcollation,
 							   containing_rowtypes,
-							   allow_system_table_mods);
+							   flags);
 		}
 
 		relation_close(relation, AccessShareLock);
@@ -608,7 +626,7 @@ CheckAttributeType(const char *attname,
 		 */
 		CheckAttributeType(attname, att_typelem, attcollation,
 						   containing_rowtypes,
-						   allow_system_table_mods);
+						   flags);
 	}
 
 	/*
@@ -1074,7 +1092,13 @@ heap_create_with_catalog(const char *relname,
 	 */
 	Assert(IsNormalProcessingMode() || IsBootstrapProcessingMode());
 
-	CheckAttributeNamesTypes(tupdesc, relkind, allow_system_table_mods);
+	/*
+	 * Validate proposed tupdesc for the desired relkind.  If
+	 * allow_system_table_mods is on, allow ANYARRAY to be used; this is a
+	 * hack to allow creating pg_statistic and cloning it during VACUUM FULL.
+	 */
+	CheckAttributeNamesTypes(tupdesc, relkind,
+							 allow_system_table_mods ? CHKATYPE_ANYARRAY : 0);
 
 	/*
 	 * This would fail later on anyway, if the relation already exists.  But
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 169b2de..faf6956 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -410,7 +410,7 @@ ConstructTupleDescriptor(Relation heapRelation,
 			 */
 			CheckAttributeType(NameStr(to->attname),
 							   to->atttypid, to->attcollation,
-							   NIL, false);
+							   NIL, 0);
 		}
 
 		/*
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 434be40..35a9ade 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5505,7 +5505,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colDef->colname, typeOid, collOid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   false);
+					   0);
 
 	/* construct new attribute's pg_attribute entry */
 	attribute.attrelid = myrelid;
@@ -9445,7 +9445,7 @@ ATPrepAlterColumnType(List **wqueue,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colName, targettype, targetcollid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   false);
+					   0);
 
 	if (tab->relkind == RELKIND_RELATION ||
 		tab->relkind == RELKIND_PARTITIONED_TABLE)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 3ff799f..f3b6d19 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1590,9 +1590,15 @@ addRangeTableEntryForFunction(ParseState *pstate,
 
 			/*
 			 * Ensure that the coldeflist defines a legal set of names (no
-			 * duplicates) and datatypes (no pseudo-types, for instance).
+			 * duplicates, but we needn't worry about system column names) and
+			 * datatypes.  Although we mostly can't allow pseudo-types, it
+			 * seems safe to allow RECORD and RECORD[], since values within
+			 * those type classes are self-identifying at runtime, and the
+			 * coldeflist doesn't represent anything that will be visible to
+			 * other sessions.
 			 */
-			CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE, false);
+			CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE,
+									 CHKATYPE_ANYRECORD);
 		}
 		else
 			ereport(ERROR,
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 625b7e5..50fb62b 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -19,6 +19,10 @@
 #include "parser/parse_node.h"
 
 
+/* flag bits for CheckAttributeType/CheckAttributeNamesTypes */
+#define CHKATYPE_ANYARRAY		0x01	/* allow ANYARRAY */
+#define CHKATYPE_ANYRECORD		0x02	/* allow RECORD and RECORD[] */
+
 typedef struct RawColumnDefault
 {
 	AttrNumber	attnum;			/* attribute to attach default to */
@@ -130,12 +134,12 @@ extern const FormData_pg_attribute *SystemAttributeDefinition(AttrNumber attno);
 extern const FormData_pg_attribute *SystemAttributeByName(const char *attname);
 
 extern void CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-						 bool allow_system_table_mods);
+						 int flags);
 
 extern void CheckAttributeType(const char *attname,
 				   Oid atttypid, Oid attcollation,
 				   List *containing_rowtypes,
-				   bool allow_system_table_mods);
+				   int flags);
 
 /* pg_partitioned_table catalog manipulation functions */
 extern void StorePartitionKey(Relation rel,
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index d6a1a33..054faabb 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -667,6 +667,17 @@ select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
  t
 (1 row)
 
+-- anonymous rowtypes in coldeflists
+select q.a, q.b = row(2), q.c = array[row(3)], q.d = row(row(4)) from
+    unnest(array[row(1, row(2), array[row(3)], row(row(4))),
+                 row(2, row(3), array[row(4)], row(row(5)))])
+      as q(a int, b record, c record[], d record);
+ a | ?column? | ?column? | ?column? 
+---+----------+----------+----------
+ 1 | t        | t        | t
+ 2 | f        | f        | f
+(2 rows)
+
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
 --
 -- Test case derived from bug #5716: check multiple uses of a rowtype result
diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql
index e6d3898..454d462 100644
--- a/src/test/regress/sql/rowtypes.sql
+++ b/src/test/regress/sql/rowtypes.sql
@@ -259,6 +259,12 @@ select row(1, '(1,2)')::testtype6 *< row(1, '(1,3)')::testtype6;
 select row(1, '(1,2)')::testtype6 *>= row(1, '(1,3)')::testtype6;
 select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
 
+-- anonymous rowtypes in coldeflists
+select q.a, q.b = row(2), q.c = array[row(3)], q.d = row(row(4)) from
+    unnest(array[row(1, row(2), array[row(3)], row(row(4))),
+                 row(2, row(3), array[row(4)], row(row(5)))])
+      as q(a int, b record, c record[], d record);
+
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
 
 
In reply to: Tom Lane (#4)
Re: [PATCH] Allow anonymous rowtypes in function return column definition

On Wednesday, January 30, 2019 5:59:41 PM EST Tom Lane wrote:

I still found this pretty disjointed. After a bit of thought I
propose the attached reformulation, which has the callers just tell
the routines which types to allow explicitly, with the justification
comments living at the call sites instead of within the routines.

This is a much better formulation, thank you!

Elvis

#6Tom Lane
tgl@sss.pgh.pa.us
In reply to: Elvis Pranskevichus (#5)
Re: [PATCH] Allow anonymous rowtypes in function return column definition

Elvis Pranskevichus <el@prans.net> writes:

On Wednesday, January 30, 2019 5:59:41 PM EST Tom Lane wrote:

I still found this pretty disjointed. After a bit of thought I
propose the attached reformulation, which has the callers just tell
the routines which types to allow explicitly, with the justification
comments living at the call sites instead of within the routines.

This is a much better formulation, thank you!

OK, pushed.

regards, tom lane