TRUNCATE on foreign table

Started by Kazutaka Onishi, almost 5 years ago, 117 messages
#1 Kazutaka Onishi
onishi@heterodb.com
1 attachment(s)

Hello,

The attached patch is for supporting "TRUNCATE" on foreign tables.

This patch includes:
* Adding "ExecForeignTruncate" function into FdwRoutine.
* Enabling "postgres_fdw" to use TRUNCATE.

This patch was originally proposed by Kaigai-san in March 2020,
but it was returned because it could no longer be applied to the latest source code.

Please refer to the discussion.
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

I have fixed the patch so that it can be submitted to Commit Fest 2021-03.

regards,

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

Attachments:

pgsql14-truncate-on-foreign-table.v1.patch (application/x-patch; name=pgsql14-truncate-on-foreign-table.v1.patch) [Download]
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..a9ce323a67 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Append "TRUNCATE [ONLY] rel [, ...] {RESTART|CONTINUE} IDENTITY {RESTRICT|CASCADE}" to buf
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)	/* walk both lists in lockstep */
+	{
+		Relation	frel = lfirst(lc1);	/* foreign table to name in the command */
+		int			extra = lfirst_int(lc2);	/* per-table flag; nonzero adds ONLY */
+
+		if (lc1 != list_head(frels_list))	/* comma before every table but the first */
+			appendStringInfoString(buf, ", ");
+		if (extra != 0)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)	/* no default: any other value appends nothing */
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index b09dce63f5..17e0f250c8 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8229,6 +8229,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8928,7 +9161,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 64698c4da3..668178524d 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -107,7 +107,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 * Validate option value, when we can do so without any context.
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
-			strcmp(def->defname, "updatable") == 0)
+			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -211,6 +212,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 2ce42ce3f1..3c4c970313 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -390,6 +390,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -571,6 +575,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2772,6 +2779,84 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Verify the "truncatable" option of every listed foreign table, then
+ *		send a single TRUNCATE covering all of them over one connection.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;	/* stays true if the server sets no "truncatable" option */
+
+	/* obtain the connection from the first table, and check every table */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))	/* first table: set up per-server state once */
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false);
+			
+			foreach (cell, serv->options)	/* read the server-level "truncatable" option */
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+					server_truncatable = defGetBoolean(defel);
+			}
+		}
+		else
+		{
+			/* every later table is expected to belong to the same server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* start from the server-level value, then let the table option override it */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+				truncatable = defGetBoolean(defel);
+		}
+		if (!truncatable)	/* refuse before anything is sent to the remote side */
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* build one TRUNCATE statement naming all the listed tables */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* send it and wait for the result; any failure is reported at ERROR level */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* release the result and the query buffer */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 1f67b4d9fd..c89af59973 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -193,6 +193,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 319c15d635..1a13799379 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2337,6 +2337,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 854913ae5f..1ce103fee7 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1057,6 +1057,46 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> has the same length as
+     <literal>frels_list</literal>, and delivers extra information about
+     the context in which each foreign table is truncated.
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 8d6abd4c54..af360a6db7 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -405,6 +406,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote
+       table that has inherited children or leaf partitions.
+       When <command>TRUNCATE</command> specifies a foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       <literal>ONLY</literal>, so only the parent portion of the remote
+       table is truncated. As a result, it looks as if the
+       <command>TRUNCATE</command> command only partially removed the
+       contents of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 420991e315..ac76dd93ba 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -57,6 +57,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -296,6 +297,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Per-server bookkeeping for truncation of foreign tables.  This is the
+ * entry type of a hash table that uses the server OID as lookup key;
+ * each entry accumulates every foreign table of that server involved in
+ * the truncation, together with a parallel list of per-table flags.
+ */
+typedef struct
+{
+	Oid			server_oid;	/* hash key: OID of the foreign server */
+	List	   *frels_list;	/* relations of this server to truncate */
+	List	   *frels_extra;	/* integer list, one entry per frels_list member */
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1561,6 +1577,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1597,6 +1614,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1644,6 +1662,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1656,7 +1675,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1682,16 +1701,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1729,6 +1753,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1829,14 +1854,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look after the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1899,6 +1978,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -1984,12 +2088,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index eb7db89cef..3c80f1f890 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1803,6 +1803,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1832,6 +1833,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, 0);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1882,8 +1884,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..2e30a09729 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -56,8 +56,12 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 248f78da45..7488ce1ea6 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -159,6 +159,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -246,6 +251,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index b9e25820bc..e2c0bcea51 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1d540fe489..c63bfaf4b2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -702,6 +702,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#2 Zhihong Yu
zyu@yugabyte.com
In reply to: Kazutaka Onishi (#1)
Re: TRUNCATE on foreign table
Hi,
+               if (strcmp(defel->defname, "truncatable") == 0)
+                   server_truncatable = defGetBoolean(defel);

Looks like we can break out of the loop when the condition is met.

+ /* ExecForeignTruncate() is invoked for each server */

The method name in the comment is slightly different from the actual method
name.

+           if (strcmp(defel->defname, "truncatable") == 0)
+               truncatable = defGetBoolean(defel);

We can break out of the loop when the condition is met.

Cheers

On Sat, Feb 6, 2021 at 5:11 AM Kazutaka Onishi <onishi@heterodb.com> wrote:

Show quoted text

Hello,

The attached patch is for supporting "TRUNCATE" on foreign tables.

This patch includes:
* Adding "ExecForeignTruncate" function into FdwRoutine.
* Enabling "postgres_fdw" to use TRUNCATE.

This patch was proposed by Kaigai-san in March 2020,
but it was returned because it can't be applied to the latest source codes.

Please refer to the discussion.

/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

I have fixed the patch due to submit it to Commit Fest 2021-03.

regards,

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

#3 Kazutaka Onishi
onishi@heterodb.com
In reply to: Zhihong Yu (#2)
1 attachment(s)
Re: TRUNCATE on foreign table

Thank you for your comment! :D
I have fixed it and attached the revised patch.

regards,

2021年2月7日(日) 2:08 Zhihong Yu <zyu@yugabyte.com>:

Hi,
+               if (strcmp(defel->defname, "truncatable") == 0)
+                   server_truncatable = defGetBoolean(defel);

Looks like we can break out of the loop when the condition is met.

+ /* ExecForeignTruncate() is invoked for each server */

The method name in the comment is slightly different from the actual
method name.

+           if (strcmp(defel->defname, "truncatable") == 0)
+               truncatable = defGetBoolean(defel);

We can break out of the loop when the condition is met.

Cheers

On Sat, Feb 6, 2021 at 5:11 AM Kazutaka Onishi <onishi@heterodb.com>
wrote:

Hello,

The attached patch is for supporting "TRUNCATE" on foreign tables.

This patch includes:
* Adding "ExecForeignTruncate" function into FdwRoutine.
* Enabling "postgres_fdw" to use TRUNCATE.

This patch was proposed by Kaigai-san in March 2020,
but it was returned because it can't be applied to the latest source
codes.

Please refer to the discussion.

/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

I have fixed the patch due to submit it to Commit Fest 2021-03.

regards,

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

Attachments:

pgsql14-truncate-on-foreign-table.v2.patch (application/x-patch; name=pgsql14-truncate-on-foreign-table.v2.patch) Download
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..a9ce323a67 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a remote "TRUNCATE [ONLY] rel [, ...] ..." statement into buf
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,	/* Relations to truncate */
+				   List *frels_extra,	/* parallel int list, see ONLY below */
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))	/* separator before all but the first */
+			appendStringInfoString(buf, ", ");
+		if (extra != 0)		/* any non-zero extra value emits ONLY */
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)	/* other DropBehavior values append nothing */
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 60c7e115d6..e50704d101 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8247,6 +8247,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8946,7 +9179,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 64698c4da3..668178524d 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -107,7 +107,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 * Validate option value, when we can do so without any context.
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
-			strcmp(def->defname, "updatable") == 0)
+			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -211,6 +212,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 0e977066a8..f2f77e6257 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -390,6 +390,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -571,6 +575,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2772,6 +2779,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Error out unless the server/table "truncatable" options allow every
+ *		relation in frels_list, then send one TRUNCATE to the remote host.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,	/* Relations, all on one server */
+							List *frels_extra,	/* passed to deparseTruncateSql */
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* first relation: set up server and connection; all: check truncatable */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false);
+			/* server-level "truncatable" option; stays true when not set */
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* every relation in one call must belong to the same server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* table-level "truncatable" option overrides the server-level one */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* build one TRUNCATE statement covering every relation in frels_list */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run it; presumably inside the remote transaction block -- TODO confirm */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 1f67b4d9fd..c89af59973 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -193,6 +193,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 151f4f1834..8b028f67c9 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2349,6 +2349,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 854913ae5f..1ce103fee7 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1057,6 +1057,46 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> has the same length as
+     <literal>frels_list</literal> and delivers extra information about
+     the context in which the foreign tables are truncated.
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (matching the corresponding options
+     of <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 8d6abd4c54..af360a6db7 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -405,6 +406,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using the <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote
+       table that has inherited children or partition leaves.
+       When <command>TRUNCATE</command> specifies the foreign tables with
+       the <literal>ONLY</literal> clause, the remote queries issued by
+       <filename>postgres_fdw</filename> also specify the remote tables with
+       the <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it looks as if the
+       <command>TRUNCATE</command> command only partially removed the contents
+       of the foreign tables.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 420991e315..ac76dd93ba 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -57,6 +57,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -296,6 +297,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1561,6 +1577,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1597,6 +1614,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1644,6 +1662,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1656,7 +1675,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1682,16 +1701,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1729,6 +1753,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1829,14 +1854,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1899,6 +1978,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -1984,12 +2088,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index eb7db89cef..3c80f1f890 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1803,6 +1803,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1832,6 +1833,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, 0);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1882,8 +1884,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..2e30a09729 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -56,8 +56,12 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 248f78da45..7488ce1ea6 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -159,6 +159,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -246,6 +251,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index b9e25820bc..e2c0bcea51 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1d540fe489..c63bfaf4b2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -702,6 +702,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#4Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Kazutaka Onishi (#3)
Re: TRUNCATE on foreign table

IIUC, "truncatable" would be set to "false" for relations which do not
have physical storage e.g. views but will be true for regular tables.
When we are importing schema we need to set "truncatable"
appropriately. Is that something we will support with this patch?

Why would one want to truncate a foreign table instead of truncating
actual table wherever it is?

On Sun, Feb 7, 2021 at 6:06 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Thank you for your comment! :D
I have fixed it and attached the revised patch.

regards,

2021年2月7日(日) 2:08 Zhihong Yu <zyu@yugabyte.com>:

Hi,
+               if (strcmp(defel->defname, "truncatable") == 0)
+                   server_truncatable = defGetBoolean(defel);

Looks like we can break out of the loop when the condition is met.

+ /* ExecForeignTruncate() is invoked for each server */

The method name in the comment is slightly different from the actual method name.

+           if (strcmp(defel->defname, "truncatable") == 0)
+               truncatable = defGetBoolean(defel);

We can break out of the loop when the condition is met.

Cheers

On Sat, Feb 6, 2021 at 5:11 AM Kazutaka Onishi <onishi@heterodb.com> wrote:

Hello,

The attached patch is for supporting "TRUNCATE" on foreign tables.

This patch includes:
* Adding "ExecForeignTruncate" function into FdwRoutine.
* Enabling "postgres_fdw" to use TRUNCATE.

This patch was proposed by Kaigai-san in March 2020,
but it was returned because it can't be applied to the latest source codes.

Please refer to the discussion.
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

I have fixed the patch in order to submit it to Commit Fest 2021-03.

regards,

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

--
------------------
Kazutaka Onishi
(onishi@heterodb.com)

--
Best Wishes,
Ashutosh Bapat

#5Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Ashutosh Bapat (#4)
Re: TRUNCATE on foreign table

On Tue, Feb 9, 2021 at 5:31 PM Ashutosh Bapat
<ashutosh.bapat.oss@gmail.com> wrote:

Why would one want to truncate a foreign table instead of truncating
actual table wherever it is?

I think when the deletion on foreign tables (which actually deletes
rows from the remote table?) is allowed, it does make sense to have a
way to truncate the remote table via foreign table. Also, it can avoid
going to each and every remote server and doing the truncation
instead.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#6Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Bharath Rupireddy (#5)
Re: TRUNCATE on foreign table

On Tue, Feb 9, 2021 at 5:49 PM Bharath Rupireddy
<bharath.rupireddyforpostgres@gmail.com> wrote:

On Tue, Feb 9, 2021 at 5:31 PM Ashutosh Bapat
<ashutosh.bapat.oss@gmail.com> wrote:

Why would one want to truncate a foreign table instead of truncating
actual table wherever it is?

I think when the deletion on foreign tables (which actually deletes
rows from the remote table?) is allowed, it does make sense to have a
way to truncate the remote table via foreign table. Also, it can avoid
going to each and every remote server and doing the truncation
instead.

DELETE is very different from TRUNCATE. Application may want to DELETE
based on a join with a local table and hence it can not be executed on
a foreign server. That's not true with TRUNCATE.

--
Best Wishes,
Ashutosh Bapat

#7Kazutaka Onishi
onishi@heterodb.com
In reply to: Ashutosh Bapat (#6)
Re: TRUNCATE on foreign table

IIUC, "truncatable" would be set to "false" for relations which do not
have physical storage e.g. views but will be true for regular tables.

"truncatable" option is just for the foreign table and it's not related
with whether it's on a physical storage or not.
"postgres_fdw" already has "updatable" option to make the table read-only.
However, "updatable" is for DML, and it's not suitable for TRUNCATE.
Therefore, a new option "truncatable" was added.

Please refer to this message for details.
/messages/by-id/20200128040346.GC1552@paquier.xyz

DELETE is very different from TRUNCATE. Application may want to DELETE
based on a join with a local table and hence it can not be executed on
a foreign server. That's not true with TRUNCATE.

Yeah, as you say, applications don't need TRUNCATE.
We're focusing on analytical use, namely operating on huge data.
TRUNCATE can erase rows faster than DELETE.

#8Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Kazutaka Onishi (#7)
Re: TRUNCATE on foreign table

On Tue, Feb 9, 2021 at 7:45 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

IIUC, "truncatable" would be set to "false" for relations which do not
have physical storage e.g. views but will be true for regular tables.

"truncatable" option is just for the foreign table and it's not related with whether it's on a physical storage or not.
"postgres_fdw" already has "updatable" option to make the table read-only.
However, "updatable" is for DML, and it's not suitable for TRUNCATE.
Therefore new options "truncatable" was added.

Please refer to this message for details.
/messages/by-id/20200128040346.GC1552@paquier.xyz

DELETE is very different from TRUNCATE. Application may want to DELETE
based on a join with a local table and hence it can not be executed on
a foreign server. That's not true with TRUNCATE.

Yeah, As you say, Applications doesn't need TRUNCATE.
We're focusing for analytical use, namely operating huge data.
TRUNCATE can erase rows faster than DELETE.

The question is why can't that truncate be run on the foreign server
itself rather than local server?

--
Best Wishes,
Ashutosh Bapat

#9Kazutaka Onishi
onishi@heterodb.com
In reply to: Ashutosh Bapat (#8)
Re: TRUNCATE on foreign table

That's because using the foreign server is difficult for the user.

For example, the user doesn't always have the permission to log in to
the foreign server.
In some cases, the foreign table has been created by the administrator that
has permission to access the two servers and the user only uses the local
server.
Then the user has to ask the administrator to run TRUNCATE every time.

Furthermore, there are some fdw extensions which don't support SQL.
mongo_fdw, redis_fdw, etc...
These extensions have been used to provide SQL interfaces to the users.
It's hard for the user to run TRUNCATE after learning each database.

Anyway, it's more useful if the user can run queries in one place, right?
Do you have any concerns?

#10Kohei KaiGai
kaigai@heterodb.com
In reply to: Ashutosh Bapat (#8)
Re: TRUNCATE on foreign table

2021年2月10日(水) 13:55 Ashutosh Bapat <ashutosh.bapat.oss@gmail.com>:

On Tue, Feb 9, 2021 at 7:45 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

IIUC, "truncatable" would be set to "false" for relations which do not
have physical storage e.g. views but will be true for regular tables.

"truncatable" option is just for the foreign table and it's not related with whether it's on a physical storage or not.
"postgres_fdw" already has "updatable" option to make the table read-only.
However, "updatable" is for DML, and it's not suitable for TRUNCATE.
Therefore new options "truncatable" was added.

Please refer to this message for details.
/messages/by-id/20200128040346.GC1552@paquier.xyz

DELETE is very different from TRUNCATE. Application may want to DELETE
based on a join with a local table and hence it can not be executed on
a foreign server. That's not true with TRUNCATE.

Yeah, As you say, Applications doesn't need TRUNCATE.
We're focusing for analytical use, namely operating huge data.
TRUNCATE can erase rows faster than DELETE.

The question is why can't that truncate be run on the foreign server
itself rather than local server?

At least, PostgreSQL applies different access permissions on TRUNCATE.
If unconditional DELETE implicitly promotes to TRUNCATE, DB administrator
has to allow TRUNCATE permission on the remote table also.

Also, TRUNCATE acquires a stronger lock than DELETE.
DELETE still allows concurrent accesses to the table, whereas TRUNCATE
takes an AccessExclusive lock; thus, the FDW driver would have to control
the concurrent accesses
by itself, if we had no dedicated TRUNCATE interface.

Thanks,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#11Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Kazutaka Onishi (#9)
Re: TRUNCATE on foreign table

On Wed, Feb 10, 2021 at 10:58 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

That's because using the foreign server is difficult for the user.

For example, the user doesn't always have the permission to log in to the foreign server.
In some cases, the foreign table has been created by the administrator that has permission to access the two servers and the user only uses the local server.
Then the user has to ask the administrator to run TRUNCATE every time.

That might actually be seen as a loophole but ...

Furthermore,there are some fdw extensions which don't support SQL. mongo_fdw, redis_fdw, etc...
These extensions have been used to provide SQL interfaces to the users.
It's hard for the user to run TRUNCATE after learning each database.

this has some appeal.

Thanks for sharing the usecases.
--
Best Wishes,
Ashutosh Bapat

#12Ibrar Ahmed
ibrar.ahmad@gmail.com
In reply to: Ashutosh Bapat (#11)
1 attachment(s)
Re: TRUNCATE on foreign table

On Thu, Feb 11, 2021 at 6:23 PM Ashutosh Bapat <ashutosh.bapat.oss@gmail.com>
wrote:

On Wed, Feb 10, 2021 at 10:58 PM Kazutaka Onishi <onishi@heterodb.com>
wrote:

That's because using the foreign server is difficult for the user.

For example, the user doesn't always have the permission to login to the

forein server.

In some cases, the foreign table has been created by the administrator

that has permission to access the two servers and the user only uses the
local server.

Then the user has to ask the administrator to run TRUNCATE every time.

That might actually be seen as a loophole but ...

Furthermore,there are some fdw extensions which don't support SQL.

mongo_fdw, redis_fdw, etc...

These extensions have been used to provide SQL interfaces to the users.
It's hard for the user to run TRUNCATE after learning each database.

this has some appeal.

Thanks for sharing the usecases.
--
Best Wishes,
Ashutosh Bapat

The patch (pgsql14-truncate-on-foreign-table.v2.patch) does not apply

successfully.

http://cfbot.cputube.org/patch_32_2972.log

patching file contrib/postgres_fdw/expected/postgres_fdw.out
Hunk #2 FAILED at 9179.
1 out of 2 hunks FAILED -- saving rejects to file
contrib/postgres_fdw/expected/postgres_fdw.out.rej

As this is a minor change, I have updated the patch. Please take a
look.

--
Ibrar Ahmed

Attachments:

pgsql14-truncate-on-foreign-table.v3.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v3.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..a9ce323a67 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a "TRUNCATE [ONLY] rel [, ...]" statement with IDENTITY and RESTRICT/CASCADE clauses
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra != 0)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0649b6b81c..ba2f0f4e80 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8247,6 +8247,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8946,7 +9179,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 64698c4da3..668178524d 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -107,7 +107,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 * Validate option value, when we can do so without any context.
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
-			strcmp(def->defname, "updatable") == 0)
+			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -211,6 +212,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 35b48575c5..6f94c6830d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -390,6 +390,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -571,6 +575,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2791,6 +2798,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false);
+			
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 1f67b4d9fd..c89af59973 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -193,6 +193,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 2b525ea44a..106192ff51 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2349,6 +2349,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 04bc052ee8..1315cb6b0f 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1056,6 +1056,46 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> has the same length as
+     <literal>frels_list</literal>, and delivers extra information about
+     the context in which each foreign table is truncated.
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (corresponding to the equivalent
+     options of <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command> command.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 07aa25799d..3526cdfcfa 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -405,6 +406,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using the <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or leaf partitions.
+       If <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       <literal>ONLY</literal>, which truncates only the parent
+       portion of the remote table. As a result, it looks as if the
+       <command>TRUNCATE</command> command only partially removed the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 559fa1d2e5..4d059723a1 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -57,6 +57,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -296,6 +297,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1560,6 +1576,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1596,6 +1613,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1643,6 +1661,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1655,7 +1674,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1681,16 +1700,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1728,6 +1752,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1828,14 +1853,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look after the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1898,6 +1977,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -1983,12 +2087,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 18d05286b6..f7a5407c5a 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,6 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, 0);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1874,8 +1876,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..2e30a09729 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -56,8 +56,12 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 248f78da45..7488ce1ea6 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -159,6 +159,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -246,6 +251,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index b9e25820bc..e2c0bcea51 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 8bd95aefa1..1e01d3fd62 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -702,6 +702,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#13Amit Langote
amitlangote09@gmail.com
In reply to: Ibrar Ahmed (#12)
Re: TRUNCATE on foreign table

On Tue, Mar 9, 2021 at 2:24 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

The patch (pgsql14-truncate-on-foreign-table.v2.patch) does not apply successfully.

http://cfbot.cputube.org/patch_32_2972.log

patching file contrib/postgres_fdw/expected/postgres_fdw.out
Hunk #2 FAILED at 9179.
1 out of 2 hunks FAILED -- saving rejects to file contrib/postgres_fdw/expected/postgres_fdw.out.rej

As this is a minor change therefore I have updated the patch. Please take a look.

Thanks for updating the patch. I was able to apply it successfully
though I notice it doesn't pass make check-world.

Specifically, it fails the src/test/subscription/013_partition.pl
test. The problem seems to be that worker.c: apply_handle_truncate()
hasn't been updated to add entries to relids_extra for partitions
expanded from a partitioned table, like ExecuteTruncate() does. That
leads to relids and relids_extra having different lengths, which trips
the Assert in ExecuteTruncateGuts().

--
Amit Langote
EDB: http://www.enterprisedb.com

#14Kazutaka Onishi
onishi@heterodb.com
In reply to: Amit Langote (#13)
Re: TRUNCATE on foreign table

To Ibrar,
Thank you for updating the patch!

To Amit,
Thank you for checking the patch, and I have confirmed the failure.
Now I'm trying to fix it.

2021年3月9日(火) 11:54 Amit Langote <amitlangote09@gmail.com>:

Show quoted text

On Tue, Mar 9, 2021 at 2:24 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

The patch (pgsql14-truncate-on-foreign-table.v2.patch) does not apply

successfully.

http://cfbot.cputube.org/patch_32_2972.log

patching file contrib/postgres_fdw/expected/postgres_fdw.out
Hunk #2 FAILED at 9179.
1 out of 2 hunks FAILED -- saving rejects to file

contrib/postgres_fdw/expected/postgres_fdw.out.rej

As this is a minor change therefore I have updated the patch. Please

take a look.

Thanks for updating the patch. I was able to apply it successfully
though I notice it doesn't pass make check-world.

Specifically, it fails the src/test/subscription/013_partition.pl
test. The problem seems to be that worker.c: apply_handle_truncate()
hasn't been updated to add entries to relids_extra for partitions
expanded from a partitioned table, like ExecuteTruncate() does. That
leads to relids and relids_extra having different lengths, which trips
the Assert in ExecuteTruncateGuts().

--
Amit Langote
EDB: http://www.enterprisedb.com

#15Kazutaka Onishi
onishi@heterodb.com
In reply to: Kazutaka Onishi (#14)
1 attachment(s)
Re: TRUNCATE on foreign table

I have fixed the patch to pass check-world test. :D

2021年3月13日(土) 12:35 Kazutaka Onishi <onishi@heterodb.com>:

Show quoted text

To Ibrar,
Thank you for updating the patch!

To Amit,
Thank you for checking the patch, and I have confirmed the failure.
Now I'm trying to fix it.

2021年3月9日(火) 11:54 Amit Langote <amitlangote09@gmail.com>:

On Tue, Mar 9, 2021 at 2:24 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

The patch (pgsql14-truncate-on-foreign-table.v2.patch) does not apply

successfully.

http://cfbot.cputube.org/patch_32_2972.log

patching file contrib/postgres_fdw/expected/postgres_fdw.out
Hunk #2 FAILED at 9179.
1 out of 2 hunks FAILED -- saving rejects to file

contrib/postgres_fdw/expected/postgres_fdw.out.rej

As this is a minor change therefore I have updated the patch. Please

take a look.

Thanks for updating the patch. I was able to apply it successfully
though I notice it doesn't pass make check-world.

Specifically, it fails the src/test/subscription/013_partition.pl
test. The problem seems to be that worker.c: apply_handle_truncate()
hasn't been updated to add entries to relids_extra for partitions
expanded from a partitioned table, like ExecuteTruncate() does. That
leads to relids and relids_extra having different lengths, which trips
the Assert in ExecuteTruncateGuts().

--
Amit Langote
EDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v4.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v4.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..a9ce323a67 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct "TRUNCATE [ONLY] rel [, ...] <RESTART|CONTINUE> IDENTITY [RESTRICT|CASCADE]"
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))	/* separator before every rel but the first */
+			appendStringInfoString(buf, ", ");
+		if (extra != 0)		/* nonzero extra value: emit ONLY for this rel */
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)		/* any other value appends no clause */
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0649b6b81c..ba2f0f4e80 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8247,6 +8247,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8946,7 +9179,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 64698c4da3..668178524d 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -107,7 +107,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 * Validate option value, when we can do so without any context.
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
-			strcmp(def->defname, "updatable") == 0)
+			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -211,6 +212,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 35b48575c5..6f94c6830d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -390,6 +390,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -571,6 +575,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2791,6 +2798,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Propagates one TRUNCATE for all of frels_list to the remote host inside
+ *		of the transaction block; errors out if any table is not truncatable.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up the remote connection (once) and check every table is truncatable */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))		/* first relation: resolve server state */
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false);
+			/* server-level "truncatable" option; stays true when not set */
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* a table-level "truncatable" option overrides the server-level one */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* build a single remote TRUNCATE command covering every listed table */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* send the command and report an error unless it completes successfully */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up: free the result and the command string */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 1f67b4d9fd..c89af59973 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -193,6 +193,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 2b525ea44a..106192ff51 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2349,6 +2349,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 04bc052ee8..1315cb6b0f 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1056,6 +1056,46 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> is same length with
+     <literal>frels_list</literal>, that delivers extra information of
+     the context where the foreign-tables are truncated.
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (corresponding to the equivalent
+     options of <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 07aa25799d..3526cdfcfa 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -405,6 +406,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partitions.
+       If <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       the <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it looks as if the
+       <command>TRUNCATE</command> command only partially removed the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 559fa1d2e5..4d059723a1 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -57,6 +57,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -296,6 +297,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1560,6 +1576,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1596,6 +1613,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1643,6 +1661,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1655,7 +1674,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1681,16 +1700,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1728,6 +1752,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1828,14 +1853,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look after the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1898,6 +1977,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -1983,12 +2087,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 21d304a64c..e7438024c6 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,6 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, 0);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1862,6 +1864,7 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,-1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..2e30a09729 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -56,8 +56,12 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 248f78da45..7488ce1ea6 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -159,6 +159,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -246,6 +251,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index b9e25820bc..e2c0bcea51 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e017557e3e..47f6815115 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#16Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kazutaka Onishi (#15)
Re: TRUNCATE on foreign table

On 2021/03/13 18:57, Kazutaka Onishi wrote:

I have fixed the patch to pass check-world test. :D

Thanks for updating the patch! Here are some review comments from me.

By default all foreign tables using <filename>postgres_fdw</filename> are assumed
to be updatable. This may be overridden using the following option:

In postgres-fdw.sgml, shouldn't "and truncatable" be appended to
the first sentence above? Also, shouldn't "option" in the second sentence
be the plural form "options"?

<command>TRUNCATE</command> is not currently supported for foreign tables.
This implies that if a specified table has any descendant tables that are
foreign, the command will fail.

truncate.sgml should be updated because, for example, it contains
the above descriptions.

+     <literal>frels_extra</literal> is same length with
+     <literal>frels_list</literal>, that delivers extra information of
+     the context where the foreign-tables are truncated.
+    </para>

Don't we need to document frels_extra in detail?
Otherwise, FDW developers would not understand how to
handle frels_extra when trying to make their FDWs support TRUNCATE.

+		relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
+				relids_extra = lappend_int(relids_extra, -1);

postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not. That is, for example,
using only 0 and 1 for extra values is enough for the purpose. But
ExecuteTruncate() sets three values 0, -1 and 1 as extra ones. Why are
these three values necessary?

With the patch, if both local and foreign tables are specified as
the target tables to truncate, the TRUNCATE command tries to truncate the
foreign tables after truncating the local ones. That is, if the "truncatable"
option is set to false, or sufficient permission to truncate has not been
granted on the foreign server, an error will be thrown after the local tables
have been truncated. I don't think this is a good order of processing. IMO,
we should instead check whether the foreign tables can be truncated
before any actual truncation is performed. For example, we can easily
do that by truncating the foreign tables before the local ones. Thoughts?

XLOG_HEAP_TRUNCATE record is written even for the truncation of
a foreign table. Why is this necessary?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#17Kazutaka Onishi
onishi@heterodb.com
In reply to: Fujii Masao (#16)
1 attachment(s)
Re: TRUNCATE on foreign table

Fujii-san,

Thank you for your review!
I have now prepared the v5 patch, and I'll answer each of your comments
below. Please check it again.
m(_ _)m

1. In postgres-fdw.sgml, "and truncatable" should be appended into the
above first description?
2. truncate.sgml should be updated because, for example, it contains
the above descriptions.

Yeah, you're right. I've fixed it.

3. Don't we need to document the detail information about frels_extra?

I've documented frels_extra in fdwhandler.sgml.

4. postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not.

Please refer to this:
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

A negative value means that the foreign table is not specified in the TRUNCATE
command, but is truncated due to a dependency (like a partition's child leaf).

I've added this information into fdwhandler.sgml.

5. For example, we can easily do that by truncate foreign tables
before local ones. Thought?

Umm... yeah, I feel that is the better procedure, but it is not really
required because TRUNCATE is NOT called frequently.
Certainly, we already have postgresIsForeignUpdatable() to check
whether a foreign table is updatable or not.
Following that approach, we would have to add
postgresIsForeignTruncatable() for this check.
However, unlike UPDATE, TRUNCATE is NOT called frequently. The current
procedure is inefficient but works correctly.
Thus, I feel postgresIsForeignTruncatable() is not needed.

6. XLOG_HEAP_TRUNCATE record is written even for the truncation of a
foreign table. Why is this necessary?

Please give us more time to investigate this.

2021年3月25日(木) 3:47 Fujii Masao <masao.fujii@oss.nttdata.com>:

Show quoted text

On 2021/03/13 18:57, Kazutaka Onishi wrote:

I have fixed the patch to pass check-world test. :D

Thanks for updating the patch! Here are some review comments from me.

By default all foreign tables using <filename>postgres_fdw</filename> are assumed
to be updatable. This may be overridden using the following option:

In postgres-fdw.sgml, shouldn't "and truncatable" be appended to
the first sentence above? Also, shouldn't "option" in the second sentence
be the plural form "options"?

<command>TRUNCATE</command> is not currently supported for foreign tables.
This implies that if a specified table has any descendant tables that are
foreign, the command will fail.

truncate.sgml should be updated because, for example, it contains
the above descriptions.

+     <literal>frels_extra</literal> is same length with
+     <literal>frels_list</literal>, that delivers extra information of
+     the context where the foreign-tables are truncated.
+    </para>

Don't we need to document frels_extra in detail?
Otherwise, FDW developers would not understand how to
handle frels_extra when trying to make their FDWs support TRUNCATE.

+               relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
+                               relids_extra = lappend_int(relids_extra, -1);

postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not. That is, for example,
using only 0 and 1 for extra values is enough for the purpose. But
ExecuteTruncate() sets three values 0, -1 and 1 as extra ones. Why are
these three values necessary?

With the patch, if both local and foreign tables are specified as
the target tables to truncate, the TRUNCATE command tries to truncate the
foreign tables after truncating the local ones. That is, if the "truncatable"
option is set to false, or sufficient permission to truncate has not been
granted on the foreign server, an error will be thrown after the local tables
have been truncated. I don't think this is a good order of processing. IMO,
we should instead check whether the foreign tables can be truncated
before any actual truncation is performed. For example, we can easily
do that by truncating the foreign tables before the local ones. Thoughts?

XLOG_HEAP_TRUNCATE record is written even for the truncation of
a foreign table. Why is this necessary?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

pgsql14-truncate-on-foreign-table.v5.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v5.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6faf499f9a..a9ce323a67 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2171,6 +2171,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra != 0)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0649b6b81c..ba2f0f4e80 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8247,6 +8247,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8946,7 +9179,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 64698c4da3..668178524d 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -107,7 +107,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 * Validate option value, when we can do so without any context.
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
-			strcmp(def->defname, "updatable") == 0)
+			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -211,6 +212,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 35b48575c5..6f94c6830d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -390,6 +390,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -571,6 +575,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2791,6 +2798,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Propagate TRUNCATE to the remote host, inside of the transaction block,
+ *		as a single command naming every foreign table in frels_list.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;	/* used if server has no option */
+
+	/* get the connection on the first pass; check options of every table */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false);
+			/* pick up the server-level "truncatable" setting, if present */
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* we are invoked once per server, so every table must share it */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* a table-level "truncatable" option overrides the server-level one */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* build one remote TRUNCATE statement covering all the foreign tables */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* send it and fetch the result; anything but COMMAND_OK is an error */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 1f67b4d9fd..c89af59973 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -193,6 +193,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 2b525ea44a..106192ff51 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2349,6 +2349,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 04bc052ee8..f5d7f7f002 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1056,6 +1056,48 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context in which the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. Zero means that the foreign table was specified without the <literal>ONLY</literal> clause,
+     and a positive value means that it was specified with the <literal>ONLY</literal> clause. A negative value means that the foreign table was not
+     specified in the <command>TRUNCATE</command> command, but is truncated because of a dependency (such as being a leaf partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how the foreign tables should
+     be truncated. Its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal>, which correspond to the
+     <literal>RESTRICT</literal> and <literal>CASCADE</literal> options of <command>TRUNCATE</command>.
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 07aa25799d..9f83c0cd3f 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -376,7 +377,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -405,6 +406,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using the <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Take care when a foreign table maps to a remote table that has
+       inheritance children or partitions.
+       If <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       the <literal>ONLY</literal> clause, so only the parent portion of the
+       remote table is truncated. As a result, it can look as though the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..461be92565 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used on foreign tables if the foreign data wrapper supports it.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..693d4cc3bf 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct
+{
+	Oid			server_oid;		/* hash key: OID of the foreign server */
+	List	   *frels_list;		/* Relations on this server to truncate */
+	List	   *frels_extra;	/* int per rel: 0 no ONLY, >0 ONLY, <0 implicit */
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1600,6 +1616,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,6 +1653,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1683,6 +1701,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1695,7 +1714,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1740,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,6 +1792,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, -1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1868,14 +1893,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2017,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2127,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/commands/tablecmds.c.orig b/src/backend/commands/tablecmds.c.orig
new file mode 100644
index 0000000000..88a68a4697
--- /dev/null
+++ b/src/backend/commands/tablecmds.c.orig
@@ -0,0 +1,18336 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablecmds.c
+ *	  Commands for creating and altering table structures and settings
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/commands/tablecmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attmap.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/heapam_xlog.h"
+#include "access/multixact.h"
+#include "access/reloptions.h"
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "access/toast_compression.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/catalog.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_foreign_table.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_tablespace.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
+#include "catalog/storage.h"
+#include "catalog/storage_xlog.h"
+#include "catalog/toasting.h"
+#include "commands/cluster.h"
+#include "commands/comment.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/policy.h"
+#include "commands/sequence.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "commands/trigger.h"
+#include "commands/typecmds.h"
+#include "commands/user.h"
+#include "executor/executor.h"
+#include "foreign/foreign.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_relation.h"
+#include "parser/parse_type.h"
+#include "parser/parse_utilcmd.h"
+#include "parser/parser.h"
+#include "partitioning/partbounds.h"
+#include "partitioning/partdesc.h"
+#include "pgstat.h"
+#include "rewrite/rewriteDefine.h"
+#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/lock.h"
+#include "storage/predicate.h"
+#include "storage/smgr.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/partcache.h"
+#include "utils/pg_locale.h"
+#include "utils/relcache.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+#include "utils/typcache.h"
+
+/*
+ * ON COMMIT action list
+ */
+typedef struct OnCommitItem
+{
+	Oid			relid;			/* relid of relation */
+	OnCommitAction oncommit;	/* what to do at end of xact */
+
+	/*
+	 * If this entry was created during the current transaction,
+	 * creating_subid is the ID of the creating subxact; if created in a prior
+	 * transaction, creating_subid is zero.  If deleted during the current
+	 * transaction, deleting_subid is the ID of the deleting subxact; if no
+	 * deletion request is pending, deleting_subid is zero.
+	 */
+	SubTransactionId creating_subid;
+	SubTransactionId deleting_subid;
+} OnCommitItem;
+
+static List *on_commits = NIL;
+
+
+/*
+ * State information for ALTER TABLE
+ *
+ * The pending-work queue for an ALTER TABLE is a List of AlteredTableInfo
+ * structs, one for each table modified by the operation (the named table
+ * plus any child tables that are affected).  We save lists of subcommands
+ * to apply to this table (possibly modified by parse transformation steps);
+ * these lists will be executed in Phase 2.  If a Phase 3 step is needed,
+ * necessary information is stored in the constraints and newvals lists.
+ *
+ * Phase 2 is divided into multiple passes; subcommands are executed in
+ * a pass determined by subcommand type.
+ */
+
+#define AT_PASS_UNSET			-1	/* UNSET will cause ERROR */
+#define AT_PASS_DROP			0	/* DROP (all flavors) */
+#define AT_PASS_ALTER_TYPE		1	/* ALTER COLUMN TYPE */
+#define AT_PASS_OLD_INDEX		2	/* re-add existing indexes */
+#define AT_PASS_OLD_CONSTR		3	/* re-add existing constraints */
+/* We could support a RENAME COLUMN pass here, but not currently used */
+#define AT_PASS_ADD_COL			4	/* ADD COLUMN */
+#define AT_PASS_ADD_CONSTR		5	/* ADD constraints (initial examination) */
+#define AT_PASS_COL_ATTRS		6	/* set column attributes, eg NOT NULL */
+#define AT_PASS_ADD_INDEXCONSTR	7	/* ADD index-based constraints */
+#define AT_PASS_ADD_INDEX		8	/* ADD indexes */
+#define AT_PASS_ADD_OTHERCONSTR	9	/* ADD other constraints, defaults */
+#define AT_PASS_MISC			10	/* other stuff */
+#define AT_NUM_PASSES			11
+
+typedef struct AlteredTableInfo	/* per-relation ALTER TABLE state; cf. ATGetQueueEntry */
+{
+	/* Information saved before any work commences: */
+	Oid			relid;			/* Relation to work on */
+	char		relkind;		/* Its relkind */
+	TupleDesc	oldDesc;		/* Pre-modification tuple descriptor */
+
+	/*
+	 * Transiently set during Phase 2, normally set to NULL.
+	 *
+	 * ATRewriteCatalogs sets this when it starts, and closes it when ATExecCmd
+	 * returns control.  This can be exploited by ATExecCmd subroutines to
+	 * close/reopen across transaction boundaries.
+	 */
+	Relation	rel;
+
+	/* Information saved by Phase 1 for Phase 2: */
+	List	   *subcmds[AT_NUM_PASSES]; /* Lists of AlterTableCmd, one per pass */
+	/* Information saved by Phases 1/2 for Phase 3: */
+	List	   *constraints;	/* List of NewConstraint */
+	List	   *newvals;		/* List of NewColumnValue */
+	List	   *afterStmts;		/* List of utility command parsetrees */
+	bool		verify_new_notnull; /* true if we should recheck NOT NULL */
+	int			rewrite;		/* Reason for forced rewrite, if any */
+	Oid			newTableSpace;	/* new tablespace; 0 means no change */
+	bool		chgPersistence; /* true if SET LOGGED/UNLOGGED is used */
+	char		newrelpersistence;	/* new value, if chgPersistence is true */
+	Expr	   *partition_constraint;	/* for attach partition validation */
+	/* true, if validating default due to some other attach/detach */
+	bool		validate_default;
+	/* Objects to rebuild after completing ALTER TYPE operations */
+	List	   *changedConstraintOids;	/* OIDs of constraints to rebuild */
+	List	   *changedConstraintDefs;	/* string definitions of same */
+	List	   *changedIndexOids;	/* OIDs of indexes to rebuild */
+	List	   *changedIndexDefs;	/* string definitions of same */
+	char	   *replicaIdentityIndex;	/* index to reset as REPLICA IDENTITY */
+	char	   *clusterOnIndex; /* index to use for CLUSTER */
+	List	   *changedStatisticsOids;	/* OIDs of statistics to rebuild */
+	List	   *changedStatisticsDefs;	/* string definitions of same */
+} AlteredTableInfo;
+
+/* Struct describing one new constraint to check in the Phase 3 scan */
+/* Note: new NOT NULL constraints are handled elsewhere, not via this struct */
+typedef struct NewConstraint
+{
+	char	   *name;			/* Constraint name, or NULL if none */
+	ConstrType	contype;		/* CHECK or FOREIGN */
+	Oid			refrelid;		/* PK rel, if FOREIGN */
+	Oid			refindid;		/* OID of PK's index, if FOREIGN */
+	Oid			conid;			/* OID of pg_constraint entry, if FOREIGN */
+	Node	   *qual;			/* Check expr or CONSTR_FOREIGN Constraint */
+	ExprState  *qualstate;		/* Execution state for CHECK expr */
+} NewConstraint;
+
+/*
+ * Struct describing one new column value that needs to be computed during
+ * Phase 3 copy (this could be either a new column with a non-null default, or
+ * a column that we're changing the type of).  Columns without such an entry
+ * are just copied from the old table during ATRewriteTable.  Note that the
+ * expr is an expression over *old* table values, except when is_generated
+ * is true; then it is an expression over columns of the *new* tuple.
+ */
+typedef struct NewColumnValue
+{
+	AttrNumber	attnum;			/* which column the value is computed for */
+	Expr	   *expr;			/* expression to compute */
+	ExprState  *exprstate;		/* execution state for expr */
+	bool		is_generated;	/* is it a GENERATED expression? */
+} NewColumnValue;
+
+/*
+ * Error-reporting support for RemoveRelations: per-relkind message strings
+ */
+struct dropmsgstrings
+{
+	char		kind;			/* RELKIND_* value this entry describes */
+	int			nonexistent_code;	/* ERRCODE_* for a missing object */
+	const char *nonexistent_msg;	/* "... \"%s\" does not exist" */
+	const char *skipping_msg;	/* "... \"%s\" does not exist, skipping" */
+	const char *nota_msg;		/* "\"%s\" is not a ..." */
+	const char *drophint_msg;	/* "Use DROP ... to remove ..." */
+};
+
+static const struct dropmsgstrings dropmsgstringarray[] = {	/* fields are positional; strings wrapped in gettext_noop() */
+	{RELKIND_RELATION,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("table \"%s\" does not exist"),
+		gettext_noop("table \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a table"),
+	gettext_noop("Use DROP TABLE to remove a table.")},
+	{RELKIND_SEQUENCE,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("sequence \"%s\" does not exist"),
+		gettext_noop("sequence \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a sequence"),
+	gettext_noop("Use DROP SEQUENCE to remove a sequence.")},
+	{RELKIND_VIEW,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("view \"%s\" does not exist"),
+		gettext_noop("view \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a view"),
+	gettext_noop("Use DROP VIEW to remove a view.")},
+	{RELKIND_MATVIEW,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("materialized view \"%s\" does not exist"),
+		gettext_noop("materialized view \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a materialized view"),
+	gettext_noop("Use DROP MATERIALIZED VIEW to remove a materialized view.")},
+	{RELKIND_INDEX,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("index \"%s\" does not exist"),
+		gettext_noop("index \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not an index"),
+	gettext_noop("Use DROP INDEX to remove an index.")},
+	{RELKIND_COMPOSITE_TYPE,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("type \"%s\" does not exist"),
+		gettext_noop("type \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a type"),
+	gettext_noop("Use DROP TYPE to remove a type.")},
+	{RELKIND_FOREIGN_TABLE,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("foreign table \"%s\" does not exist"),
+		gettext_noop("foreign table \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a foreign table"),
+	gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")},
+	{RELKIND_PARTITIONED_TABLE,	/* same code and messages as RELKIND_RELATION */
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("table \"%s\" does not exist"),
+		gettext_noop("table \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a table"),
+	gettext_noop("Use DROP TABLE to remove a table.")},
+	{RELKIND_PARTITIONED_INDEX,	/* same code and messages as RELKIND_INDEX */
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("index \"%s\" does not exist"),
+		gettext_noop("index \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not an index"),
+	gettext_noop("Use DROP INDEX to remove an index.")},
+	{'\0', 0, NULL, NULL, NULL, NULL}	/* presumably the end-of-array sentinel; scan code not shown here */
+};
+
+struct DropRelationCallbackState	/* NOTE(review): presumably the "arg" of RangeVarCallbackForDropRelation -- confirm */
+{
+	char		relkind;		/* presumably the relkind the DROP expects -- TODO confirm */
+	Oid			heapOid;		/* presumably OID of an associated heap -- TODO confirm */
+	Oid			partParentOid;	/* presumably OID of a partition's parent -- TODO confirm */
+	bool		concurrent;		/* presumably set for a concurrent drop -- TODO confirm */
+};
+
+/* Alter table target-type bit flags (allowed_targets of ATSimplePermissions) */
+#define		ATT_TABLE				0x0001
+#define		ATT_VIEW				0x0002
+#define		ATT_MATVIEW				0x0004
+#define		ATT_INDEX				0x0008
+#define		ATT_COMPOSITE_TYPE		0x0010
+#define		ATT_FOREIGN_TABLE		0x0020
+#define		ATT_PARTITIONED_INDEX	0x0040
+
+/*
+ * Partitions are expected to be dropped when the parent partitioned table
+ * gets dropped, so for partitioning we use an AUTO dependency.  Otherwise,
+ * for regular inheritance, we use a NORMAL dependency.
+ */
+#define child_dependency_type(child_is_partition)	\
+	((child_is_partition) ? DEPENDENCY_AUTO : DEPENDENCY_NORMAL)
+
+static void truncate_check_rel(Oid relid, Form_pg_class reltuple);
+static void truncate_check_perms(Oid relid, Form_pg_class reltuple);
+static void truncate_check_activity(Relation rel);
+static void RangeVarCallbackForTruncate(const RangeVar *relation,
+										Oid relId, Oid oldRelId, void *arg);
+static List *MergeAttributes(List *schema, List *supers, char relpersistence,
+							 bool is_partition, List **supconstr);
+static bool MergeCheckConstraint(List *constraints, char *name, Node *expr);
+static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel);
+static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel);
+static void StoreCatalogInheritance(Oid relationId, List *supers,
+									bool child_is_partition);
+static void StoreCatalogInheritance1(Oid relationId, Oid parentOid,
+									 int32 seqNumber, Relation inhRelation,
+									 bool child_is_partition);
+static int	findAttrByName(const char *attributeName, List *schema);
+static void AlterIndexNamespaces(Relation classRel, Relation rel,
+								 Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved);
+static void AlterSeqNamespaces(Relation classRel, Relation rel,
+							   Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved,
+							   LOCKMODE lockmode);
+static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
+										   bool recurse, bool recursing, LOCKMODE lockmode);
+static ObjectAddress ATExecValidateConstraint(List **wqueue,
+											  Relation rel, char *constrName,
+											  bool recurse, bool recursing, LOCKMODE lockmode);
+static int	transformColumnNameList(Oid relId, List *colList,
+									int16 *attnums, Oid *atttypids);
+static int	transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
+									   List **attnamelist,
+									   int16 *attnums, Oid *atttypids,
+									   Oid *opclasses);
+static Oid	transformFkeyCheckAttrs(Relation pkrel,
+									int numattrs, int16 *attnums,
+									Oid *opclasses);
+static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts);
+static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId,
+									 Oid *funcid);
+static void validateForeignKeyConstraint(char *conname,
+										 Relation rel, Relation pkrel,
+										 Oid pkindOid, Oid constraintOid);
+static void ATController(AlterTableStmt *parsetree,
+						 Relation rel, List *cmds, bool recurse, LOCKMODE lockmode,
+						 AlterTableUtilityContext *context);
+static void ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
+					  bool recurse, bool recursing, LOCKMODE lockmode,
+					  AlterTableUtilityContext *context);
+static void ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode,
+							  AlterTableUtilityContext *context);
+static void ATExecCmd(List **wqueue, AlteredTableInfo *tab,
+					  AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass,
+					  AlterTableUtilityContext *context);
+static AlterTableCmd *ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab,
+										  Relation rel, AlterTableCmd *cmd,
+										  bool recurse, LOCKMODE lockmode,
+										  int cur_pass,
+										  AlterTableUtilityContext *context);
+static void ATRewriteTables(AlterTableStmt *parsetree,
+							List **wqueue, LOCKMODE lockmode,
+							AlterTableUtilityContext *context);
+static void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode);
+static AlteredTableInfo *ATGetQueueEntry(List **wqueue, Relation rel);
+static void ATSimplePermissions(Relation rel, int allowed_targets);
+static void ATWrongRelkindError(Relation rel, int allowed_targets);
+static void ATSimpleRecursion(List **wqueue, Relation rel,
+							  AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+							  AlterTableUtilityContext *context);
+static void ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode);
+static void ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd,
+								  LOCKMODE lockmode,
+								  AlterTableUtilityContext *context);
+static List *find_typed_table_dependencies(Oid typeOid, const char *typeName,
+										   DropBehavior behavior);
+static void ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+							bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode,
+							AlterTableUtilityContext *context);
+static ObjectAddress ATExecAddColumn(List **wqueue, AlteredTableInfo *tab,
+									 Relation rel, AlterTableCmd **cmd,
+									 bool recurse, bool recursing,
+									 LOCKMODE lockmode, int cur_pass,
+									 AlterTableUtilityContext *context);
+static bool check_for_column_name_collision(Relation rel, const char *colname,
+											bool if_not_exists);
+static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid);
+static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid);
+static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing);
+static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode);
+static void ATPrepSetNotNull(List **wqueue, Relation rel,
+							 AlterTableCmd *cmd, bool recurse, bool recursing,
+							 LOCKMODE lockmode,
+							 AlterTableUtilityContext *context);
+static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
+									  const char *colName, LOCKMODE lockmode);
+static void ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel,
+							   const char *colName, LOCKMODE lockmode);
+static bool NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr);
+static bool ConstraintImpliedByRelConstraint(Relation scanrel,
+											 List *testConstraint, List *provenConstraint);
+static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName,
+										 Node *newDefault, LOCKMODE lockmode);
+static ObjectAddress ATExecCookedColumnDefault(Relation rel, AttrNumber attnum,
+											   Node *newDefault);
+static ObjectAddress ATExecAddIdentity(Relation rel, const char *colName,
+									   Node *def, LOCKMODE lockmode);
+static ObjectAddress ATExecSetIdentity(Relation rel, const char *colName,
+									   Node *def, LOCKMODE lockmode);
+static ObjectAddress ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode);
+static void ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode);
+static ObjectAddress ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode);
+static ObjectAddress ATExecSetStatistics(Relation rel, const char *colName, int16 colNum,
+										 Node *newValue, LOCKMODE lockmode);
+static ObjectAddress ATExecSetOptions(Relation rel, const char *colName,
+									  Node *options, bool isReset, LOCKMODE lockmode);
+static ObjectAddress ATExecSetStorage(Relation rel, const char *colName,
+									  Node *newValue, LOCKMODE lockmode);
+static void ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+							 AlterTableCmd *cmd, LOCKMODE lockmode,
+							 AlterTableUtilityContext *context);
+static ObjectAddress ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
+									  DropBehavior behavior,
+									  bool recurse, bool recursing,
+									  bool missing_ok, LOCKMODE lockmode,
+									  ObjectAddresses *addrs);
+static ObjectAddress ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
+									IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
+static ObjectAddress ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+										 CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
+static ObjectAddress ATExecAddConstraint(List **wqueue,
+										 AlteredTableInfo *tab, Relation rel,
+										 Constraint *newConstraint, bool recurse, bool is_readd,
+										 LOCKMODE lockmode);
+static char *ChooseForeignKeyConstraintNameAddition(List *colnames);
+static ObjectAddress ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel,
+											  IndexStmt *stmt, LOCKMODE lockmode);
+static ObjectAddress ATAddCheckConstraint(List **wqueue,
+										  AlteredTableInfo *tab, Relation rel,
+										  Constraint *constr,
+										  bool recurse, bool recursing, bool is_readd,
+										  LOCKMODE lockmode);
+static ObjectAddress ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab,
+											   Relation rel, Constraint *fkconstraint, Oid parentConstr,
+											   bool recurse, bool recursing,
+											   LOCKMODE lockmode);
+static ObjectAddress addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint,
+											Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr,
+											int numfks, int16 *pkattnum, int16 *fkattnum,
+											Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators,
+											bool old_check_ok);
+static void addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint,
+									Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr,
+									int numfks, int16 *pkattnum, int16 *fkattnum,
+									Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators,
+									bool old_check_ok, LOCKMODE lockmode);
+static void CloneForeignKeyConstraints(List **wqueue, Relation parentRel,
+									   Relation partitionRel);
+static void CloneFkReferenced(Relation parentRel, Relation partitionRel);
+static void CloneFkReferencing(List **wqueue, Relation parentRel,
+							   Relation partRel);
+static void createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid,
+										  Constraint *fkconstraint, Oid constraintOid,
+										  Oid indexOid);
+static void createForeignKeyActionTriggers(Relation rel, Oid refRelOid,
+										   Constraint *fkconstraint, Oid constraintOid,
+										   Oid indexOid);
+static bool tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk,
+										 Oid partRelid,
+										 Oid parentConstrOid, int numfks,
+										 AttrNumber *mapped_conkey, AttrNumber *confkey,
+										 Oid *conpfeqop);
+static void ATExecDropConstraint(Relation rel, const char *constrName,
+								 DropBehavior behavior,
+								 bool recurse, bool recursing,
+								 bool missing_ok, LOCKMODE lockmode);
+static void ATPrepAlterColumnType(List **wqueue,
+								  AlteredTableInfo *tab, Relation rel,
+								  bool recurse, bool recursing,
+								  AlterTableCmd *cmd, LOCKMODE lockmode,
+								  AlterTableUtilityContext *context);
+static bool ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno);
+static ObjectAddress ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
+										   AlterTableCmd *cmd, LOCKMODE lockmode);
+static void RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab);
+static void RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab);
+static void RememberStatisticsForRebuilding(Oid indoid, AlteredTableInfo *tab);
+static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab,
+								   LOCKMODE lockmode);
+static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId,
+								 char *cmd, List **wqueue, LOCKMODE lockmode,
+								 bool rewrite);
+static void RebuildConstraintComment(AlteredTableInfo *tab, int pass,
+									 Oid objid, Relation rel, List *domname,
+									 const char *conname);
+static void TryReuseIndex(Oid oldId, IndexStmt *stmt);
+static void TryReuseForeignKey(Oid oldId, Constraint *con);
+static ObjectAddress ATExecAlterColumnGenericOptions(Relation rel, const char *colName,
+													 List *options, LOCKMODE lockmode);
+static void change_owner_fix_column_acls(Oid relationOid,
+										 Oid oldOwnerId, Oid newOwnerId);
+static void change_owner_recurse_to_sequences(Oid relationOid,
+											  Oid newOwnerId, LOCKMODE lockmode);
+static ObjectAddress ATExecClusterOn(Relation rel, const char *indexName,
+									 LOCKMODE lockmode);
+static void ATExecDropCluster(Relation rel, LOCKMODE lockmode);
+static bool ATPrepChangePersistence(Relation rel, bool toLogged);
+static void ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel,
+								const char *tablespacename, LOCKMODE lockmode);
+static void ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode);
+static void ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace);
+static void ATExecSetRelOptions(Relation rel, List *defList,
+								AlterTableType operation,
+								LOCKMODE lockmode);
+static void ATExecEnableDisableTrigger(Relation rel, const char *trigname,
+									   char fires_when, bool skip_system, LOCKMODE lockmode);
+static void ATExecEnableDisableRule(Relation rel, const char *rulename,
+									char fires_when, LOCKMODE lockmode);
+static void ATPrepAddInherit(Relation child_rel);
+static ObjectAddress ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode);
+static ObjectAddress ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode);
+static void drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid,
+								   DependencyType deptype);
+static ObjectAddress ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode);
+static void ATExecDropOf(Relation rel, LOCKMODE lockmode);
+static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode);
+static void ATExecGenericOptions(Relation rel, List *options);
+static void ATExecSetRowSecurity(Relation rel, bool rls);
+static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls);
+static ObjectAddress ATExecSetCompression(AlteredTableInfo *tab, Relation rel,
+					 const char *column, Node *newValue, LOCKMODE lockmode);
+
+static void index_copy_data(Relation rel, RelFileNode newrnode);
+static const char *storage_name(char c);
+
+static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
+											Oid oldRelOid, void *arg);
+static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
+											 Oid oldrelid, void *arg);
+static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy);
+static void ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
+								  List **partexprs, Oid *partopclass, Oid *partcollation, char strategy);
+static void CreateInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel,
+							  bool allow_detached);
+static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
+										   PartitionCmd *cmd,
+										   AlterTableUtilityContext *context);
+static void AttachPartitionEnsureIndexes(Relation rel, Relation attachrel);
+static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
+											   List *partConstraint,
+											   bool validate_default);
+static void CloneRowTriggersToPartition(Relation parent, Relation partition);
+static void DetachAddConstraintIfNeeded(List **wqueue, Relation partRel);
+static void DropClonedTriggersFromPartition(Oid partitionId);
+static ObjectAddress ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab,
+										   Relation rel, RangeVar *name,
+										   bool concurrent);
+static void DetachPartitionFinalize(Relation rel, Relation partRel,
+									bool concurrent, Oid defaultPartOid);
+static ObjectAddress ATExecDetachPartitionFinalize(Relation rel, RangeVar *name);
+static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel,
+											  RangeVar *name);
+static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl);
+static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx,
+								  Relation partitionTbl);
+static List *GetParentedForeignKeyRefs(Relation partition);
+static void ATDetachCheckNoForeignKeyRefs(Relation partition);
+static void ATExecAlterCollationRefreshVersion(Relation rel, List *coll);
+static char GetAttributeCompression(Form_pg_attribute att, char *compression);
+
+
+/* ----------------------------------------------------------------
+ *		DefineRelation
+ *				Creates a new relation.
+ *
+ * stmt carries parsetree information from an ordinary CREATE TABLE statement.
+ * The other arguments are used to extend the behavior for other cases:
+ * relkind: relkind to assign to the new relation
+ * ownerId: if not InvalidOid, use this as the new relation's owner.
+ * typaddress: if not null, it's set to the pg_type entry's address.
+ * queryString: for error reporting
+ *
+ * Note that permissions checks are done against current user regardless of
+ * ownerId.  A nonzero ownerId is used when someone is creating a relation
+ * "on behalf of" someone else, so we still want to see that the current user
+ * has permissions to do it.
+ *
+ * If successful, returns the address of the new relation.
+ * ----------------------------------------------------------------
+ */
+ObjectAddress
+DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
+			   ObjectAddress *typaddress, const char *queryString)
+{
+	char		relname[NAMEDATALEN];
+	Oid			namespaceId;
+	Oid			relationId;
+	Oid			tablespaceId;
+	Relation	rel;
+	TupleDesc	descriptor;
+	List	   *inheritOids;
+	List	   *old_constraints;
+	List	   *rawDefaults;
+	List	   *cookedDefaults;
+	Datum		reloptions;
+	ListCell   *listptr;
+	AttrNumber	attnum;
+	bool		partitioned;
+	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
+	Oid			ofTypeId;
+	ObjectAddress address;
+	LOCKMODE	parentLockmode;
+	const char *accessMethod = NULL;
+	Oid			accessMethodId = InvalidOid;
+
+	/*
+	 * Truncate relname to appropriate length (probably a waste of time, as
+	 * parser should have done this already).
+	 */
+	strlcpy(relname, stmt->relation->relname, NAMEDATALEN);
+
+	/*
+	 * Check consistency of arguments
+	 */
+	if (stmt->oncommit != ONCOMMIT_NOOP
+		&& stmt->relation->relpersistence != RELPERSISTENCE_TEMP)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("ON COMMIT can only be used on temporary tables")));
+
+	if (stmt->partspec != NULL)
+	{
+		if (relkind != RELKIND_RELATION)
+			elog(ERROR, "unexpected relkind: %d", (int) relkind);
+
+		relkind = RELKIND_PARTITIONED_TABLE;
+		partitioned = true;
+	}
+	else
+		partitioned = false;
+
+	/*
+	 * Look up the namespace in which we are supposed to create the relation,
+	 * check we have permission to create there, lock it against concurrent
+	 * drop, and mark stmt->relation as RELPERSISTENCE_TEMP if a temporary
+	 * namespace is selected.
+	 */
+	namespaceId =
+		RangeVarGetAndCheckCreationNamespace(stmt->relation, NoLock, NULL);
+
+	/*
+	 * Security check: disallow creating temp tables from security-restricted
+	 * code.  This is needed because calling code might not expect untrusted
+	 * tables to appear in pg_temp at the front of its search path.
+	 */
+	if (stmt->relation->relpersistence == RELPERSISTENCE_TEMP
+		&& InSecurityRestrictedOperation())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("cannot create temporary table within security-restricted operation")));
+
+	/*
+	 * Determine the lockmode to use when scanning parents.  A self-exclusive
+	 * lock is needed here.
+	 *
+	 * For regular inheritance, if two backends attempt to add children to the
+	 * same parent simultaneously, and that parent has no pre-existing
+	 * children, then both will attempt to update the parent's relhassubclass
+	 * field, leading to a "tuple concurrently updated" error.  Also, this
+	 * interlocks against a concurrent ANALYZE on the parent table, which
+	 * might otherwise be attempting to clear the parent's relhassubclass
+	 * field, if its previous children were recently dropped.
+	 *
+	 * If the child table is a partition, then we instead grab an exclusive
+	 * lock on the parent because its partition descriptor will be changed by
+	 * addition of the new partition.
+	 */
+	parentLockmode = (stmt->partbound != NULL ? AccessExclusiveLock :
+					  ShareUpdateExclusiveLock);
+
+	/* Determine the list of OIDs of the parents. */
+	inheritOids = NIL;
+	foreach(listptr, stmt->inhRelations)
+	{
+		RangeVar   *rv = (RangeVar *) lfirst(listptr);
+		Oid			parentOid;
+
+		parentOid = RangeVarGetRelid(rv, parentLockmode, false);
+
+		/*
+		 * Reject duplications in the list of parents.
+		 */
+		if (list_member_oid(inheritOids, parentOid))
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_TABLE),
+					 errmsg("relation \"%s\" would be inherited from more than once",
+							get_rel_name(parentOid))));
+
+		inheritOids = lappend_oid(inheritOids, parentOid);
+	}
+
+	/*
+	 * Select tablespace to use: an explicitly indicated one, or (in the case
+	 * of a partitioned table) the parent's, if it has one.
+	 */
+	if (stmt->tablespacename)
+	{
+		tablespaceId = get_tablespace_oid(stmt->tablespacename, false);
+
+		if (partitioned && tablespaceId == MyDatabaseTableSpace)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot specify default tablespace for partitioned relations")));
+	}
+	else if (stmt->partbound)
+	{
+		/*
+		 * For partitions, when no other tablespace is specified, we default
+		 * the tablespace to the parent partitioned table's.
+		 */
+		Assert(list_length(inheritOids) == 1);
+		tablespaceId = get_rel_tablespace(linitial_oid(inheritOids));
+	}
+	else
+		tablespaceId = InvalidOid;
+
+	/* still nothing? use the default */
+	if (!OidIsValid(tablespaceId))
+		tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence,
+											partitioned);
+
+	/* Check permissions except when using database's default */
+	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
+	{
+		AclResult	aclresult;
+
+		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
+										   ACL_CREATE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, OBJECT_TABLESPACE,
+						   get_tablespace_name(tablespaceId));
+	}
+
+	/* In all cases disallow placing user relations in pg_global */
+	if (tablespaceId == GLOBALTABLESPACE_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("only shared relations can be placed in pg_global tablespace")));
+
+	/* Identify user ID that will own the table */
+	if (!OidIsValid(ownerId))
+		ownerId = GetUserId();
+
+	/*
+	 * Parse and validate reloptions, if any.
+	 */
+	reloptions = transformRelOptions((Datum) 0, stmt->options, NULL, validnsps,
+									 true, false);
+
+	switch (relkind)
+	{
+		case RELKIND_VIEW:
+			(void) view_reloptions(reloptions, true);
+			break;
+		case RELKIND_PARTITIONED_TABLE:
+			(void) partitioned_table_reloptions(reloptions, true);
+			break;
+		default:
+			(void) heap_reloptions(relkind, reloptions, true);
+	}
+
+	if (stmt->ofTypename)
+	{
+		AclResult	aclresult;
+
+		ofTypeId = typenameTypeId(NULL, stmt->ofTypename);
+
+		aclresult = pg_type_aclcheck(ofTypeId, GetUserId(), ACL_USAGE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error_type(aclresult, ofTypeId);
+	}
+	else
+		ofTypeId = InvalidOid;
+
+	/*
+	 * Look up inheritance ancestors and generate relation schema, including
+	 * inherited attributes.  (Note that stmt->tableElts is destructively
+	 * modified by MergeAttributes.)
+	 */
+	stmt->tableElts =
+		MergeAttributes(stmt->tableElts, inheritOids,
+						stmt->relation->relpersistence,
+						stmt->partbound != NULL,
+						&old_constraints);
+
+	/*
+	 * Create a tuple descriptor from the relation schema.  Note that this
+	 * deals with column names, types, and NOT NULL constraints, but not
+	 * default values or CHECK constraints; we handle those below.
+	 */
+	descriptor = BuildDescForRelation(stmt->tableElts);
+
+	/*
+	 * Find columns with default values and prepare for insertion of the
+	 * defaults.  Pre-cooked (that is, inherited) defaults go into a list of
+	 * CookedConstraint structs that we'll pass to heap_create_with_catalog,
+	 * while raw defaults go into a list of RawColumnDefault structs that will
+	 * be processed by AddRelationNewConstraints.  (We can't deal with raw
+	 * expressions until we can do transformExpr.)
+	 *
+	 * We can set the atthasdef flags now in the tuple descriptor; this just
+	 * saves StoreAttrDefault from having to do an immediate update of the
+	 * pg_attribute rows.
+	 */
+	rawDefaults = NIL;
+	cookedDefaults = NIL;
+	attnum = 0;
+
+	foreach(listptr, stmt->tableElts)
+	{
+		ColumnDef  *colDef = lfirst(listptr);
+		Form_pg_attribute attr;
+
+		attnum++;
+		attr = TupleDescAttr(descriptor, attnum - 1);
+
+		if (colDef->raw_default != NULL)
+		{
+			RawColumnDefault *rawEnt;
+
+			Assert(colDef->cooked_default == NULL);
+
+			rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
+			rawEnt->attnum = attnum;
+			rawEnt->raw_default = colDef->raw_default;
+			rawEnt->missingMode = false;
+			rawEnt->generated = colDef->generated;
+			rawDefaults = lappend(rawDefaults, rawEnt);
+			attr->atthasdef = true;
+		}
+		else if (colDef->cooked_default != NULL)
+		{
+			CookedConstraint *cooked;
+
+			cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint));
+			cooked->contype = CONSTR_DEFAULT;
+			cooked->conoid = InvalidOid;	/* until created */
+			cooked->name = NULL;
+			cooked->attnum = attnum;
+			cooked->expr = colDef->cooked_default;
+			cooked->skip_validation = false;
+			cooked->is_local = true;	/* not used for defaults */
+			cooked->inhcount = 0;	/* ditto */
+			cooked->is_no_inherit = false;
+			cookedDefaults = lappend(cookedDefaults, cooked);
+			attr->atthasdef = true;
+		}
+
+		if (colDef->identity)
+			attr->attidentity = colDef->identity;
+
+		if (colDef->generated)
+			attr->attgenerated = colDef->generated;
+
+		/*
+		 * lookup attribute's compression method and store it in the
+		 * attr->attcompression.
+		 */
+		if (relkind == RELKIND_RELATION ||
+			relkind == RELKIND_PARTITIONED_TABLE ||
+			relkind == RELKIND_MATVIEW)
+			attr->attcompression =
+				GetAttributeCompression(attr, colDef->compression);
+		else
+			attr->attcompression = InvalidCompressionMethod;
+	}
+
+	/*
+	 * If the statement hasn't specified an access method, but we're defining
+	 * a type of relation that needs one, use the default.
+	 */
+	if (stmt->accessMethod != NULL)
+	{
+		accessMethod = stmt->accessMethod;
+
+		if (partitioned)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("specifying a table access method is not supported on a partitioned table")));
+
+	}
+	else if (relkind == RELKIND_RELATION ||
+			 relkind == RELKIND_TOASTVALUE ||
+			 relkind == RELKIND_MATVIEW)
+		accessMethod = default_table_access_method;
+
+	/* look up the access method, verify it is for a table */
+	if (accessMethod != NULL)
+		accessMethodId = get_table_am_oid(accessMethod, false);
+
+	/*
+	 * Create the relation.  Inherited defaults and constraints are passed in
+	 * for immediate handling --- since they don't need parsing, they can be
+	 * stored immediately.
+	 */
+	relationId = heap_create_with_catalog(relname,
+										  namespaceId,
+										  tablespaceId,
+										  InvalidOid,
+										  InvalidOid,
+										  ofTypeId,
+										  ownerId,
+										  accessMethodId,
+										  descriptor,
+										  list_concat(cookedDefaults,
+													  old_constraints),
+										  relkind,
+										  stmt->relation->relpersistence,
+										  false,
+										  false,
+										  stmt->oncommit,
+										  reloptions,
+										  true,
+										  allowSystemTableMods,
+										  false,
+										  InvalidOid,
+										  typaddress);
+
+	/*
+	 * We must bump the command counter to make the newly-created relation
+	 * tuple visible for opening.
+	 */
+	CommandCounterIncrement();
+
+	/*
+	 * Open the new relation and acquire exclusive lock on it.  This isn't
+	 * really necessary for locking out other backends (since they can't see
+	 * the new rel anyway until we commit), but it keeps the lock manager from
+	 * complaining about deadlock risks.
+	 */
+	rel = relation_open(relationId, AccessExclusiveLock);
+
+	/*
+	 * Now add any newly specified column default and generation expressions
+	 * to the new relation.  These are passed to us in the form of raw
+	 * parsetrees; we need to transform them to executable expression trees
+	 * before they can be added. The most convenient way to do that is to
+	 * apply the parser's transformExpr routine, but transformExpr doesn't
+	 * work unless we have a pre-existing relation. So, the transformation has
+	 * to be postponed to this final step of CREATE TABLE.
+	 *
+	 * This needs to be before processing the partitioning clauses because
+	 * those could refer to generated columns.
+	 */
+	if (rawDefaults)
+		AddRelationNewConstraints(rel, rawDefaults, NIL,
+								  true, true, false, queryString);
+
+	/*
+	 * Make column generation expressions visible for use by partitioning.
+	 */
+	CommandCounterIncrement();
+
+	/* Process and store partition bound, if any. */
+	if (stmt->partbound)
+	{
+		PartitionBoundSpec *bound;
+		ParseState *pstate;
+		Oid			parentId = linitial_oid(inheritOids),
+					defaultPartOid;
+		Relation	parent,
+					defaultRel = NULL;
+		ParseNamespaceItem *nsitem;
+
+		/* Already have strong enough lock on the parent */
+		parent = table_open(parentId, NoLock);
+
+		/*
+		 * We are going to try to validate the partition bound specification
+		 * against the partition key of parentRel, so it better have one.
+		 */
+		if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("\"%s\" is not partitioned",
+							RelationGetRelationName(parent))));
+
+		/*
+		 * The partition constraint of the default partition depends on the
+		 * partition bounds of every other partition. It is possible that
+		 * another backend might be about to execute a query on the default
+		 * partition table, and that the query relies on previously cached
+		 * default partition constraints. We must therefore take a table lock
+		 * strong enough to prevent all queries on the default partition from
+		 * proceeding until we commit and send out a shared-cache-inval notice
+		 * that will make them update their index lists.
+		 *
+		 * Order of locking: The relation being added won't be visible to
+		 * other backends until it is committed, hence here in
+		 * DefineRelation() the order of locking the default partition and the
+		 * relation being added does not matter. But at all other places we
+		 * need to lock the default relation before we lock the relation being
+		 * added or removed i.e. we should take the lock in same order at all
+		 * the places such that lock parent, lock default partition and then
+		 * lock the partition so as to avoid a deadlock.
+		 */
+		defaultPartOid =
+			get_default_oid_from_partdesc(RelationGetPartitionDesc(parent,
+																   false));
+		if (OidIsValid(defaultPartOid))
+			defaultRel = table_open(defaultPartOid, AccessExclusiveLock);
+
+		/* Transform the bound values */
+		pstate = make_parsestate(NULL);
+		pstate->p_sourcetext = queryString;
+
+		/*
+		 * Add an nsitem containing this relation, so that transformExpr
+		 * called on partition bound expressions is able to report errors
+		 * using a proper context.
+		 */
+		nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
+											   NULL, false, false);
+		addNSItemToQuery(pstate, nsitem, false, true, true);
+
+		bound = transformPartitionBound(pstate, parent, stmt->partbound);
+
+		/*
+		 * Check first that the new partition's bound is valid and does not
+		 * overlap with any of existing partitions of the parent.
+		 */
+		check_new_partition_bound(relname, parent, bound, pstate);
+
+		/*
+		 * If the default partition exists, its partition constraints will
+		 * change after the addition of this new partition such that it won't
+		 * allow any row that qualifies for this new partition. So, check that
+		 * the existing data in the default partition satisfies the constraint
+		 * as it will exist after adding this partition.
+		 */
+		if (OidIsValid(defaultPartOid))
+		{
+			check_default_partition_contents(parent, defaultRel, bound);
+			/* Keep the lock until commit. */
+			table_close(defaultRel, NoLock);
+		}
+
+		/* Update the pg_class entry. */
+		StorePartitionBound(rel, parent, bound);
+
+		table_close(parent, NoLock);
+	}
+
+	/* Store inheritance information for new rel. */
+	StoreCatalogInheritance(relationId, inheritOids, stmt->partbound != NULL);
+
+	/*
+	 * Process the partitioning specification (if any) and store the partition
+	 * key information into the catalog.
+	 */
+	if (partitioned)
+	{
+		ParseState *pstate;
+		char		strategy;
+		int			partnatts;
+		AttrNumber	partattrs[PARTITION_MAX_KEYS];
+		Oid			partopclass[PARTITION_MAX_KEYS];
+		Oid			partcollation[PARTITION_MAX_KEYS];
+		List	   *partexprs = NIL;
+
+		pstate = make_parsestate(NULL);
+		pstate->p_sourcetext = queryString;
+
+		partnatts = list_length(stmt->partspec->partParams);
+
+		/* Protect fixed-size arrays here and in executor */
+		if (partnatts > PARTITION_MAX_KEYS)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("cannot partition using more than %d columns",
+							PARTITION_MAX_KEYS)));
+
+		/*
+		 * We need to transform the raw parsetrees corresponding to partition
+		 * expressions into executable expression trees.  Like column defaults
+		 * and CHECK constraints, we could not have done the transformation
+		 * earlier.
+		 */
+		stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
+												&strategy);
+
+		ComputePartitionAttrs(pstate, rel, stmt->partspec->partParams,
+							  partattrs, &partexprs, partopclass,
+							  partcollation, strategy);
+
+		StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
+						  partopclass, partcollation);
+
+		/* make it all visible */
+		CommandCounterIncrement();
+	}
+
+	/*
+	 * If we're creating a partition, create now all the indexes, triggers,
+	 * FKs defined in the parent.
+	 *
+	 * We can't do it earlier, because DefineIndex wants to know the partition
+	 * key which we just stored.
+	 */
+	if (stmt->partbound)
+	{
+		Oid			parentId = linitial_oid(inheritOids);
+		Relation	parent;
+		List	   *idxlist;
+		ListCell   *cell;
+
+		/* Already have strong enough lock on the parent */
+		parent = table_open(parentId, NoLock);
+		idxlist = RelationGetIndexList(parent);
+
+		/*
+		 * For each index in the parent table, create one in the partition
+		 */
+		foreach(cell, idxlist)
+		{
+			Relation	idxRel = index_open(lfirst_oid(cell), AccessShareLock);
+			AttrMap    *attmap;
+			IndexStmt  *idxstmt;
+			Oid			constraintOid;
+
+			if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+			{
+				if (idxRel->rd_index->indisunique)
+					ereport(ERROR,
+							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+							 errmsg("cannot create foreign partition of partitioned table \"%s\"",
+									RelationGetRelationName(parent)),
+							 errdetail("Table \"%s\" contains indexes that are unique.",
+									   RelationGetRelationName(parent))));
+				else
+				{
+					index_close(idxRel, AccessShareLock);
+					continue;
+				}
+			}
+
+			attmap = build_attrmap_by_name(RelationGetDescr(rel),
+										   RelationGetDescr(parent));
+			idxstmt =
+				generateClonedIndexStmt(NULL, idxRel,
+										attmap, &constraintOid);
+			DefineIndex(RelationGetRelid(rel),
+						idxstmt,
+						InvalidOid,
+						RelationGetRelid(idxRel),
+						constraintOid,
+						false, false, false, false, false);
+
+			index_close(idxRel, AccessShareLock);
+		}
+
+		list_free(idxlist);
+
+		/*
+		 * If there are any row-level triggers, clone them to the new
+		 * partition.
+		 */
+		if (parent->trigdesc != NULL)
+			CloneRowTriggersToPartition(parent, rel);
+
+		/*
+		 * And foreign keys too.  Note that because we're freshly creating the
+		 * table, there is no need to verify these new constraints.
+		 */
+		CloneForeignKeyConstraints(NULL, parent, rel);
+
+		table_close(parent, NoLock);
+	}
+
+	/*
+	 * Now add any newly specified CHECK constraints to the new relation. Same
+	 * as for defaults above, but these need to come after partitioning is set
+	 * up.
+	 */
+	if (stmt->constraints)
+		AddRelationNewConstraints(rel, NIL, stmt->constraints,
+								  true, true, false, queryString);
+
+	ObjectAddressSet(address, RelationRelationId, relationId);
+
+	/*
+	 * Clean up.  We keep lock on new relation (although it shouldn't be
+	 * visible to anyone else anyway, until commit).
+	 */
+	relation_close(rel, NoLock);
+
+	return address;
+}
+
+/*
+ * DropErrorMsgNonExistent
+ *		Report that the target of a "DROP" command does not exist.
+ *
+ * With missing_ok false the problem is raised as an ERROR; with missing_ok
+ * true it is downgraded to a NOTICE saying the object is being skipped.  A
+ * schema-qualified name whose schema cannot be found is reported as a
+ * missing schema rather than as a missing relation.
+ */
+static void
+DropErrorMsgNonExistent(RangeVar *rel, char rightkind, bool missing_ok)
+{
+	const struct dropmsgstrings *entry = dropmsgstringarray;
+
+	/* An unknown schema takes precedence over the relation-level message. */
+	if (rel->schemaname != NULL &&
+		!OidIsValid(LookupNamespaceNoError(rel->schemaname)))
+	{
+		if (missing_ok)
+			ereport(NOTICE,
+					(errmsg("schema \"%s\" does not exist, skipping",
+							rel->schemaname)));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_SCHEMA),
+					 errmsg("schema \"%s\" does not exist", rel->schemaname)));
+		return;
+	}
+
+	/* Find the message set belonging to the relkind the command expects. */
+	while (entry->kind != '\0' && entry->kind != rightkind)
+		entry++;
+
+	if (entry->kind != '\0')
+	{
+		if (missing_ok)
+			ereport(NOTICE, (errmsg(entry->skipping_msg, rel->relname)));
+		else
+			ereport(ERROR,
+					(errcode(entry->nonexistent_code),
+					 errmsg(entry->nonexistent_msg, rel->relname)));
+	}
+
+	Assert(entry->kind != '\0');	/* Should be impossible */
+}
+
+/*
+ * DropErrorMsgWrongType
+ *		Raise the error for a "DROP" command that names a relation whose
+ *		kind does not match the command.
+ *
+ * The primary message is taken from the table entry for the kind the
+ * command expected (rightkind).  A hint is attached only when the table also
+ * has an entry for the kind the relation actually has (wrongkind).
+ */
+static void
+DropErrorMsgWrongType(const char *relname, char wrongkind, char rightkind)
+{
+	const struct dropmsgstrings *expected = dropmsgstringarray;
+	const struct dropmsgstrings *actual = dropmsgstringarray;
+
+	while (expected->kind != '\0' && expected->kind != rightkind)
+		expected++;
+	Assert(expected->kind != '\0');
+
+	/* wrongkind may have no entry; "actual" then rests on the terminator */
+	while (actual->kind != '\0' && actual->kind != wrongkind)
+		actual++;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+			 errmsg(expected->nota_msg, relname),
+			 (actual->kind == '\0') ? 0 : errhint("%s", _(actual->drophint_msg))));
+}
+
+/*
+ * RemoveRelations
+ *		Implements DROP TABLE, DROP INDEX, DROP SEQUENCE, DROP VIEW,
+ *		DROP MATERIALIZED VIEW, DROP FOREIGN TABLE
+ *
+ * drop->removeType selects the relkind that is requested for every named
+ * relation, drop->objects is the list of names to remove, drop->missing_ok
+ * is passed to DropErrorMsgNonExistent() for names that cannot be found,
+ * and drop->behavior is handed on to performMultipleDeletions().  Each name
+ * is looked up and locked first; the deletion itself is issued once, at the
+ * end, for the whole collected set of object addresses.
+ */
+void
+RemoveRelations(DropStmt *drop)
+{
+	ObjectAddresses *objects;
+	char		relkind;
+	ListCell   *cell;
+	int			flags = 0;
+	LOCKMODE	lockmode = AccessExclusiveLock;
+
+	/* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */
+	if (drop->concurrent)
+	{
+		/*
+		 * Note that for temporary relations this lock may get upgraded later
+		 * on, but as no other session can access a temporary relation, this
+		 * is actually fine.
+		 */
+		lockmode = ShareUpdateExclusiveLock;
+		Assert(drop->removeType == OBJECT_INDEX);
+		if (list_length(drop->objects) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("DROP INDEX CONCURRENTLY does not support dropping multiple objects")));
+		if (drop->behavior == DROP_CASCADE)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("DROP INDEX CONCURRENTLY does not support CASCADE")));
+	}
+
+	/*
+	 * First we identify all the relations, then we delete them in a single
+	 * performMultipleDeletions() call.  This is to avoid unwanted DROP
+	 * RESTRICT errors if one of the relations depends on another.
+	 */
+
+	/* Determine required relkind */
+	switch (drop->removeType)
+	{
+		case OBJECT_TABLE:
+			relkind = RELKIND_RELATION;
+			break;
+
+		case OBJECT_INDEX:
+			relkind = RELKIND_INDEX;
+			break;
+
+		case OBJECT_SEQUENCE:
+			relkind = RELKIND_SEQUENCE;
+			break;
+
+		case OBJECT_VIEW:
+			relkind = RELKIND_VIEW;
+			break;
+
+		case OBJECT_MATVIEW:
+			relkind = RELKIND_MATVIEW;
+			break;
+
+		case OBJECT_FOREIGN_TABLE:
+			relkind = RELKIND_FOREIGN_TABLE;
+			break;
+
+		default:
+			elog(ERROR, "unrecognized drop object type: %d",
+				 (int) drop->removeType);
+			relkind = 0;		/* keep compiler quiet */
+			break;
+	}
+
+	/* Lock and validate each relation; build a list of object addresses */
+	objects = new_object_addresses();
+
+	foreach(cell, drop->objects)
+	{
+		RangeVar   *rel = makeRangeVarFromNameList((List *) lfirst(cell));
+		Oid			relOid;
+		ObjectAddress obj;
+		struct DropRelationCallbackState state;
+
+		/*
+		 * These next few steps are a great deal like relation_openrv, but we
+		 * don't bother building a relcache entry since we don't need it.
+		 *
+		 * Check for shared-cache-inval messages before trying to access the
+		 * relation.  This is needed to cover the case where the name
+		 * identifies a rel that has been dropped and recreated since the
+		 * start of our transaction: if we don't flush the old syscache entry,
+		 * then we'll latch onto that entry and suffer an error later.
+		 */
+		AcceptInvalidationMessages();
+
+		/*
+		 * Look up the appropriate relation using namespace search.  The
+		 * callback state carries the requested relkind and the concurrent
+		 * flag in, and starts out with no heap or partition parent recorded.
+		 */
+		state.relkind = relkind;
+		state.heapOid = InvalidOid;
+		state.partParentOid = InvalidOid;
+		state.concurrent = drop->concurrent;
+		relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK,
+										  RangeVarCallbackForDropRelation,
+										  (void *) &state);
+
+		/*
+		 * Not there?  Let DropErrorMsgNonExistent() report it according to
+		 * missing_ok; if it returns, skip this name and go on to the next.
+		 */
+		if (!OidIsValid(relOid))
+		{
+			DropErrorMsgNonExistent(rel, relkind, drop->missing_ok);
+			continue;
+		}
+
+		/*
+		 * Decide if concurrent mode needs to be used here or not.  The
+		 * relation persistence cannot be known without its OID.
+		 */
+		if (drop->concurrent &&
+			get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
+		{
+			Assert(list_length(drop->objects) == 1 &&
+				   drop->removeType == OBJECT_INDEX);
+			flags |= PERFORM_DELETION_CONCURRENTLY;
+		}
+
+		/*
+		 * Concurrent index drop cannot be used with partitioned indexes,
+		 * either.
+		 */
+		if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 &&
+			get_rel_relkind(relOid) == RELKIND_PARTITIONED_INDEX)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot drop partitioned index \"%s\" concurrently",
+							rel->relname)));
+
+		/* OK, we're ready to delete this one */
+		obj.classId = RelationRelationId;
+		obj.objectId = relOid;
+		obj.objectSubId = 0;
+
+		add_exact_object_address(&obj, objects);
+	}
+
+	performMultipleDeletions(objects, drop->behavior, flags);
+
+	free_object_addresses(objects);
+}
+
+/*
+ * Before acquiring a table lock, check whether we have sufficient rights.
+ * In the case of DROP INDEX, also try to lock the table before the index.
+ * Also, if the table to be dropped is a partition, we try to lock the parent
+ * first.
+ *
+ * This is the callback that RemoveRelations() passes to
+ * RangeVarGetRelidExtended().  "arg" points to a struct
+ * DropRelationCallbackState: its relkind and concurrent fields are inputs,
+ * while heapOid and partParentOid record the index heap and the partition
+ * parent locked by this function, so that those locks can be released when
+ * a later call arrives with a relOid that differs from oldRelOid.
+ */
+static void
+RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
+								void *arg)
+{
+	HeapTuple	tuple;
+	struct DropRelationCallbackState *state;
+	char		relkind;
+	char		expected_relkind;
+	bool		is_partition;
+	Form_pg_class classform;
+	LOCKMODE	heap_lockmode;
+	bool		invalid_system_index = false;
+
+	state = (struct DropRelationCallbackState *) arg;
+	relkind = state->relkind;
+	heap_lockmode = state->concurrent ?
+		ShareUpdateExclusiveLock : AccessExclusiveLock;
+
+	/*
+	 * If we previously locked some other index's heap, and the name we're
+	 * looking up no longer refers to that relation, release the now-useless
+	 * lock.
+	 */
+	if (relOid != oldRelOid && OidIsValid(state->heapOid))
+	{
+		UnlockRelationOid(state->heapOid, heap_lockmode);
+		state->heapOid = InvalidOid;
+	}
+
+	/*
+	 * Similarly, if we previously locked some other partition's parent, and
+	 * the name we're looking up no longer refers to that relation, release
+	 * the now-useless lock.
+	 */
+	if (relOid != oldRelOid && OidIsValid(state->partParentOid))
+	{
+		UnlockRelationOid(state->partParentOid, AccessExclusiveLock);
+		state->partParentOid = InvalidOid;
+	}
+
+	/* Didn't find a relation, so no need for locking or permission checks. */
+	if (!OidIsValid(relOid))
+		return;
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid));
+	if (!HeapTupleIsValid(tuple))
+		return;					/* concurrently dropped, so nothing to do */
+	classform = (Form_pg_class) GETSTRUCT(tuple);
+	is_partition = classform->relispartition;
+
+	/*
+	 * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
+	 * but RemoveRelations() can only pass one relkind for a given relation.
+	 * It chooses RELKIND_RELATION for both regular and partitioned tables.
+	 * That means we must be careful before giving the wrong type error when
+	 * the relation is RELKIND_PARTITIONED_TABLE.  An equivalent problem
+	 * exists with indexes.
+	 */
+	if (classform->relkind == RELKIND_PARTITIONED_TABLE)
+		expected_relkind = RELKIND_RELATION;
+	else if (classform->relkind == RELKIND_PARTITIONED_INDEX)
+		expected_relkind = RELKIND_INDEX;
+	else
+		expected_relkind = classform->relkind;
+
+	if (relkind != expected_relkind)
+		DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
+
+	/* Allow DROP to either table owner or schema owner */
+	if (!pg_class_ownercheck(relOid, GetUserId()) &&
+		!pg_namespace_ownercheck(classform->relnamespace, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
+					   rel->relname);
+
+	/*
+	 * Check the case of a system index that might have been invalidated by a
+	 * failed concurrent process and allow its drop. For the time being, this
+	 * only concerns indexes of toast relations that became invalid during a
+	 * REINDEX CONCURRENTLY process.
+	 */
+	if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX)
+	{
+		HeapTuple	locTuple;
+		Form_pg_index indexform;
+		bool		indisvalid;
+
+		locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid));
+		if (!HeapTupleIsValid(locTuple))
+		{
+			ReleaseSysCache(tuple);
+			return;
+		}
+
+		indexform = (Form_pg_index) GETSTRUCT(locTuple);
+		indisvalid = indexform->indisvalid;
+		ReleaseSysCache(locTuple);
+
+		/* Mark object as being an invalid index of system catalogs */
+		if (!indisvalid)
+			invalid_system_index = true;
+	}
+
+	/* In the case of an invalid index, it is fine to bypass this check */
+	if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						rel->relname)));
+
+	ReleaseSysCache(tuple);
+
+	/*
+	 * In DROP INDEX, attempt to acquire lock on the parent table before
+	 * locking the index.  index_drop() will need this anyway, and since
+	 * regular queries lock tables before their indexes, we risk deadlock if
+	 * we do it the other way around.  No error if we don't find a pg_index
+	 * entry, though --- the relation may have been dropped.
+	 */
+	if ((relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX) &&
+		relOid != oldRelOid)
+	{
+		state->heapOid = IndexGetRelation(relOid, true);
+		if (OidIsValid(state->heapOid))
+			LockRelationOid(state->heapOid, heap_lockmode);
+	}
+
+	/*
+	 * Similarly, if the relation is a partition, we must acquire lock on its
+	 * parent before locking the partition.  That's because queries lock the
+	 * parent before its partitions, so we risk deadlock if we do it the other
+	 * way around.
+	 */
+	if (is_partition && relOid != oldRelOid)
+	{
+		state->partParentOid = get_partition_parent(relOid, true);
+		if (OidIsValid(state->partParentOid))
+			LockRelationOid(state->partParentOid, AccessExclusiveLock);
+	}
+}
+
+/*
+ * ExecuteTruncate
+ *		Executes a TRUNCATE command.
+ *
+ * This is a multi-relation truncate.  We first open and grab exclusive
+ * lock on all relations involved, checking permissions and otherwise
+ * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * relations having FK references to the targeted relations are automatically
+ * added to the group; in RESTRICT mode, we check that all FK references are
+ * internal to the group that's being truncated.  Finally all the relations
+ * are truncated and reindexed.
+ *
+ * This function itself only collects and checks the relations named in
+ * stmt->relations, plus their inheritance children when the RangeVar's inh
+ * flag is set.  The resulting lists are passed, together with
+ * stmt->behavior and stmt->restart_seqs, to ExecuteTruncateGuts(), which
+ * does the rest.  The relations collected here are closed here afterwards,
+ * keeping their locks.
+ */
+void
+ExecuteTruncate(TruncateStmt *stmt)
+{
+	List	   *rels = NIL;
+	List	   *relids = NIL;
+	List	   *relids_logged = NIL;
+	ListCell   *cell;
+
+	/*
+	 * Open, exclusive-lock, and check all the explicitly-specified relations
+	 */
+	foreach(cell, stmt->relations)
+	{
+		RangeVar   *rv = lfirst(cell);
+		Relation	rel;
+		bool		recurse = rv->inh;
+		Oid			myrelid;
+		LOCKMODE	lockmode = AccessExclusiveLock;
+
+		myrelid = RangeVarGetRelidExtended(rv, lockmode,
+										   0, RangeVarCallbackForTruncate,
+										   NULL);
+
+		/* open the relation, we already hold a lock on it */
+		rel = table_open(myrelid, NoLock);
+
+		/* don't throw error for "TRUNCATE foo, foo" */
+		if (list_member_oid(relids, myrelid))
+		{
+			table_close(rel, lockmode);
+			continue;
+		}
+
+		/*
+		 * RangeVarGetRelidExtended() has done most checks with its callback,
+		 * but other checks with the now-opened Relation remain.
+		 */
+		truncate_check_activity(rel);
+
+		rels = lappend(rels, rel);
+		relids = lappend_oid(relids, myrelid);
+		/* Log this relation only if needed for logical decoding */
+		if (RelationIsLogicallyLogged(rel))
+			relids_logged = lappend_oid(relids_logged, myrelid);
+
+		if (recurse)
+		{
+			ListCell   *child;
+			List	   *children;
+
+			children = find_all_inheritors(myrelid, lockmode, NULL);
+
+			foreach(child, children)
+			{
+				Oid			childrelid = lfirst_oid(child);
+
+				if (list_member_oid(relids, childrelid))
+					continue;
+
+				/* find_all_inheritors already got lock */
+				rel = table_open(childrelid, NoLock);
+
+				/*
+				 * It is possible that the parent table has children that are
+				 * temp tables of other backends.  We cannot safely access
+				 * such tables (because of buffering issues), and the best
+				 * thing to do is to silently ignore them.  Note that this
+				 * check is the same as one of the checks done in
+				 * truncate_check_activity() called below, still it is kept
+				 * here for simplicity.
+				 */
+				if (RELATION_IS_OTHER_TEMP(rel))
+				{
+					table_close(rel, lockmode);
+					continue;
+				}
+
+				/*
+				 * Inherited TRUNCATE commands perform access permission
+				 * checks on the parent table only. So we skip checking the
+				 * children's permissions and don't call
+				 * truncate_check_perms() here.
+				 */
+				truncate_check_rel(RelationGetRelid(rel), rel->rd_rel);
+				truncate_check_activity(rel);
+
+				rels = lappend(rels, rel);
+				relids = lappend_oid(relids, childrelid);
+				/* Log this relation only if needed for logical decoding */
+				if (RelationIsLogicallyLogged(rel))
+					relids_logged = lappend_oid(relids_logged, childrelid);
+			}
+		}
+		else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot truncate only a partitioned table"),
+					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
+	}
+
+	ExecuteTruncateGuts(rels, relids, relids_logged,
+						stmt->behavior, stmt->restart_seqs);
+
+	/* And close the rels */
+	foreach(cell, rels)
+	{
+		Relation	rel = (Relation) lfirst(cell);
+
+		table_close(rel, NoLock);
+	}
+}
+
+/*
+ * ExecuteTruncateGuts
+ *
+ * Internal implementation of TRUNCATE.  This is called by the actual TRUNCATE
+ * command (see above) as well as replication subscribers that execute a
+ * replicated TRUNCATE action.
+ *
+ * explicit_rels is the list of Relations to truncate that the command
+ * specified.  relids is the list of Oids corresponding to explicit_rels.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging.  This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
+ *
+ * behavior selects CASCADE or RESTRICT treatment of foreign-key references,
+ * and restart_seqs says whether the sequences owned by the truncated
+ * relations are to be reset as well.
+ *
+ * In CASCADE mode, relids and relids_logged are extended here (with
+ * lappend_oid) by the referencing relations that get pulled in.  Those extra
+ * relations are opened and closed by this function; the relations in
+ * explicit_rels are not closed here.
+ */
+void
+ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+					DropBehavior behavior, bool restart_seqs)
+{
+	List	   *rels;
+	List	   *seq_relids = NIL;
+	EState	   *estate;
+	ResultRelInfo *resultRelInfos;
+	ResultRelInfo *resultRelInfo;
+	SubTransactionId mySubid;
+	ListCell   *cell;
+	Oid		   *logrelids;
+
+	/*
+	 * Check the explicitly-specified relations.
+	 *
+	 * In CASCADE mode, suck in all referencing relations as well.  This
+	 * requires multiple iterations to find indirectly-dependent relations. At
+	 * each phase, we need to exclusive-lock new rels before looking for their
+	 * dependencies, else we might miss something.  Also, we check each rel as
+	 * soon as we open it, to avoid a faux pas such as holding lock for a long
+	 * time on a rel we have no permissions for.
+	 */
+	rels = list_copy(explicit_rels);
+	if (behavior == DROP_CASCADE)
+	{
+		for (;;)
+		{
+			List	   *newrelids;
+
+			newrelids = heap_truncate_find_FKs(relids);
+			if (newrelids == NIL)
+				break;			/* nothing else to add */
+
+			foreach(cell, newrelids)
+			{
+				Oid			relid = lfirst_oid(cell);
+				Relation	rel;
+
+				rel = table_open(relid, AccessExclusiveLock);
+				ereport(NOTICE,
+						(errmsg("truncate cascades to table \"%s\"",
+								RelationGetRelationName(rel))));
+				truncate_check_rel(relid, rel->rd_rel);
+				truncate_check_perms(relid, rel->rd_rel);
+				truncate_check_activity(rel);
+				rels = lappend(rels, rel);
+				relids = lappend_oid(relids, relid);
+				/* Log this relation only if needed for logical decoding */
+				if (RelationIsLogicallyLogged(rel))
+					relids_logged = lappend_oid(relids_logged, relid);
+			}
+		}
+	}
+
+	/*
+	 * Check foreign key references.  In CASCADE mode, this should be
+	 * unnecessary since we just pulled in all the references; but as a
+	 * cross-check, do it anyway if in an Assert-enabled build.
+	 */
+#ifdef USE_ASSERT_CHECKING
+	heap_truncate_check_FKs(rels, false);
+#else
+	if (behavior == DROP_RESTRICT)
+		heap_truncate_check_FKs(rels, false);
+#endif
+
+	/*
+	 * If we are asked to restart sequences, find all the sequences, lock them
+	 * (we need AccessExclusiveLock for ResetSequence), and check permissions.
+	 * We want to do this early since it's pointless to do all the truncation
+	 * work only to fail on sequence permissions.
+	 */
+	if (restart_seqs)
+	{
+		foreach(cell, rels)
+		{
+			Relation	rel = (Relation) lfirst(cell);
+			List	   *seqlist = getOwnedSequences(RelationGetRelid(rel));
+			ListCell   *seqcell;
+
+			foreach(seqcell, seqlist)
+			{
+				Oid			seq_relid = lfirst_oid(seqcell);
+				Relation	seq_rel;
+
+				seq_rel = relation_open(seq_relid, AccessExclusiveLock);
+
+				/* This check must match AlterSequence! */
+				if (!pg_class_ownercheck(seq_relid, GetUserId()))
+					aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SEQUENCE,
+								   RelationGetRelationName(seq_rel));
+
+				seq_relids = lappend_oid(seq_relids, seq_relid);
+
+				relation_close(seq_rel, NoLock);
+			}
+		}
+	}
+
+	/* Prepare to catch AFTER triggers. */
+	AfterTriggerBeginQuery();
+
+	/*
+	 * To fire triggers, we'll need an EState as well as a ResultRelInfo for
+	 * each relation.  We don't need to call ExecOpenIndices, though.
+	 *
+	 * We put the ResultRelInfos in the es_opened_result_relations list, even
+	 * though we don't have a range table and don't populate the
+	 * es_result_relations array.  That's a bit bogus, but it's enough to make
+	 * ExecGetTriggerResultRel() find them.
+	 *
+	 * The ResultRelInfos live in one array, filled in the same order as
+	 * "rels"; the trigger loops below walk the array and the list in step.
+	 */
+	estate = CreateExecutorState();
+	resultRelInfos = (ResultRelInfo *)
+		palloc(list_length(rels) * sizeof(ResultRelInfo));
+	resultRelInfo = resultRelInfos;
+	foreach(cell, rels)
+	{
+		Relation	rel = (Relation) lfirst(cell);
+
+		InitResultRelInfo(resultRelInfo,
+						  rel,
+						  0,	/* dummy rangetable index */
+						  NULL,
+						  0);
+		estate->es_opened_result_relations =
+			lappend(estate->es_opened_result_relations, resultRelInfo);
+		resultRelInfo++;
+	}
+
+	/*
+	 * Process all BEFORE STATEMENT TRUNCATE triggers before we begin
+	 * truncating (this is because one of them might throw an error). Also, if
+	 * we were to allow them to prevent statement execution, that would need
+	 * to be handled here.
+	 */
+	resultRelInfo = resultRelInfos;
+	foreach(cell, rels)
+	{
+		ExecBSTruncateTriggers(estate, resultRelInfo);
+		resultRelInfo++;
+	}
+
+	/*
+	 * OK, truncate each table.
+	 */
+	mySubid = GetCurrentSubTransactionId();
+
+	foreach(cell, rels)
+	{
+		Relation	rel = (Relation) lfirst(cell);
+
+		/* Skip partitioned tables as there is nothing to do */
+		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			continue;
+
+		/*
+		 * Normally, we need a transaction-safe truncation here.  However, if
+		 * the table was either created in the current (sub)transaction or has
+		 * a new relfilenode in the current (sub)transaction, then we can just
+		 * truncate it in-place, because a rollback would cause the whole
+		 * table or the current physical file to be thrown away anyway.
+		 */
+		if (rel->rd_createSubid == mySubid ||
+			rel->rd_newRelfilenodeSubid == mySubid)
+		{
+			/* Immediate, non-rollbackable truncation is OK */
+			heap_truncate_one_rel(rel);
+		}
+		else
+		{
+			Oid			heap_relid;
+			Oid			toast_relid;
+			ReindexParams reindex_params = {0};
+
+			/*
+			 * This effectively deletes all rows in the table, and may be done
+			 * in a serializable transaction.  In that case we must record a
+			 * rw-conflict in to this transaction from each transaction
+			 * holding a predicate lock on the table.
+			 */
+			CheckTableForSerializableConflictIn(rel);
+
+			/*
+			 * Need the full transaction-safe pushups.
+			 *
+			 * Create a new empty storage file for the relation, and assign it
+			 * as the relfilenode value. The old storage file is scheduled for
+			 * deletion at commit.
+			 */
+			RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence);
+
+			heap_relid = RelationGetRelid(rel);
+
+			/*
+			 * The same for the toast table, if any.
+			 */
+			toast_relid = rel->rd_rel->reltoastrelid;
+			if (OidIsValid(toast_relid))
+			{
+				Relation	toastrel = relation_open(toast_relid,
+													 AccessExclusiveLock);
+
+				RelationSetNewRelfilenode(toastrel,
+										  toastrel->rd_rel->relpersistence);
+				table_close(toastrel, NoLock);
+			}
+
+			/*
+			 * Reconstruct the indexes to match, and we're done.
+			 */
+			reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST,
+							 &reindex_params);
+		}
+
+		pgstat_count_truncate(rel);
+	}
+
+	/*
+	 * Restart owned sequences if we were asked to.
+	 */
+	foreach(cell, seq_relids)
+	{
+		Oid			seq_relid = lfirst_oid(cell);
+
+		ResetSequence(seq_relid);
+	}
+
+	/*
+	 * Write a WAL record to allow this set of actions to be logically
+	 * decoded.
+	 *
+	 * Assemble an array of relids so we can write a single WAL record for the
+	 * whole action.
+	 */
+	if (list_length(relids_logged) > 0)
+	{
+		xl_heap_truncate xlrec;
+		int			i = 0;
+
+		/* should only get here if wal_level >= logical */
+		Assert(XLogLogicalInfoActive());
+
+		logrelids = palloc(list_length(relids_logged) * sizeof(Oid));
+		foreach(cell, relids_logged)
+			logrelids[i++] = lfirst_oid(cell);
+
+		xlrec.dbId = MyDatabaseId;
+		xlrec.nrelids = list_length(relids_logged);
+		xlrec.flags = 0;
+		if (behavior == DROP_CASCADE)
+			xlrec.flags |= XLH_TRUNCATE_CASCADE;
+		if (restart_seqs)
+			xlrec.flags |= XLH_TRUNCATE_RESTART_SEQS;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfHeapTruncate);
+		XLogRegisterData((char *) logrelids, list_length(relids_logged) * sizeof(Oid));
+
+		XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
+
+		(void) XLogInsert(RM_HEAP_ID, XLOG_HEAP_TRUNCATE);
+	}
+
+	/*
+	 * Process all AFTER STATEMENT TRUNCATE triggers.
+	 */
+	resultRelInfo = resultRelInfos;
+	foreach(cell, rels)
+	{
+		ExecASTruncateTriggers(estate, resultRelInfo);
+		resultRelInfo++;
+	}
+
+	/* Handle queued AFTER triggers */
+	AfterTriggerEndQuery(estate);
+
+	/* We can clean up the EState now */
+	FreeExecutorState(estate);
+
+	/*
+	 * Close any rels opened by CASCADE (can't do this while EState still
+	 * holds refs).  Taking the difference against explicit_rels leaves just
+	 * the relations this function opened itself.
+	 */
+	rels = list_difference_ptr(rels, explicit_rels);
+	foreach(cell, rels)
+	{
+		Relation	rel = (Relation) lfirst(cell);
+
+		table_close(rel, NoLock);
+	}
+}
+
+/*
+ * truncate_check_rel
+ *		Reject relations that TRUNCATE must not be applied to.
+ *
+ * This works from the pg_class tuple alone, which lets it serve as a
+ * subroutine of both ExecuteTruncate() and RangeVarCallbackForTruncate().
+ *
+ * 'relid' is the OID of the relation being checked.
+ * 'reltuple' is the relation's pg_class row.
+ *
+ * Raises an error if the relation is neither a plain nor a partitioned
+ * table, or if it is a system catalog while allowSystemTableMods is off.
+ * Otherwise the object-truncate hook is invoked for the relation.
+ */
+static void
+truncate_check_rel(Oid relid, Form_pg_class reltuple)
+{
+	char	   *tabname = NameStr(reltuple->relname);
+
+	switch (reltuple->relkind)
+	{
+		case RELKIND_RELATION:
+			break;
+
+		case RELKIND_PARTITIONED_TABLE:
+
+			/*
+			 * Let through only so that the checks below are applied; no
+			 * physical truncation will occur for a partitioned table.
+			 */
+			break;
+
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is not a table", tabname)));
+			break;
+	}
+
+	/* System catalogs are off limits unless explicitly permitted. */
+	if (!allowSystemTableMods)
+	{
+		if (IsSystemClass(relid, reltuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied: \"%s\" is a system catalog",
+							tabname)));
+	}
+
+	InvokeObjectTruncateHook(relid);
+}
+
+/*
+ * truncate_check_perms
+ *		Verify that the current user holds TRUNCATE privilege on a relation.
+ *
+ * 'relid' is the OID of the relation; 'reltuple' is its pg_class row, used
+ * only to build the error report (object type and relation name).
+ *
+ * Returns normally when the ACL check succeeds; otherwise reports the
+ * failure through aclcheck_error().
+ */
+static void
+truncate_check_perms(Oid relid, Form_pg_class reltuple)
+{
+	AclResult	verdict;
+
+	verdict = pg_class_aclcheck(relid, GetUserId(), ACL_TRUNCATE);
+
+	/* Nothing more to do when the privilege is held. */
+	if (verdict == ACLCHECK_OK)
+		return;
+
+	aclcheck_error(verdict,
+				   get_relkind_objtype(reltuple->relkind),
+				   NameStr(reltuple->relname));
+}
+
+/*
+ * truncate_check_activity
+ *		Extra sanity checks that need an open Relation.
+ *
+ * These are kept apart from truncate_check_rel() because
+ * RangeVarCallbackForTruncate() cannot open a Relation yet.
+ *
+ * 'rel' is the already-opened relation that is about to be truncated.
+ */
+static void
+truncate_check_activity(Relation rel)
+{
+	bool		foreign_temp = RELATION_IS_OTHER_TEMP(rel);
+
+	/*
+	 * Temp tables belonging to other backends cannot be truncated: their
+	 * local buffer manager is not going to cope.
+	 */
+	if (foreign_temp)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot truncate temporary tables of other sessions")));
+
+	/*
+	 * The relation must not be actively used in the current transaction
+	 * either; that covers open scans and pending AFTER trigger events.
+	 */
+	CheckTableNotInUse(rel, "TRUNCATE");
+}
+
+/*
+ * storage_name
+ *	  Map a typstorage/attstorage code to its printable name.
+ *
+ * Returns a constant string; an unrecognized code yields "???".
+ */
+static const char *
+storage_name(char c)
+{
+	if (c == TYPSTORAGE_PLAIN)
+		return "PLAIN";
+	if (c == TYPSTORAGE_EXTERNAL)
+		return "EXTERNAL";
+	if (c == TYPSTORAGE_EXTENDED)
+		return "EXTENDED";
+	if (c == TYPSTORAGE_MAIN)
+		return "MAIN";
+
+	/* not one of the known storage codes */
+	return "???";
+}
+
+/*----------
+ * MergeAttributes
+ *		Returns new schema given initial schema and superclasses.
+ *
+ * Input arguments:
+ * 'schema' is the column/attribute definition for the table. (It's a list
+ *		of ColumnDef's.) It is destructively changed.
+ * 'supers' is a list of OIDs of parent relations, already locked by caller.
+ * 'relpersistence' is the persistence type of the table.
+ * 'is_partition' tells if the table is a partition.
+ *
+ * Output arguments:
+ * 'supconstr' receives a list of constraints belonging to the parents,
+ *		updated as necessary to be valid for the child.
+ *
+ * Return value:
+ * Completed schema list.
+ *
+ * Errors:
+ *	  ereport(ERROR) is raised for too many columns, duplicate column names
+ *	  in the explicit list, parents that are unsuitable (wrong relkind,
+ *	  persistence mismatch, not owned by the current user), and for type,
+ *	  collation, storage, compression, generation or default conflicts
+ *	  between column definitions that have to be merged.
+ *
+ * Notes:
+ *	  The order in which the attributes are inherited is very important.
+ *	  Intuitively, the inherited attributes should come first. If a table
+ *	  inherits from multiple parents, the order of those attributes follows
+ *	  the order of the parents specified in CREATE TABLE.
+ *
+ *	  Here's an example:
+ *
+ *		create table person (name text, age int4, location point);
+ *		create table emp (salary int4, manager text) inherits(person);
+ *		create table student (gpa float8) inherits (person);
+ *		create table stud_emp (percent int4) inherits (emp, student);
+ *
+ *	  The order of the attributes of stud_emp is:
+ *
+ *							person {1:name, 2:age, 3:location}
+ *							/	 \
+ *			   {6:gpa}	student   emp {4:salary, 5:manager}
+ *							\	 /
+ *						   stud_emp {7:percent}
+ *
+ *	   If the same attribute name appears multiple times, then it appears
+ *	   in the result table in the proper location for its first appearance.
+ *
+ *	   Constraints (including NOT NULL constraints) for the child table
+ *	   are the union of all relevant constraints, from both the child schema
+ *	   and parent tables.
+ *
+ *	   The default value for a child column is defined as:
+ *		(1) If the child schema specifies a default, that value is used.
+ *		(2) If neither the child nor any parent specifies a default, then
+ *			the column will not have a default.
+ *		(3) If conflicting defaults are inherited from different parents
+ *			(and not overridden by the child), an error is raised.
+ *		(4) Otherwise the inherited default is used.
+ *		Rule (3) is new in Postgres 7.1; in earlier releases you got a
+ *		rather arbitrary choice of which parent default to use.
+ *----------
+ */
+static List *
+MergeAttributes(List *schema, List *supers, char relpersistence,
+				bool is_partition, List **supconstr)
+{
+	List	   *inhSchema = NIL;	/* ColumnDefs inherited from parents */
+	List	   *constraints = NIL;	/* CookedConstraints taken from parents */
+	bool		have_bogus_defaults = false;	/* any bogus_marker set? */
+	int			child_attno;	/* count of columns appended from parents */
+	static Node bogus_marker = {0}; /* marks conflicting defaults */
+	List	   *saved_schema = NIL; /* partition's set-aside ColumnDefs */
+	ListCell   *entry;
+
+	/*
+	 * Check for and reject tables with too many columns. We perform this
+	 * check relatively early for two reasons: (a) we don't run the risk of
+	 * overflowing an AttrNumber in subsequent code (b) an O(n^2) algorithm is
+	 * okay if we're processing <= 1600 columns, but could take minutes to
+	 * execute if the user attempts to create a table with hundreds of
+	 * thousands of columns.
+	 *
+	 * Note that we also need to check that we do not exceed this figure after
+	 * including columns from inherited relations.
+	 */
+	if (list_length(schema) > MaxHeapAttributeNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_TOO_MANY_COLUMNS),
+				 errmsg("tables can have at most %d columns",
+						MaxHeapAttributeNumber)));
+
+	/*
+	 * Check for duplicate names in the explicit list of attributes.
+	 *
+	 * Although we might consider merging such entries in the same way that we
+	 * handle name conflicts for inherited attributes, it seems to make more
+	 * sense to assume such conflicts are errors.
+	 *
+	 * We don't use foreach() here because we have two nested loops over the
+	 * schema list, with possible element deletions in the inner one.  If we
+	 * used foreach_delete_current() it could only fix up the state of one of
+	 * the loops, so it seems cleaner to use looping over list indexes for
+	 * both loops.  Note that any deletion will happen beyond where the outer
+	 * loop is, so its index never needs adjustment.
+	 */
+	for (int coldefpos = 0; coldefpos < list_length(schema); coldefpos++)
+	{
+		ColumnDef  *coldef = list_nth_node(ColumnDef, schema, coldefpos);
+
+		if (!is_partition && coldef->typeName == NULL)
+		{
+			/*
+			 * Typed table column option that does not belong to a column from
+			 * the type.  This works because the columns from the type come
+			 * first in the list.  (We omit this check for partition column
+			 * lists; those are processed separately below.)
+			 */
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" does not exist",
+							coldef->colname)));
+		}
+
+		/* restpos scans all entries beyond coldef; incr is in loop body */
+		for (int restpos = coldefpos + 1; restpos < list_length(schema);)
+		{
+			ColumnDef  *restdef = list_nth_node(ColumnDef, schema, restpos);
+
+			if (strcmp(coldef->colname, restdef->colname) == 0)
+			{
+				if (coldef->is_from_type)
+				{
+					/*
+					 * merge the column options into the column from the type
+					 */
+					coldef->is_not_null = restdef->is_not_null;
+					coldef->raw_default = restdef->raw_default;
+					coldef->cooked_default = restdef->cooked_default;
+					coldef->constraints = restdef->constraints;
+					coldef->is_from_type = false;
+					/* restpos now indexes the next entry, so don't advance */
+					schema = list_delete_nth_cell(schema, restpos);
+				}
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_DUPLICATE_COLUMN),
+							 errmsg("column \"%s\" specified more than once",
+									coldef->colname)));
+			}
+			else
+				restpos++;
+		}
+	}
+
+	/*
+	 * In case of a partition, there are no new column definitions, only dummy
+	 * ColumnDefs created for column constraints.  Set them aside for now and
+	 * process them at the end.
+	 */
+	if (is_partition)
+	{
+		saved_schema = schema;
+		schema = NIL;
+	}
+
+	/*
+	 * Scan the parents left-to-right, and merge their attributes to form a
+	 * list of inherited attributes (inhSchema).  Each parent's inheritable
+	 * CHECK constraints are collected into 'constraints' along the way.
+	 */
+	child_attno = 0;
+	foreach(entry, supers)
+	{
+		Oid			parent = lfirst_oid(entry);
+		Relation	relation;
+		TupleDesc	tupleDesc;
+		TupleConstr *constr;
+		AttrMap    *newattmap;
+		List	   *inherited_defaults;
+		List	   *cols_with_defaults;
+		AttrNumber	parent_attno;
+		ListCell   *lc1;
+		ListCell   *lc2;
+
+		/* caller already got lock */
+		relation = table_open(parent, NoLock);
+
+		/*
+		 * Check for active uses of the parent partitioned table in the
+		 * current transaction, such as being used in some manner by an
+		 * enclosing command.
+		 */
+		if (is_partition)
+			CheckTableNotInUse(relation, "CREATE TABLE .. PARTITION OF");
+
+		/*
+		 * We do not allow partitioned tables and partitions to participate in
+		 * regular inheritance.
+		 */
+		if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+			!is_partition)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot inherit from partitioned table \"%s\"",
+							RelationGetRelationName(relation))));
+		if (relation->rd_rel->relispartition && !is_partition)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot inherit from partition \"%s\"",
+							RelationGetRelationName(relation))));
+
+		/* Only tables, foreign tables and partitioned tables can be parents */
+		if (relation->rd_rel->relkind != RELKIND_RELATION &&
+			relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+			relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("inherited relation \"%s\" is not a table or foreign table",
+							RelationGetRelationName(relation))));
+
+		/*
+		 * If the parent is permanent, so must be all of its partitions.  Note
+		 * that inheritance allows that case.
+		 */
+		if (is_partition &&
+			relation->rd_rel->relpersistence != RELPERSISTENCE_TEMP &&
+			relpersistence == RELPERSISTENCE_TEMP)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot create a temporary relation as partition of permanent relation \"%s\"",
+							RelationGetRelationName(relation))));
+
+		/* Permanent rels cannot inherit from temporary ones */
+		if (relpersistence != RELPERSISTENCE_TEMP &&
+			relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg(!is_partition
+							? "cannot inherit from temporary relation \"%s\""
+							: "cannot create a permanent relation as partition of temporary relation \"%s\"",
+							RelationGetRelationName(relation))));
+
+		/* If existing rel is temp, it must belong to this session */
+		if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+			!relation->rd_islocaltemp)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg(!is_partition
+							? "cannot inherit from temporary relation of another session"
+							: "cannot create as partition of temporary relation of another session")));
+
+		/*
+		 * We should have an UNDER permission flag for this, but for now,
+		 * demand that creator of a child table own the parent.
+		 */
+		if (!pg_class_ownercheck(RelationGetRelid(relation), GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relation->rd_rel->relkind),
+						   RelationGetRelationName(relation));
+
+		tupleDesc = RelationGetDescr(relation);
+		constr = tupleDesc->constr;
+
+		/*
+		 * newattmap->attnums[] will contain the child-table attribute numbers
+		 * for the attributes of this parent table.  (They are not the same
+		 * for parents after the first one, nor if we have dropped columns.)
+		 */
+		newattmap = make_attrmap(tupleDesc->natts);
+
+		/* We can't process inherited defaults until newattmap is complete. */
+		inherited_defaults = cols_with_defaults = NIL;
+
+		for (parent_attno = 1; parent_attno <= tupleDesc->natts;
+			 parent_attno++)
+		{
+			Form_pg_attribute attribute = TupleDescAttr(tupleDesc,
+														parent_attno - 1);
+			char	   *attributeName = NameStr(attribute->attname);
+			int			exist_attno;
+			ColumnDef  *def;
+
+			/*
+			 * Ignore dropped columns in the parent.
+			 */
+			if (attribute->attisdropped)
+				continue;		/* leave newattmap->attnums entry as zero */
+
+			/*
+			 * Does it conflict with some previously inherited column?
+			 */
+			exist_attno = findAttrByName(attributeName, inhSchema);
+			if (exist_attno > 0)
+			{
+				Oid			defTypeId;
+				int32		deftypmod;
+				Oid			defCollId;
+
+				/*
+				 * Yes, try to merge the two column definitions. They must
+				 * have the same type, typmod, and collation.
+				 */
+				ereport(NOTICE,
+						(errmsg("merging multiple inherited definitions of column \"%s\"",
+								attributeName)));
+				def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1);
+				typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod);
+				if (defTypeId != attribute->atttypid ||
+					deftypmod != attribute->atttypmod)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATATYPE_MISMATCH),
+							 errmsg("inherited column \"%s\" has a type conflict",
+									attributeName),
+							 errdetail("%s versus %s",
+									   format_type_with_typemod(defTypeId,
+																deftypmod),
+									   format_type_with_typemod(attribute->atttypid,
+																attribute->atttypmod))));
+				defCollId = GetColumnDefCollation(NULL, def, defTypeId);
+				if (defCollId != attribute->attcollation)
+					ereport(ERROR,
+							(errcode(ERRCODE_COLLATION_MISMATCH),
+							 errmsg("inherited column \"%s\" has a collation conflict",
+									attributeName),
+							 errdetail("\"%s\" versus \"%s\"",
+									   get_collation_name(defCollId),
+									   get_collation_name(attribute->attcollation))));
+
+				/* Copy/check storage parameter */
+				if (def->storage == 0)
+					def->storage = attribute->attstorage;
+				else if (def->storage != attribute->attstorage)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATATYPE_MISMATCH),
+							 errmsg("inherited column \"%s\" has a storage parameter conflict",
+									attributeName),
+							 errdetail("%s versus %s",
+									   storage_name(def->storage),
+									   storage_name(attribute->attstorage))));
+
+				/* Copy/check compression parameter */
+				if (CompressionMethodIsValid(attribute->attcompression))
+				{
+					const char *compression =
+						GetCompressionMethodName(attribute->attcompression);
+
+					if (def->compression == NULL)
+						def->compression = pstrdup(compression);
+					else if (strcmp(def->compression, compression) != 0)
+						ereport(ERROR,
+								(errcode(ERRCODE_DATATYPE_MISMATCH),
+								 errmsg("column \"%s\" has a compression method conflict",
+										attributeName),
+								 errdetail("%s versus %s", def->compression, compression)));
+				}
+
+				def->inhcount++;
+				/* Merge of NOT NULL constraints = OR 'em together */
+				def->is_not_null |= attribute->attnotnull;
+				/* Default and other constraints are handled below */
+				newattmap->attnums[parent_attno - 1] = exist_attno;
+
+				/* Check for GENERATED conflicts */
+				if (def->generated != attribute->attgenerated)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATATYPE_MISMATCH),
+							 errmsg("inherited column \"%s\" has a generation conflict",
+									attributeName)));
+			}
+			else
+			{
+				/*
+				 * No, create a new inherited column
+				 */
+				def = makeNode(ColumnDef);
+				def->colname = pstrdup(attributeName);
+				def->typeName = makeTypeNameFromOid(attribute->atttypid,
+													attribute->atttypmod);
+				def->inhcount = 1;
+				def->is_local = false;
+				def->is_not_null = attribute->attnotnull;
+				def->is_from_type = false;
+				def->storage = attribute->attstorage;
+				def->raw_default = NULL;
+				def->cooked_default = NULL;
+				def->generated = attribute->attgenerated;
+				def->collClause = NULL;
+				def->collOid = attribute->attcollation;
+				def->constraints = NIL;
+				def->location = -1;
+				if (CompressionMethodIsValid(attribute->attcompression))
+					def->compression = pstrdup(GetCompressionMethodName(
+													attribute->attcompression));
+				else
+					def->compression = NULL;
+				inhSchema = lappend(inhSchema, def);
+				/* the new column takes the next child attribute number */
+				newattmap->attnums[parent_attno - 1] = ++child_attno;
+			}
+
+			/*
+			 * Locate default if any
+			 */
+			if (attribute->atthasdef)
+			{
+				Node	   *this_default = NULL;
+				AttrDefault *attrdef;
+				int			i;
+
+				/* Find default in constraint structure */
+				Assert(constr != NULL);
+				attrdef = constr->defval;
+				for (i = 0; i < constr->num_defval; i++)
+				{
+					if (attrdef[i].adnum == parent_attno)
+					{
+						this_default = stringToNode(attrdef[i].adbin);
+						break;
+					}
+				}
+				Assert(this_default != NULL);
+
+				/*
+				 * If it's a GENERATED default, it might contain Vars that
+				 * need to be mapped to the inherited column(s)' new numbers.
+				 * We can't do that till newattmap is ready, so just remember
+				 * all the inherited default expressions for the moment.
+				 */
+				inherited_defaults = lappend(inherited_defaults, this_default);
+				cols_with_defaults = lappend(cols_with_defaults, def);
+			}
+		}
+
+		/*
+		 * Now process any inherited default expressions, adjusting attnos
+		 * using the completed newattmap map.
+		 */
+		forboth(lc1, inherited_defaults, lc2, cols_with_defaults)
+		{
+			Node	   *this_default = (Node *) lfirst(lc1);
+			ColumnDef  *def = (ColumnDef *) lfirst(lc2);
+			bool		found_whole_row;
+
+			/* Adjust Vars to match new table's column numbering */
+			this_default = map_variable_attnos(this_default,
+											   1, 0,
+											   newattmap,
+											   InvalidOid, &found_whole_row);
+
+			/*
+			 * For the moment we have to reject whole-row variables.  We could
+			 * convert them, if we knew the new table's rowtype OID, but that
+			 * hasn't been assigned yet.  (A variable could only appear in a
+			 * generation expression, so the error message is correct.)
+			 */
+			if (found_whole_row)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot convert whole-row table reference"),
+						 errdetail("Generation expression for column \"%s\" contains a whole-row reference to table \"%s\".",
+								   def->colname,
+								   RelationGetRelationName(relation))));
+
+			/*
+			 * If we already had a default from some prior parent, check to
+			 * see if they are the same.  If so, no problem; if not, mark the
+			 * column as having a bogus default.  Below, we will complain if
+			 * the bogus default isn't overridden by the child schema.
+			 */
+			Assert(def->raw_default == NULL);
+			if (def->cooked_default == NULL)
+				def->cooked_default = this_default;
+			else if (!equal(def->cooked_default, this_default))
+			{
+				def->cooked_default = &bogus_marker;
+				have_bogus_defaults = true;
+			}
+		}
+
+		/*
+		 * Now copy the CHECK constraints of this parent, adjusting attnos
+		 * using the completed newattmap map.  Identically named constraints
+		 * are merged if possible, else we throw error.
+		 */
+		if (constr && constr->num_check > 0)
+		{
+			ConstrCheck *check = constr->check;
+			int			i;
+
+			for (i = 0; i < constr->num_check; i++)
+			{
+				char	   *name = check[i].ccname;
+				Node	   *expr;
+				bool		found_whole_row;
+
+				/* ignore if the constraint is non-inheritable */
+				if (check[i].ccnoinherit)
+					continue;
+
+				/* Adjust Vars to match new table's column numbering */
+				expr = map_variable_attnos(stringToNode(check[i].ccbin),
+										   1, 0,
+										   newattmap,
+										   InvalidOid, &found_whole_row);
+
+				/*
+				 * For the moment we have to reject whole-row variables. We
+				 * could convert them, if we knew the new table's rowtype OID,
+				 * but that hasn't been assigned yet.
+				 */
+				if (found_whole_row)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot convert whole-row table reference"),
+							 errdetail("Constraint \"%s\" contains a whole-row reference to table \"%s\".",
+									   name,
+									   RelationGetRelationName(relation))));
+
+				/* check for duplicate */
+				if (!MergeCheckConstraint(constraints, name, expr))
+				{
+					/* nope, this is a new one */
+					CookedConstraint *cooked;
+
+					cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint));
+					cooked->contype = CONSTR_CHECK;
+					cooked->conoid = InvalidOid;	/* until created */
+					cooked->name = pstrdup(name);
+					cooked->attnum = 0; /* not used for constraints */
+					cooked->expr = expr;
+					cooked->skip_validation = false;
+					cooked->is_local = false;
+					cooked->inhcount = 1;
+					cooked->is_no_inherit = false;
+					constraints = lappend(constraints, cooked);
+				}
+			}
+		}
+
+		free_attrmap(newattmap);
+
+		/*
+		 * Close the parent rel, but keep our lock on it until xact commit.
+		 * That will prevent someone else from deleting or ALTERing the parent
+		 * before the child is committed.
+		 */
+		table_close(relation, NoLock);
+	}
+
+	/*
+	 * If we had no inherited attributes, the result schema is just the
+	 * explicitly declared columns.  Otherwise, we need to merge the declared
+	 * columns into the inherited schema list.  Although, we never have any
+	 * explicitly declared columns if the table is a partition.
+	 */
+	if (inhSchema != NIL)
+	{
+		/* 1-based position of the current entry within 'schema' */
+		int			schema_attno = 0;
+
+		foreach(entry, schema)
+		{
+			ColumnDef  *newdef = lfirst(entry);
+			char	   *attributeName = newdef->colname;
+			int			exist_attno;
+
+			schema_attno++;
+
+			/*
+			 * Does it conflict with some previously inherited column?
+			 */
+			exist_attno = findAttrByName(attributeName, inhSchema);
+			if (exist_attno > 0)
+			{
+				ColumnDef  *def;
+				Oid			defTypeId,
+							newTypeId;
+				int32		deftypmod,
+							newtypmod;
+				Oid			defcollid,
+							newcollid;
+
+				/*
+				 * Partitions have only one parent and have no column
+				 * definitions of their own, so conflict should never occur.
+				 */
+				Assert(!is_partition);
+
+				/*
+				 * Yes, try to merge the two column definitions. They must
+				 * have the same type, typmod, and collation.
+				 */
+				if (exist_attno == schema_attno)
+					ereport(NOTICE,
+							(errmsg("merging column \"%s\" with inherited definition",
+									attributeName)));
+				else
+					ereport(NOTICE,
+							(errmsg("moving and merging column \"%s\" with inherited definition", attributeName),
+							 errdetail("User-specified column moved to the position of the inherited column.")));
+				def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1);
+				typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod);
+				typenameTypeIdAndMod(NULL, newdef->typeName, &newTypeId, &newtypmod);
+				if (defTypeId != newTypeId || deftypmod != newtypmod)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATATYPE_MISMATCH),
+							 errmsg("column \"%s\" has a type conflict",
+									attributeName),
+							 errdetail("%s versus %s",
+									   format_type_with_typemod(defTypeId,
+																deftypmod),
+									   format_type_with_typemod(newTypeId,
+																newtypmod))));
+				defcollid = GetColumnDefCollation(NULL, def, defTypeId);
+				newcollid = GetColumnDefCollation(NULL, newdef, newTypeId);
+				if (defcollid != newcollid)
+					ereport(ERROR,
+							(errcode(ERRCODE_COLLATION_MISMATCH),
+							 errmsg("column \"%s\" has a collation conflict",
+									attributeName),
+							 errdetail("\"%s\" versus \"%s\"",
+									   get_collation_name(defcollid),
+									   get_collation_name(newcollid))));
+
+				/*
+				 * Identity is never inherited.  The new column can have an
+				 * identity definition, so we always just take that one.
+				 */
+				def->identity = newdef->identity;
+
+				/* Copy storage parameter */
+				if (def->storage == 0)
+					def->storage = newdef->storage;
+				else if (newdef->storage != 0 && def->storage != newdef->storage)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATATYPE_MISMATCH),
+							 errmsg("column \"%s\" has a storage parameter conflict",
+									attributeName),
+							 errdetail("%s versus %s",
+									   storage_name(def->storage),
+									   storage_name(newdef->storage))));
+
+				/* Copy compression parameter */
+				if (def->compression == NULL)
+					def->compression = newdef->compression;
+				else if (newdef->compression != NULL)
+				{
+					if (strcmp(def->compression, newdef->compression) != 0)
+						ereport(ERROR,
+								(errcode(ERRCODE_DATATYPE_MISMATCH),
+								 errmsg("column \"%s\" has a compression method conflict",
+										attributeName),
+								 errdetail("%s versus %s", def->compression, newdef->compression)));
+				}
+
+				/* Mark the column as locally defined */
+				def->is_local = true;
+				/* Merge of NOT NULL constraints = OR 'em together */
+				def->is_not_null |= newdef->is_not_null;
+
+				/*
+				 * Check for conflicts related to generated columns.
+				 *
+				 * If the parent column is generated, the child column must be
+				 * unadorned and will be made a generated column.  (We could
+				 * in theory allow the child column definition specifying the
+				 * exact same generation expression, but that's a bit
+				 * complicated to implement and doesn't seem very useful.)  We
+				 * also check that the child column doesn't specify a default
+				 * value or identity, which matches the rules for a single
+				 * column in parse_util.c.
+				 */
+				if (def->generated)
+				{
+					if (newdef->generated)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+								 errmsg("child column \"%s\" specifies generation expression",
+										def->colname),
+								 errhint("Omit the generation expression in the definition of the child table column to inherit the generation expression from the parent table.")));
+
+					/*
+					 * NOTE(review): the "!newdef->generated" half of this
+					 * test looks redundant, since a generated newdef was
+					 * already rejected just above -- confirm before removing.
+					 */
+					if (newdef->raw_default && !newdef->generated)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+								 errmsg("column \"%s\" inherits from generated column but specifies default",
+										def->colname)));
+					if (newdef->identity)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+								 errmsg("column \"%s\" inherits from generated column but specifies identity",
+										def->colname)));
+				}
+
+				/*
+				 * If the parent column is not generated, then take whatever
+				 * the child column definition says.
+				 */
+				else
+				{
+					if (newdef->generated)
+						def->generated = newdef->generated;
+				}
+
+				/* If new def has a default, override previous default */
+				if (newdef->raw_default != NULL)
+				{
+					def->raw_default = newdef->raw_default;
+					def->cooked_default = newdef->cooked_default;
+				}
+			}
+			else
+			{
+				/*
+				 * No, attach new column to result schema
+				 */
+				inhSchema = lappend(inhSchema, newdef);
+			}
+		}
+
+		/* The merged list (inherited columns first) is the result schema */
+		schema = inhSchema;
+
+		/*
+		 * Check that we haven't exceeded the legal # of columns after merging
+		 * in inherited columns.
+		 */
+		if (list_length(schema) > MaxHeapAttributeNumber)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("tables can have at most %d columns",
+							MaxHeapAttributeNumber)));
+	}
+
+	/*
+	 * Now that we have the column definition list for a partition, we can
+	 * check whether the columns referenced in the column constraint specs
+	 * actually exist.  Also, we merge NOT NULL and defaults into each
+	 * corresponding column definition.
+	 */
+	if (is_partition)
+	{
+		foreach(entry, saved_schema)
+		{
+			ColumnDef  *restdef = lfirst(entry);
+			bool		found = false;
+			ListCell   *l;
+
+			foreach(l, schema)
+			{
+				ColumnDef  *coldef = lfirst(l);
+
+				if (strcmp(coldef->colname, restdef->colname) == 0)
+				{
+					found = true;
+					coldef->is_not_null |= restdef->is_not_null;
+
+					/*
+					 * Override the parent's default value for this column
+					 * (coldef->cooked_default) with the partition's local
+					 * definition (restdef->raw_default), if there's one. It
+					 * should be physically impossible to get a cooked default
+					 * in the local definition or a raw default in the
+					 * inherited definition, but make sure they're nulls, for
+					 * future-proofing.
+					 */
+					Assert(restdef->cooked_default == NULL);
+					Assert(coldef->raw_default == NULL);
+					if (restdef->raw_default)
+					{
+						coldef->raw_default = restdef->raw_default;
+						coldef->cooked_default = NULL;
+					}
+				}
+			}
+
+			/* complain for constraints on columns not in parent */
+			if (!found)
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" does not exist",
+								restdef->colname)));
+		}
+	}
+
+	/*
+	 * If we found any conflicting parent default values, check to make sure
+	 * they were overridden by the child.
+	 */
+	if (have_bogus_defaults)
+	{
+		foreach(entry, schema)
+		{
+			ColumnDef  *def = lfirst(entry);
+
+			/* still pointing at the marker means nothing overrode it */
+			if (def->cooked_default == &bogus_marker)
+			{
+				if (def->generated)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+							 errmsg("column \"%s\" inherits conflicting generation expressions",
+									def->colname)));
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+							 errmsg("column \"%s\" inherits conflicting default values",
+									def->colname),
+							 errhint("To resolve the conflict, specify a default explicitly.")));
+			}
+		}
+	}
+
+	*supconstr = constraints;
+	return schema;
+}
+
+
+/*
+ * MergeCheckConstraint
+ *		Fold an inherited CHECK constraint into those already collected.
+ *
+ * 'constraints' is the list of CookedConstraint structs gathered from
+ * earlier parents; 'name' and 'expr' describe the constraint inherited now.
+ *
+ * When an already-collected constraint has the same name, the expressions
+ * must be equal: if so, that entry's inhcount is bumped and true is
+ * returned, otherwise an error is thrown.  If no collected constraint has
+ * this name, false is returned and the caller is expected to add a new
+ * entry.
+ */
+static bool
+MergeCheckConstraint(List *constraints, char *name, Node *expr)
+{
+	ListCell   *cell;
+
+	foreach(cell, constraints)
+	{
+		CookedConstraint *prior = (CookedConstraint *) lfirst(cell);
+
+		Assert(prior->contype == CONSTR_CHECK);
+
+		/* Only a constraint of the same name can merge or conflict */
+		if (strcmp(prior->name, name) == 0)
+		{
+			if (!equal(expr, prior->expr))
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_OBJECT),
+						 errmsg("check constraint name \"%s\" appears multiple times but with different expressions",
+								name)));
+
+			/* Same name, same definition: count one more inheritance */
+			prior->inhcount++;
+			return true;
+		}
+	}
+
+	/* so-far-unique name */
+	return false;
+}
+
+
+/*
+ * StoreCatalogInheritance
+ *		Record a new relation's inheritance links in the system catalogs.
+ *
+ * relationId is the child relation; supers lists the OIDs of its direct
+ * ancestors.  Each ancestor is handed to StoreCatalogInheritance1() with a
+ * sequence number that starts at 1 and follows the list order;
+ * child_is_partition is passed through unchanged.
+ */
+static void
+StoreCatalogInheritance(Oid relationId, List *supers,
+						bool child_is_partition)
+{
+	Relation	inhRel;
+	ListCell   *lc;
+	int32		nextSeqNo = 1;
+
+	/* sanity checks */
+	AssertArg(OidIsValid(relationId));
+
+	/* Nothing to record when there are no parents */
+	if (supers == NIL)
+		return;
+
+	/*
+	 * pg_inherits gets one row per direct ancestor.  For each of them we
+	 * also enter a dependency and make sure the ancestor is marked with
+	 * relhassubclass = true.
+	 *
+	 * (Historically indirect ancestors were looked up here as well, to be
+	 * entered into pg_ipl.  That catalog no longer exists, so there is no
+	 * reason to search for indirect ancestors.)
+	 */
+	inhRel = table_open(InheritsRelationId, RowExclusiveLock);
+
+	foreach(lc, supers)
+	{
+		StoreCatalogInheritance1(relationId, lfirst_oid(lc), nextSeqNo++,
+								 inhRel, child_is_partition);
+	}
+
+	table_close(inhRel, RowExclusiveLock);
+}
+
+/*
+ * Create the catalog entries that make relationId an inheritance child of
+ * parentOid, using seqNumber as its position among the child's parents.
+ *
+ * inhRelation is the already-opened pg_inherits catalog (it is not
+ * referenced in this function body).  child_is_partition selects the
+ * dependency type via child_dependency_type().
+ */
+static void
+StoreCatalogInheritance1(Oid relationId, Oid parentOid,
+						 int32 seqNumber, Relation inhRelation,
+						 bool child_is_partition)
+{
+	ObjectAddress self;
+	ObjectAddress ancestor;
+
+	/* First, the pg_inherits row itself */
+	StoreSingleInheritance(relationId, parentOid, seqNumber);
+
+	/* Next, make the child depend on its parent */
+	self.classId = RelationRelationId;
+	self.objectId = relationId;
+	self.objectSubId = 0;
+
+	ancestor.classId = RelationRelationId;
+	ancestor.objectId = parentOid;
+	ancestor.objectSubId = 0;
+
+	recordDependencyOn(&self, &ancestor,
+					   child_dependency_type(child_is_partition));
+
+	/*
+	 * Fire the post-alter hook for this inheritance link.  The hook takes a
+	 * single object identifier, so the parent's OID travels in the
+	 * auxiliary_id argument.
+	 */
+	InvokeObjectPostAlterHookArg(InheritsRelationId,
+								 relationId, 0,
+								 parentOid, false);
+
+	/* Finally, flag the parent as having subclasses */
+	SetRelationHasSubclass(parentOid, true);
+}
+
+/*
+ * Search a list of ColumnDefs for an entry whose colname equals
+ * attributeName.
+ *
+ * The result is the 1-based position of the first match within schema, or 0
+ * when no entry has that name.
+ */
+static int
+findAttrByName(const char *attributeName, List *schema)
+{
+	ListCell   *cell;
+	int			position = 0;
+
+	foreach(cell, schema)
+	{
+		ColumnDef  *coldef = lfirst(cell);
+
+		position++;
+		if (strcmp(coldef->colname, attributeName) == 0)
+			return position;
+	}
+
+	/* no column of that name */
+	return 0;
+}
+
+
+/*
+ * SetRelationHasSubclass
+ *		Make pg_class.relhassubclass of the given relation equal to the
+ *		supplied value.
+ *
+ * NOTE: the caller is expected to hold a suitable lock on the relation;
+ * ShareUpdateExclusiveLock is enough.
+ *
+ * NOTE: callers rely on a side-effect of this routine: an SI invalidation
+ * message goes out to every backend (this one included), so that plans
+ * referencing the relation are rebuilt with the new list of children.  That
+ * has to happen even when the pg_class row already holds the wanted value,
+ * which is why the no-change path issues an explicit relcache invalidation.
+ */
+void
+SetRelationHasSubclass(Oid relationId, bool relhassubclass)
+{
+	Relation	pgclass;
+	HeapTuple	reltup;
+	Form_pg_class relform;
+
+	/* Work on a private, writable copy of the relation's pg_class tuple. */
+	pgclass = table_open(RelationRelationId, RowExclusiveLock);
+	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
+	if (!HeapTupleIsValid(reltup))
+		elog(ERROR, "cache lookup failed for relation %u", relationId);
+	relform = (Form_pg_class) GETSTRUCT(reltup);
+
+	if (relform->relhassubclass == relhassubclass)
+	{
+		/* flag is already right, but force a relcache rebuild anyway */
+		CacheInvalidateRelcacheByTuple(reltup);
+	}
+	else
+	{
+		/* flip the flag and write the tuple back */
+		relform->relhassubclass = relhassubclass;
+		CatalogTupleUpdate(pgclass, &reltup->t_self, reltup);
+	}
+
+	heap_freetuple(reltup);
+	table_close(pgclass, RowExclusiveLock);
+}
+
+/*
+ * CheckRelationTableSpaceMove
+ *		Decide whether a relation may be moved to the given tablespace.
+ *
+ * NOTE: The caller must hold AccessExclusiveLock on the relation.
+ *
+ * Three outcomes are possible: true means the move is allowed, false means
+ * the move would change nothing, and an error is raised when the move cannot
+ * be done at all.
+ */
+bool
+CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId)
+{
+	Oid			currentSpc = rel->rd_rel->reltablespace;
+
+	/* Same tablespace as now: nothing to do. */
+	if (newTableSpaceId == currentSpc)
+		return false;
+
+	/*
+	 * MyDatabaseTableSpace is stored as 0, so a relation recorded with 0
+	 * that is asked to move to MyDatabaseTableSpace is not moving either.
+	 */
+	if (currentSpc == 0 && newTableSpaceId == MyDatabaseTableSpace)
+		return false;
+
+	/*
+	 * Mapped relations cannot be moved into a different tablespace.  (Among
+	 * other things, this rules out all shared catalogs.)
+	 */
+	if (RelationIsMapped(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot move system relation \"%s\"",
+						RelationGetRelationName(rel))));
+
+	/* pg_global is off limits for a non-shared relation */
+	if (newTableSpaceId == GLOBALTABLESPACE_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("only shared relations can be placed in pg_global tablespace")));
+
+	/*
+	 * Temp tables belonging to other backends must stay put ... their local
+	 * buffer manager is not going to cope.
+	 */
+	if (RELATION_IS_OTHER_TEMP(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot move temporary tables of other sessions")));
+
+	return true;
+}
+
+/*
+ * SetRelationTableSpace
+ *		Store a new reltablespace, and optionally a new relfilenode, in the
+ *		relation's pg_class entry.
+ *
+ * newTableSpaceId is the tablespace to record; MyDatabaseTableSpace is
+ * recorded as InvalidOid.  newRelFileNode is the filenode to record; passing
+ * InvalidOid leaves the existing relfilenode alone.
+ *
+ * NOTE: The caller must hold AccessExclusiveLock on the relation.
+ *
+ * Callers had better have verified the move with
+ * CheckRelationTableSpaceMove() beforehand (only asserted here), and they
+ * are responsible for making the change visible with
+ * CommandCounterIncrement().
+ */
+void
+SetRelationTableSpace(Relation rel,
+					  Oid newTableSpaceId,
+					  Oid newRelFileNode)
+{
+	Relation	classRel;
+	HeapTuple	classTup;
+	Form_pg_class classForm;
+	Oid			relid;
+
+	Assert(CheckRelationTableSpaceMove(rel, newTableSpaceId));
+
+	relid = RelationGetRelid(rel);
+
+	/* Fetch a writable copy of the relation's pg_class row. */
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+
+	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(classTup))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+	classForm = (Form_pg_class) GETSTRUCT(classTup);
+
+	/* Fill in the new values and write the row back. */
+	if (newTableSpaceId == MyDatabaseTableSpace)
+		classForm->reltablespace = InvalidOid;
+	else
+		classForm->reltablespace = newTableSpaceId;
+
+	if (OidIsValid(newRelFileNode))
+		classForm->relfilenode = newRelFileNode;
+
+	CatalogTupleUpdate(classRel, &classTup->t_self, classTup);
+
+	/*
+	 * A dependency on the tablespace is recorded only for relations that
+	 * have no physical storage.
+	 */
+	if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
+		changeDependencyOnTablespace(RelationRelationId, relid,
+									 classForm->reltablespace);
+
+	heap_freetuple(classTup);
+	table_close(classRel, RowExclusiveLock);
+}
+
+/*
+ *		renameatt_check			- basic sanity checks before attribute rename
+ *
+ * myrelid is the relation's OID and classform its pg_class row.  recursing
+ * disables the typed-table restriction.  Every failed check raises an error;
+ * the function returns normally only when all of them pass.
+ */
+static void
+renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing)
+{
+	/* Columns of a typed table may only be renamed while recursing */
+	if (!recursing && classform->reloftype)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot rename column of typed table")));
+
+	/*
+	 * From the system's point of view nothing would break if columns of
+	 * sequences or toast tables were renamed, because internal references go
+	 * by attnum.  Still, letting users change names that are hardcoded into
+	 * the system doesn't seem right, so only the relkinds listed here are
+	 * accepted.
+	 */
+	switch (classform->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_VIEW:
+		case RELKIND_MATVIEW:
+		case RELKIND_COMPOSITE_TYPE:
+		case RELKIND_INDEX:
+		case RELKIND_PARTITIONED_INDEX:
+		case RELKIND_FOREIGN_TABLE:
+		case RELKIND_PARTITIONED_TABLE:
+			/* acceptable */
+			break;
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table",
+							NameStr(classform->relname))));
+			break;
+	}
+
+	/* Permission check: only the owner of a class may change its schema. */
+	if (!pg_class_ownercheck(myrelid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER,
+					   get_relkind_objtype(get_rel_relkind(myrelid)),
+					   NameStr(classform->relname));
+
+	/* System catalogs are protected unless allowSystemTableMods is set. */
+	if (IsSystemClass(myrelid, classform) && !allowSystemTableMods)
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						NameStr(classform->relname))));
+}
+
+/*
+ *		renameatt_internal		- workhorse for renameatt
+ *
+ * Renames column 'oldattname' of relation 'myrelid' to 'newattname'.
+ *
+ * recurse: also rename the column in every other relation returned by
+ *		find_all_inheritors() for myrelid.  When false and expected_parents
+ *		is 0, an error is raised if the relation has inheritance children.
+ * recursing: handed to renameatt_check(); true in the calls this function
+ *		makes for child tables and typed tables.
+ * expected_parents: the largest attinhcount the column may have without the
+ *		rename being rejected as a rename of an inherited column.
+ * behavior: handed to find_typed_table_dependencies() when the target is a
+ *		composite type.
+ *
+ * Errors are raised if the column does not exist, is a system column
+ * (attnum <= 0), is inherited from more parents than expected, or if the
+ * new name collides with an existing column.
+ *
+ * Return value is the attribute number in the 'myrelid' relation.
+ */
+static AttrNumber
+renameatt_internal(Oid myrelid,
+				   const char *oldattname,
+				   const char *newattname,
+				   bool recurse,
+				   bool recursing,
+				   int expected_parents,
+				   DropBehavior behavior)
+{
+	Relation	targetrelation;
+	Relation	attrelation;
+	HeapTuple	atttup;
+	Form_pg_attribute attform;
+	AttrNumber	attnum;
+
+	/*
+	 * Grab an exclusive lock on the target table, which we will NOT release
+	 * until end of transaction.  The sanity checks are then run against the
+	 * pg_class form of the relation we just opened.
+	 */
+	targetrelation = relation_open(myrelid, AccessExclusiveLock);
+	renameatt_check(myrelid, RelationGetForm(targetrelation), recursing);
+
+	/*
+	 * if the 'recurse' flag is set then we are supposed to rename this
+	 * attribute in all classes that inherit from 'relname' (as well as in
+	 * 'relname').
+	 *
+	 * any permissions or problems with duplicate attributes will cause the
+	 * whole transaction to abort, which is what we want -- all or nothing.
+	 */
+	if (recurse)
+	{
+		List	   *child_oids,
+				   *child_numparents;
+		ListCell   *lo,
+				   *li;
+
+		/*
+		 * we need the number of parents for each child so that the recursive
+		 * calls to renameatt() can determine whether there are any parents
+		 * outside the inheritance hierarchy being processed.
+		 */
+		child_oids = find_all_inheritors(myrelid, AccessExclusiveLock,
+										 &child_numparents);
+
+		/*
+		 * find_all_inheritors does the recursive search of the inheritance
+		 * hierarchy, so all we have to do is process all of the relids in the
+		 * list that it returns.  The entry for myrelid itself is skipped
+		 * here; it is renamed by the code further down.
+		 */
+		forboth(lo, child_oids, li, child_numparents)
+		{
+			Oid			childrelid = lfirst_oid(lo);
+			int			numparents = lfirst_int(li);
+
+			if (childrelid == myrelid)
+				continue;
+			/* note we need not recurse again, so recurse = false here */
+			renameatt_internal(childrelid, oldattname, newattname, false, true, numparents, behavior);
+		}
+	}
+	else
+	{
+		/*
+		 * If we are told not to recurse, there had better not be any child
+		 * tables; else the rename would put them out of step.
+		 *
+		 * expected_parents will only be 0 if we are not already recursing.
+		 */
+		if (expected_parents == 0 &&
+			find_inheritance_children(myrelid, false, NoLock) != NIL)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("inherited column \"%s\" must be renamed in child tables too",
+							oldattname)));
+	}
+
+	/*
+	 * rename attributes in typed tables of composite type; each such table
+	 * is processed with recurse = true, recursing = true and
+	 * expected_parents = 0
+	 */
+	if (targetrelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+	{
+		List	   *child_oids;
+		ListCell   *lo;
+
+		child_oids = find_typed_table_dependencies(targetrelation->rd_rel->reltype,
+												   RelationGetRelationName(targetrelation),
+												   behavior);
+
+		foreach(lo, child_oids)
+			renameatt_internal(lfirst_oid(lo), oldattname, newattname, true, true, 0, behavior);
+	}
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	atttup = SearchSysCacheCopyAttName(myrelid, oldattname);
+	if (!HeapTupleIsValid(atttup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" does not exist",
+						oldattname)));
+	attform = (Form_pg_attribute) GETSTRUCT(atttup);
+
+	attnum = attform->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot rename system column \"%s\"",
+						oldattname)));
+
+	/*
+	 * if the attribute is inherited, forbid the renaming.  if this is a
+	 * top-level call to renameatt(), then expected_parents will be 0, so the
+	 * effect of this code will be to prohibit the renaming if the attribute
+	 * is inherited at all.  if this is a recursive call to renameatt(),
+	 * expected_parents will be the number of parents the current relation has
+	 * within the inheritance hierarchy being processed, so we'll prohibit the
+	 * renaming only if there are additional parents from elsewhere.
+	 */
+	if (attform->attinhcount > expected_parents)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot rename inherited column \"%s\"",
+						oldattname)));
+
+	/* new name should not already exist; the result is deliberately ignored */
+	(void) check_for_column_name_collision(targetrelation, newattname, false);
+
+	/* apply the update to our copy of the tuple, then write it back */
+	namestrcpy(&(attform->attname), newattname);
+
+	CatalogTupleUpdate(attrelation, &atttup->t_self, atttup);
+
+	InvokeObjectPostAlterHook(RelationRelationId, myrelid, attnum);
+
+	heap_freetuple(atttup);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	relation_close(targetrelation, NoLock); /* close rel but keep lock */
+
+	return attnum;
+}
+
+/*
+ * RangeVar lookup callback: run renameatt_check() on the candidate relation
+ * so that permissions and integrity are verified before a relation lock is
+ * acquired.
+ *
+ * Only relid is consulted; rv, oldrelid and arg are not referenced.  If the
+ * relation's pg_class row cannot be found (it was concurrently dropped), the
+ * callback does nothing.
+ */
+static void
+RangeVarCallbackForRenameAttribute(const RangeVar *rv, Oid relid, Oid oldrelid,
+								   void *arg)
+{
+	HeapTuple	classTup;
+
+	classTup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+
+	if (HeapTupleIsValid(classTup))
+	{
+		/* top-level check, hence recursing = false */
+		renameatt_check(relid, (Form_pg_class) GETSTRUCT(classTup), false);
+		ReleaseSysCache(classTup);
+	}
+	/* else: concurrently dropped, nothing to check */
+}
+
+/*
+ *		renameatt		- changes the name of an attribute in a relation
+ *
+ * stmt supplies the target relation, the old column name (subname), the new
+ * name, whether to recurse to children (relation->inh) and the drop
+ * behavior.
+ *
+ * Returns the ObjectAddress of the renamed column.  When the relation is not
+ * found and the lookup did not raise an error, a NOTICE is emitted and
+ * InvalidObjectAddress is returned.
+ */
+ObjectAddress
+renameatt(RenameStmt *stmt)
+{
+	ObjectAddress result;
+	AttrNumber	colno;
+	Oid			relid;
+
+	/* the lock level requested here should match renameatt_internal */
+	relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+									 stmt->missing_ok ? RVR_MISSING_OK : 0,
+									 RangeVarCallbackForRenameAttribute,
+									 NULL);
+
+	if (OidIsValid(relid))
+	{
+		colno = renameatt_internal(relid,
+								   stmt->subname,	/* old att name */
+								   stmt->newname,	/* new att name */
+								   stmt->relation->inh, /* recursive? */
+								   false,	/* recursing? */
+								   0,	/* expected inhcount */
+								   stmt->behavior);
+
+		ObjectAddressSubSet(result, RelationRelationId, relid, colno);
+
+		return result;
+	}
+
+	/* relation is missing: report it and do nothing */
+	ereport(NOTICE,
+			(errmsg("relation \"%s\" does not exist, skipping",
+					stmt->relation->relname)));
+
+	return InvalidObjectAddress;
+}
+
+/*
+ * same logic as renameatt_internal
+ *
+ * Renames constraint 'oldconname' to 'newconname'.  The constraint belongs
+ * either to the domain 'mytypid' (when that is nonzero) or to the relation
+ * 'myrelid'; the AssertArg below rejects both being set at once.
+ *
+ * For an inheritable CHECK constraint on a relation, 'recurse' and
+ * 'expected_parents' play the same roles as in renameatt_internal: with
+ * recurse, every other relation returned by find_all_inheritors() is
+ * processed too; without it, a call with expected_parents == 0 fails if the
+ * relation has inheritance children; and in either case the rename is
+ * refused when coninhcount exceeds expected_parents.
+ *
+ * NOTE(review): 'recursing' is not referenced in this function body.
+ *
+ * Returns the ObjectAddress of the renamed constraint.
+ */
+static ObjectAddress
+rename_constraint_internal(Oid myrelid,
+						   Oid mytypid,
+						   const char *oldconname,
+						   const char *newconname,
+						   bool recurse,
+						   bool recursing,
+						   int expected_parents)
+{
+	Relation	targetrelation = NULL;
+	Oid			constraintOid;
+	HeapTuple	tuple;
+	Form_pg_constraint con;
+	ObjectAddress address;
+
+	AssertArg(!myrelid || !mytypid);
+
+	if (mytypid)
+	{
+		constraintOid = get_domain_constraint_oid(mytypid, oldconname, false);
+	}
+	else
+	{
+		targetrelation = relation_open(myrelid, AccessExclusiveLock);
+
+		/*
+		 * don't tell it whether we're recursing; we allow changing typed
+		 * tables here
+		 */
+		renameatt_check(myrelid, RelationGetForm(targetrelation), false);
+
+		constraintOid = get_relation_constraint_oid(myrelid, oldconname, false);
+	}
+
+	tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintOid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for constraint %u",
+			 constraintOid);
+	con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+	if (myrelid && con->contype == CONSTRAINT_CHECK && !con->connoinherit)
+	{
+		if (recurse)
+		{
+			List	   *child_oids,
+					   *child_numparents;
+			ListCell   *lo,
+					   *li;
+
+			child_oids = find_all_inheritors(myrelid, AccessExclusiveLock,
+											 &child_numparents);
+
+			forboth(lo, child_oids, li, child_numparents)
+			{
+				Oid			childrelid = lfirst_oid(lo);
+				int			numparents = lfirst_int(li);
+
+				if (childrelid == myrelid)
+					continue;
+
+				rename_constraint_internal(childrelid, InvalidOid, oldconname, newconname, false, true, numparents);
+			}
+		}
+		else
+		{
+			if (expected_parents == 0 &&
+				find_inheritance_children(myrelid, false, NoLock) != NIL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("inherited constraint \"%s\" must be renamed in child tables too",
+								oldconname)));
+		}
+
+		if (con->coninhcount > expected_parents)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot rename inherited constraint \"%s\"",
+							oldconname)));
+	}
+
+	if (con->conindid
+		&& (con->contype == CONSTRAINT_PRIMARY
+			|| con->contype == CONSTRAINT_UNIQUE
+			|| con->contype == CONSTRAINT_EXCLUSION))
+		/* rename the index; this renames the constraint as well */
+		RenameRelationInternal(con->conindid, newconname, false, true);
+	else
+		RenameConstraintById(constraintOid, newconname);
+
+	ObjectAddressSet(address, ConstraintRelationId, constraintOid);
+
+	ReleaseSysCache(tuple);
+
+	if (targetrelation)
+	{
+		/*
+		 * Invalidate relcache so that others can see the new constraint
+		 * name.  This is done only for relation constraints; for a domain
+		 * constraint no relation was opened above.
+		 */
+		CacheInvalidateRelcache(targetrelation);
+
+		relation_close(targetrelation, NoLock); /* close rel but keep lock */
+	}
+
+	return address;
+}
+
+/*
+ * RenameConstraint
+ *		Rename a constraint of a domain (renameType OBJECT_DOMCONSTRAINT) or
+ *		of a relation (any other renameType).
+ *
+ * For a domain, the type is resolved from stmt->object and domain ownership
+ * is checked.  For a relation, the relation is looked up and locked; if it
+ * is not found and the lookup did not raise an error, a NOTICE is emitted
+ * and InvalidObjectAddress is returned.  The actual work is delegated to
+ * rename_constraint_internal(), whose result is returned.
+ */
+ObjectAddress
+RenameConstraint(RenameStmt *stmt)
+{
+	Oid			typid = InvalidOid;
+	Oid			relid = InvalidOid;
+
+	if (stmt->renameType != OBJECT_DOMCONSTRAINT)
+	{
+		/* lock level taken here should match rename_constraint_internal */
+		relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+										 stmt->missing_ok ? RVR_MISSING_OK : 0,
+										 RangeVarCallbackForRenameAttribute,
+										 NULL);
+		if (!OidIsValid(relid))
+		{
+			ereport(NOTICE,
+					(errmsg("relation \"%s\" does not exist, skipping",
+							stmt->relation->relname)));
+			return InvalidObjectAddress;
+		}
+	}
+	else
+	{
+		Relation	typeRel;
+		HeapTuple	typeTup;
+
+		typid = typenameTypeId(NULL, makeTypeNameFromNameList(castNode(List, stmt->object)));
+
+		/* pg_type is closed below without releasing the lock */
+		typeRel = table_open(TypeRelationId, RowExclusiveLock);
+		typeTup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+		if (!HeapTupleIsValid(typeTup))
+			elog(ERROR, "cache lookup failed for type %u", typid);
+		checkDomainOwner(typeTup);
+		ReleaseSysCache(typeTup);
+		table_close(typeRel, NoLock);
+	}
+
+	return rename_constraint_internal(relid, typid,
+									  stmt->subname,
+									  stmt->newname,
+									  (stmt->relation &&
+									   stmt->relation->inh),	/* recursive? */
+									  false,	/* recursing? */
+									  0 /* expected inhcount */ );
+}
+
+/*
+ * Execute ALTER TABLE/INDEX/SEQUENCE/VIEW/MATERIALIZED VIEW/FOREIGN TABLE
+ * RENAME
+ *
+ * Returns the ObjectAddress of the renamed relation.  When the relation is
+ * not found and the lookup did not raise an error, a NOTICE is emitted and
+ * InvalidObjectAddress is returned.
+ */
+ObjectAddress
+RenameRelation(RenameStmt *stmt)
+{
+	bool		is_index = (stmt->renameType == OBJECT_INDEX);
+	LOCKMODE	lockmode;
+	ObjectAddress result;
+	Oid			relid;
+
+	/*
+	 * The target table, index, sequence, view, materialized view, or foreign
+	 * table is locked here, and the lock is NOT released until end of
+	 * transaction.
+	 *
+	 * The level chosen must be the same one RenameRelationInternal uses, so
+	 * that no lock escalation takes place.
+	 */
+	if (is_index)
+		lockmode = ShareUpdateExclusiveLock;
+	else
+		lockmode = AccessExclusiveLock;
+
+	relid = RangeVarGetRelidExtended(stmt->relation,
+									 lockmode,
+									 stmt->missing_ok ? RVR_MISSING_OK : 0,
+									 RangeVarCallbackForAlterRelation,
+									 (void *) stmt);
+
+	if (OidIsValid(relid))
+	{
+		/* Do the work */
+		RenameRelationInternal(relid, stmt->newname, false, is_index);
+
+		ObjectAddressSet(result, RelationRelationId, relid);
+
+		return result;
+	}
+
+	/* relation is missing: report it and do nothing */
+	ereport(NOTICE,
+			(errmsg("relation \"%s\" does not exist, skipping",
+					stmt->relation->relname)));
+
+	return InvalidObjectAddress;
+}
+
+/*
+ *		RenameRelationInternal - change the name of a relation
+ *
+ * myrelid: OID of the relation to rename.
+ * newrelname: the new name; an error is raised if get_relname_relid() finds
+ *		a relation of that name in the target's namespace.
+ * is_internal: passed through to InvokeObjectPostAlterHookArg().
+ * is_index: selects ShareUpdateExclusiveLock instead of AccessExclusiveLock
+ *		when opening the target relation.
+ *
+ * Besides the pg_class row, the relation's row type (if reltype is valid)
+ * and, for an index or partitioned index, the constraint reported by
+ * get_index_constraint() (if any) are renamed as well.
+ */
+void
+RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
+{
+	Relation	targetrelation;
+	Relation	relrelation;	/* for RELATION relation */
+	HeapTuple	reltup;
+	Form_pg_class relform;
+	Oid			namespaceId;
+
+	/*
+	 * Grab a lock on the target relation, which we will NOT release until end
+	 * of transaction.  We need at least a self-exclusive lock so that
+	 * concurrent DDL doesn't overwrite the rename if they start updating
+	 * while still seeing the old version.  The lock also guards against
+	 * triggering relcache reloads in concurrent sessions, which might not
+	 * handle this information changing under them.  For indexes, we can use a
+	 * reduced lock level because RelationReloadIndexInfo() handles indexes
+	 * specially.
+	 */
+	targetrelation = relation_open(myrelid, is_index ? ShareUpdateExclusiveLock : AccessExclusiveLock);
+	namespaceId = RelationGetNamespace(targetrelation);
+
+	/*
+	 * Find relation's pg_class tuple, and make sure newrelname isn't in use.
+	 * We fetch a copy of the tuple so that it can be modified in place
+	 * below.
+	 */
+	relrelation = table_open(RelationRelationId, RowExclusiveLock);
+
+	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
+	if (!HeapTupleIsValid(reltup))	/* shouldn't happen */
+		elog(ERROR, "cache lookup failed for relation %u", myrelid);
+	relform = (Form_pg_class) GETSTRUCT(reltup);
+
+	if (get_relname_relid(newrelname, namespaceId) != InvalidOid)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_TABLE),
+				 errmsg("relation \"%s\" already exists",
+						newrelname)));
+
+	/*
+	 * Update pg_class tuple with new relname.  (Scribbling on reltup is OK
+	 * because it's a copy...)
+	 */
+	namestrcpy(&(relform->relname), newrelname);
+
+	CatalogTupleUpdate(relrelation, &reltup->t_self, reltup);
+
+	InvokeObjectPostAlterHookArg(RelationRelationId, myrelid, 0,
+								 InvalidOid, is_internal);
+
+	heap_freetuple(reltup);
+	table_close(relrelation, RowExclusiveLock);
+
+	/*
+	 * Also rename the associated type, if any.  The type is given the same
+	 * new name, in the relation's namespace.
+	 */
+	if (OidIsValid(targetrelation->rd_rel->reltype))
+		RenameTypeInternal(targetrelation->rd_rel->reltype,
+						   newrelname, namespaceId);
+
+	/*
+	 * Also rename the associated constraint, if any.  This applies only when
+	 * the relation is an index or a partitioned index.
+	 */
+	if (targetrelation->rd_rel->relkind == RELKIND_INDEX ||
+		targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		Oid			constraintId = get_index_constraint(myrelid);
+
+		if (OidIsValid(constraintId))
+			RenameConstraintById(constraintId, newrelname);
+	}
+
+	/*
+	 * Close rel, but keep lock!
+	 */
+	relation_close(targetrelation, NoLock);
+}
+
+/*
+ * Refuse ALTER TABLE (and similar commands) when this backend holds any open
+ * reference to the target relation other than the one the calling command
+ * has just acquired; such a reference implies an open cursor or an active
+ * plan.  The check is needed because our lock protects us only against
+ * other sessions, not against stomping on our own foot!
+ *
+ * Among ALTER TABLE forms, only ALTER COLUMN TYPE is known to cause serious
+ * trouble, and some changes are obviously pretty benign, so the test could
+ * possibly be relaxed to fail only for certain kinds of alterations.  Since
+ * the use-case for allowing any of them is not obvious, we don't work hard
+ * at it for now.
+ *
+ * The commands are also rejected when AFTER trigger events are pending for
+ * the rel.  For the rewriting variants of ALTER TABLE that is certainly
+ * required: they don't preserve tuple TIDs, so the pending events would
+ * fetch the wrong tuples.  In other cases it may be overly cautious, but
+ * again it seems better to err on the side of paranoia.
+ *
+ * REINDEX calls this with "rel" referencing the index to be rebuilt; the
+ * worry there is active indexscans on the index.  The trigger-event check
+ * is skipped for indexes, since no damage is done to the parent table.
+ *
+ * "stmt" is the statement name (eg, "ALTER TABLE"), used in error messages.
+ */
+void
+CheckTableNotInUse(Relation rel, const char *stmt)
+{
+	char		relkind = rel->rd_rel->relkind;
+	int			allowed_refs;
+
+	/* a nailed relcache entry is expected to have refcount 2, others 1 */
+	if (rel->rd_isnailed)
+		allowed_refs = 2;
+	else
+		allowed_refs = 1;
+
+	if (rel->rd_refcnt != allowed_refs)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+		/* translator: first %s is a SQL command, eg ALTER TABLE */
+				 errmsg("cannot %s \"%s\" because it is being used by active queries in this session",
+						stmt, RelationGetRelationName(rel))));
+
+	/* indexes are exempt from the pending-trigger check */
+	if (relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX)
+		return;
+
+	if (AfterTriggerPendingOnRel(RelationGetRelid(rel)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+		/* translator: first %s is a SQL command, eg ALTER TABLE */
+				 errmsg("cannot %s \"%s\" because it has pending trigger events",
+						stmt, RelationGetRelationName(rel))));
+}
+
+/*
+ * AlterTableLookupRelation
+ *		Resolve the relation named by an alter table statement to its OID,
+ *		locking it with the given lock mode.
+ *
+ * The lookup goes through RangeVarGetRelidExtended() with
+ * RangeVarCallbackForAlterRelation as callback and the statement itself as
+ * the callback argument; RVR_MISSING_OK is passed when stmt->missing_ok is
+ * set.
+ */
+Oid
+AlterTableLookupRelation(AlterTableStmt *stmt, LOCKMODE lockmode)
+{
+	Oid			relid;
+
+	relid = RangeVarGetRelidExtended(stmt->relation, lockmode,
+									 stmt->missing_ok ? RVR_MISSING_OK : 0,
+									 RangeVarCallbackForAlterRelation,
+									 (void *) stmt);
+
+	return relid;
+}
+
+/*
+ * AlterTable
+ *		Execute ALTER TABLE, which can be a list of subcommands
+ *
+ * The work is organized in three phases:
+ *		1. Look over the subcommands and do pre-transformation checking.
+ *		2. Validate and transform the subcommands, updating system catalogs.
+ *		3. Scan the table(s) to check new constraints and, if needed, recopy
+ *		   the data into new table(s).
+ * Phase 3 runs only when at least one subcommand requires it.  The point of
+ * this arrangement is that several independent schema updates can be
+ * carried out with a single pass over the data.
+ *
+ * Phase 1 is done by ATPrepCmd.  It creates a "work queue" entry for every
+ * affected table (there can be several when the commands traverse a table
+ * inheritance hierarchy) and does preliminary validation of the subcommands.
+ * What can be done here is limited, because earlier subcommands may change
+ * the catalog state that later ones see.  In general this phase acquires
+ * table locks, checks permissions and relkind, and recurses to find child
+ * tables.
+ *
+ * Phase 2 is done by ATRewriteCatalogs, once per affected table.  (Phases 2
+ * and 3 normally do no explicit recursion, since phase 1 already did it ---
+ * although some subcommands have to recurse in phase 2 instead.)  Some
+ * subcommands must run before others to avoid unnecessary conflicts; for
+ * example, DROP COLUMN should come before ADD COLUMN.  Phase 1 therefore
+ * sorts the subcommands into multiple lists, one per logical "pass" of
+ * phase 2.
+ *
+ * Phase 3 is done by ATRewriteTables, for the tables that need it.
+ *
+ * Thanks to the magic of MVCC, an error at any point rolls back the whole
+ * operation; no special cleanup is required.
+ *
+ * The caller must already hold a lock on the relation that is adequate for
+ * the requested subcommands, i.e. AlterTableGetLockLevel(stmt->cmds) or
+ * higher.  That lock level is passed down so it can be applied recursively
+ * to inherited tables.  Note that the level wanted while recursing may well
+ * be higher than a particular subcommand needs, which is why the overall
+ * requirement is passed down rather than being reassessed at lower levels.
+ *
+ * The caller also supplies a "context", to be handed back to utility.c
+ * whenever a subcommand such as CREATE INDEX has to be executed.  Some of
+ * its fields, such as the relid, are used here as well.
+ */
+void
+AlterTable(AlterTableStmt *stmt, LOCKMODE lockmode,
+		   AlterTableUtilityContext *context)
+{
+	/* No lock is taken here: the caller must have provided an adequate one. */
+	Relation	rel = relation_open(context->relid, NoLock);
+	bool		recurse = stmt->relation->inh;
+
+	CheckTableNotInUse(rel, "ALTER TABLE");
+
+	ATController(stmt, rel, stmt->cmds, recurse, lockmode, context);
+}
+
+/*
+ * AlterTableInternal
+ *
+ * ALTER TABLE with target specified by OID
+ *
+ * Unlike AlterTable, this entry point does not complain when the relation is
+ * already open, since one or more layers of caller quite likely have it
+ * open.  As a consequence it is unsafe to use it for alterations that could
+ * break existing query plans.  On the assumption that it is not used for
+ * those, pending AFTER triggers need not be rejected either.
+ *
+ * Since no AlterTableUtilityContext is available here, this cannot be used
+ * for subcommand types that require parse transformation or that could
+ * generate subcommands which must be passed to ProcessUtility.
+ *
+ * The relation is opened with the lock level AlterTableGetLockLevel()
+ * computes for cmds.
+ */
+void
+AlterTableInternal(Oid relid, List *cmds, bool recurse)
+{
+	LOCKMODE	lockmode;
+	Relation	rel;
+
+	lockmode = AlterTableGetLockLevel(cmds);
+	rel = relation_open(relid, lockmode);
+
+	EventTriggerAlterTableRelid(relid);
+
+	/* no parse statement and no utility context are available here */
+	ATController(NULL, rel, cmds, recurse, lockmode, NULL);
+}
+
+/*
+ * AlterTableGetLockLevel
+ *
+ * Returns the overall lock level required for the supplied list of
+ * subcommands: the numerically greatest level chosen for any one subcommand,
+ * and never below ShareUpdateExclusiveLock, which is the starting value (and
+ * hence the result for an empty list).
+ * The policy for doing this is set according to the needs of AlterTable();
+ * see comments there for overall explanation.
+ *
+ * cmds is a List of AlterTableCmd nodes.  A subcommand type that is not
+ * listed in the switch below is reported with elog(ERROR).  Any of the
+ * ADD CONSTRAINT variants whose definition is not a Constraint node keeps
+ * the AccessExclusiveLock that cmd_lockmode is initialized with.
+ *
+ * Function is called before and after parsing, so it must give same
+ * answer each time it is called. Some subcommands are transformed
+ * into other subcommand types, so the transform must never be made to a
+ * lower lock level than previously assigned. All transforms are noted below.
+ *
+ * Since this is called before we lock the table we cannot use table metadata
+ * to influence the type of lock we acquire.
+ *
+ * There should be no lockmodes hardcoded into the subcommand functions. All
+ * lockmode decisions for ALTER TABLE are made here only. The one exception is
+ * ALTER TABLE RENAME which is treated as a different statement type T_RenameStmt
+ * and does not travel through this section of code and cannot be combined with
+ * any of the subcommands given here.
+ *
+ * Note that Hot Standby only knows about AccessExclusiveLocks on the primary
+ * so any changes that might affect SELECTs running on standbys need to use
+ * AccessExclusiveLocks even if you think a lesser lock would do, unless you
+ * have a solution for that also.
+ *
+ * Also note that pg_dump uses only an AccessShareLock, meaning that anything
+ * that takes a lock less than AccessExclusiveLock can change object definitions
+ * while pg_dump is running. Be careful to check that the appropriate data is
+ * derived by pg_dump using an MVCC snapshot, rather than syscache lookups,
+ * otherwise we might end up with an inconsistent dump that can't restore.
+ */
+LOCKMODE
+AlterTableGetLockLevel(List *cmds)
+{
+	/*
+	 * This only works if we read catalog tables using MVCC snapshots.
+	 */
+	ListCell   *lcmd;
+	LOCKMODE	lockmode = ShareUpdateExclusiveLock;
+
+	foreach(lcmd, cmds)
+	{
+		AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd);
+		LOCKMODE	cmd_lockmode = AccessExclusiveLock; /* default for compiler */
+
+		switch (cmd->subtype)
+		{
+				/*
+				 * These subcommands rewrite the heap, so require full locks.
+				 */
+			case AT_AddColumn:	/* may rewrite heap, in some cases and visible
+								 * to SELECT */
+			case AT_SetTableSpace:	/* must rewrite heap */
+			case AT_AlterColumnType:	/* must rewrite heap */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands may require addition of toast tables. If
+				 * we add a toast table to a table currently being scanned, we
+				 * might miss data added to the new toast table by concurrent
+				 * insert transactions.
+				 */
+			case AT_SetStorage: /* may add toast tables, see
+								 * ATRewriteCatalogs() */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Removing constraints can affect SELECTs that have been
+				 * optimized assuming the constraint holds true. See also
+				 * CloneFkReferenced.
+				 */
+			case AT_DropConstraint: /* as DROP INDEX */
+			case AT_DropNotNull:	/* may change some SQL plans */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Subcommands that may be visible to concurrent SELECTs
+				 */
+			case AT_DropColumn: /* change visible to SELECT */
+			case AT_AddColumnToView:	/* CREATE VIEW */
+			case AT_DropOids:	/* used to equiv to DropColumn */
+			case AT_EnableAlwaysRule:	/* may change SELECT rules */
+			case AT_EnableReplicaRule:	/* may change SELECT rules */
+			case AT_EnableRule: /* may change SELECT rules */
+			case AT_DisableRule:	/* may change SELECT rules */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Changing owner may remove implicit SELECT privileges
+				 */
+			case AT_ChangeOwner:	/* change visible to SELECT */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Changing foreign table options may affect optimization.
+				 */
+			case AT_GenericOptions:
+			case AT_AlterColumnGenericOptions:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect write operations only.
+				 */
+			case AT_EnableTrig:
+			case AT_EnableAlwaysTrig:
+			case AT_EnableReplicaTrig:
+			case AT_EnableTrigAll:
+			case AT_EnableTrigUser:
+			case AT_DisableTrig:
+			case AT_DisableTrigAll:
+			case AT_DisableTrigUser:
+				cmd_lockmode = ShareRowExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect write operations only. XXX
+				 * Theoretically, these could be ShareRowExclusiveLock.
+				 */
+			case AT_ColumnDefault:
+			case AT_CookedColumnDefault:
+			case AT_AlterConstraint:
+			case AT_AddIndex:	/* from ADD CONSTRAINT */
+			case AT_AddIndexConstraint:
+			case AT_ReplicaIdentity:
+			case AT_SetNotNull:
+			case AT_EnableRowSecurity:
+			case AT_DisableRowSecurity:
+			case AT_ForceRowSecurity:
+			case AT_NoForceRowSecurity:
+			case AT_AddIdentity:
+			case AT_DropIdentity:
+			case AT_SetIdentity:
+			case AT_DropExpression:
+			case AT_SetCompression:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			case AT_AddConstraint:
+			case AT_AddConstraintRecurse:	/* becomes AT_AddConstraint */
+			case AT_ReAddConstraint:	/* becomes AT_AddConstraint */
+			case AT_ReAddDomainConstraint:	/* becomes AT_AddConstraint */
+				if (IsA(cmd->def, Constraint))
+				{
+					Constraint *con = (Constraint *) cmd->def;
+
+					switch (con->contype)
+					{
+						case CONSTR_EXCLUSION:
+						case CONSTR_PRIMARY:
+						case CONSTR_UNIQUE:
+
+							/*
+							 * Cases essentially the same as CREATE INDEX. We
+							 * could reduce the lock strength to ShareLock if
+							 * we can work out how to allow concurrent catalog
+							 * updates. XXX Might be set down to
+							 * ShareRowExclusiveLock but requires further
+							 * analysis.
+							 */
+							cmd_lockmode = AccessExclusiveLock;
+							break;
+						case CONSTR_FOREIGN:
+
+							/*
+							 * We add triggers to both tables when we add a
+							 * Foreign Key, so the lock level must be at least
+							 * as strong as CREATE TRIGGER.
+							 */
+							cmd_lockmode = ShareRowExclusiveLock;
+							break;
+
+						default:
+							cmd_lockmode = AccessExclusiveLock;
+					}
+				}
+				break;
+
+				/*
+				 * These subcommands affect inheritance behaviour. Queries
+				 * started before us will continue to see the old inheritance
+				 * behaviour, while queries started after we commit will see
+				 * new behaviour. No need to prevent reads or writes to the
+				 * subtable while we hook it up though. Changing the TupDesc
+				 * may be a problem, so keep highest lock.
+				 */
+			case AT_AddInherit:
+			case AT_DropInherit:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect implicit row type conversion. They
+				 * have effects similar to CREATE/DROP CAST on queries. We
+				 * don't provide for invalidating parse trees as a result of
+				 * such changes, so we keep these at AccessExclusiveLock.
+				 */
+			case AT_AddOf:
+			case AT_DropOf:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Only used by CREATE OR REPLACE VIEW which must conflict
+				 * with any SELECTs currently using the view.
+				 */
+			case AT_ReplaceRelOptions:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect general strategies for performance
+				 * and maintenance, though don't change the semantic results
+				 * from normal data reads and writes. Delaying an ALTER TABLE
+				 * behind currently active writes only delays the point where
+				 * the new strategy begins to take effect, so there is no
+				 * benefit in waiting. In this case the minimum restriction
+				 * applies: we don't currently allow concurrent catalog
+				 * updates.
+				 */
+			case AT_SetStatistics:	/* Uses MVCC in getTableAttrs() */
+			case AT_ClusterOn:	/* Uses MVCC in getIndexes() */
+			case AT_DropCluster:	/* Uses MVCC in getIndexes() */
+			case AT_SetOptions: /* Uses MVCC in getTableAttrs() */
+			case AT_ResetOptions:	/* Uses MVCC in getTableAttrs() */
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+			case AT_SetLogged:
+			case AT_SetUnLogged:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			case AT_ValidateConstraint: /* Uses MVCC in getConstraints() */
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+				/*
+				 * Rel options are more complex than first appears. Options
+				 * are set here for tables, views and indexes; for historical
+				 * reasons these can all be used with ALTER TABLE, so we can't
+				 * decide between them using the basic grammar.
+				 */
+			case AT_SetRelOptions:	/* Uses MVCC in getIndexes() and
+									 * getTables() */
+			case AT_ResetRelOptions:	/* Uses MVCC in getIndexes() and
+										 * getTables() */
+				cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def);
+				break;
+
+			case AT_AttachPartition:
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+			case AT_DetachPartition:
+				if (((PartitionCmd *) cmd->def)->concurrent)
+					cmd_lockmode = ShareUpdateExclusiveLock;
+				else
+					cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			case AT_DetachPartitionFinalize:
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+			case AT_CheckNotNull:
+
+				/*
+				 * This only examines the table's schema; but lock must be
+				 * strong enough to prevent concurrent DROP NOT NULL.
+				 */
+				cmd_lockmode = AccessShareLock;
+				break;
+
+			case AT_AlterCollationRefreshVersion:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			default:			/* oops */
+				elog(ERROR, "unrecognized alter table type: %d",
+					 (int) cmd->subtype);
+				break;
+		}
+
+		/*
+		 * Take the greatest lockmode from any subcommand.  LOCKMODE values
+		 * are compared numerically here, so a subcommand can only raise the
+		 * running result, never lower it.
+		 */
+		if (cmd_lockmode > lockmode)
+			lockmode = cmd_lockmode;
+	}
+
+	return lockmode;
+}
+
+/*
+ * ATController drives an ALTER TABLE through its three phases, in order.
+ *
+ * parsetree is accepted so that it can be handed on to the table-rewrite
+ * phase, from where it is passed to event triggers when requested.
+ */
+static void
+ATController(AlterTableStmt *parsetree,
+			 Relation rel, List *cmds, bool recurse, LOCKMODE lockmode,
+			 AlterTableUtilityContext *context)
+{
+	ListCell   *cell;
+	List	   *work_queue = NIL;
+
+	/* Phase 1: look over every subcommand and build up the work queue */
+	foreach(cell, cmds)
+		ATPrepCmd(&work_queue, rel, (AlterTableCmd *) lfirst(cell),
+				  recurse, false, lockmode, context);
+
+	/* Done with the open relation; its lock is retained until commit */
+	relation_close(rel, NoLock);
+
+	/* Phase 2: apply the system catalog updates */
+	ATRewriteCatalogs(&work_queue, lockmode, context);
+
+	/* Phase 3: scan/rewrite tables where required, then run afterStmts */
+	ATRewriteTables(parsetree, &work_queue, lockmode, context);
+}
+
+/*
+ * ATPrepCmd
+ *
+ * Traffic cop for ALTER TABLE Phase 1 operations, including simple
+ * recursion and permission checks.
+ *
+ * wqueue is the work queue being built; the entry for rel is found or
+ * created here.  cmd is the subcommand to prepare: it is copied before
+ * anything can modify it, and the copy (or, for ALTER COLUMN TYPE, the
+ * result of parse transformation) is appended to that entry's subcommand
+ * list for the pass selected by the switch below.  recurse, recursing,
+ * lockmode and context are handed through to the per-subcommand permission,
+ * prep and recursion routines.
+ *
+ * Errors out if rel is a partition with a pending detach (unless the
+ * subcommand is DETACH ... FINALIZE) or if the subcommand type is not
+ * recognized.
+ *
+ * Caller must have acquired appropriate lock type on relation already.
+ * This lock should be held until commit.
+ */
+static void
+ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
+		  bool recurse, bool recursing, LOCKMODE lockmode,
+		  AlterTableUtilityContext *context)
+{
+	AlteredTableInfo *tab;
+	int			pass = AT_PASS_UNSET;
+
+	/* Find or create work queue entry for this table */
+	tab = ATGetQueueEntry(wqueue, rel);
+
+	/*
+	 * Disallow any ALTER TABLE other than ALTER TABLE DETACH FINALIZE on
+	 * partitions that are pending detach.
+	 */
+	if (rel->rd_rel->relispartition &&
+		cmd->subtype != AT_DetachPartitionFinalize &&
+		PartitionHasPendingDetach(RelationGetRelid(rel)))
+		ereport(ERROR,
+				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				errmsg("cannot alter partition \"%s\" with an incomplete detach",
+					   RelationGetRelationName(rel)),
+				errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));
+
+	/*
+	 * Copy the original subcommand for each table.  This avoids conflicts
+	 * when different child tables need to make different parse
+	 * transformations (for example, the same column may have different column
+	 * numbers in different children).  It also ensures that we don't corrupt
+	 * the original parse tree, in case it is saved in plancache.
+	 */
+	cmd = copyObject(cmd);
+
+	/*
+	 * Do permissions and relkind checking, recursion to child tables if
+	 * needed, and any additional phase-1 processing needed.  (But beware of
+	 * adding any processing that looks at table details that another
+	 * subcommand could change.  In some cases we reject multiple subcommands
+	 * that could try to change the same state in contrary ways.)
+	 *
+	 * Each case must also set "pass", which selects the phase-2 list the
+	 * subcommand is queued on.
+	 */
+	switch (cmd->subtype)
+	{
+		case AT_AddColumn:		/* ADD COLUMN */
+			ATSimplePermissions(rel,
+								ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE);
+			ATPrepAddColumn(wqueue, rel, recurse, recursing, false, cmd,
+							lockmode, context);
+			/* Recursion occurs during execution phase */
+			pass = AT_PASS_ADD_COL;
+			break;
+		case AT_AddColumnToView:	/* add column via CREATE OR REPLACE VIEW */
+			ATSimplePermissions(rel, ATT_VIEW);
+			ATPrepAddColumn(wqueue, rel, recurse, recursing, true, cmd,
+							lockmode, context);
+			/* Recursion occurs during execution phase */
+			pass = AT_PASS_ADD_COL;
+			break;
+		case AT_ColumnDefault:	/* ALTER COLUMN DEFAULT */
+
+			/*
+			 * We allow defaults on views so that INSERT into a view can have
+			 * default-ish behavior.  This works because the rewriter
+			 * substitutes default values into INSERTs before it expands
+			 * rules.
+			 */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			/* No command-specific prep needed */
+			pass = cmd->def ? AT_PASS_ADD_OTHERCONSTR : AT_PASS_DROP;
+			break;
+		case AT_CookedColumnDefault:	/* add a pre-cooked default */
+			/* This is currently used only in CREATE TABLE */
+			/* (so the permission check really isn't necessary) */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			pass = AT_PASS_ADD_OTHERCONSTR;
+			break;
+		case AT_AddIdentity:
+			ATSimplePermissions(rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			pass = AT_PASS_ADD_OTHERCONSTR;
+			break;
+		case AT_SetIdentity:
+			ATSimplePermissions(rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			/* This should run after AddIdentity, so do it in MISC pass */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_DropIdentity:
+			ATSimplePermissions(rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			pass = AT_PASS_DROP;
+			break;
+		case AT_DropNotNull:	/* ALTER COLUMN DROP NOT NULL */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			ATPrepDropNotNull(rel, recurse, recursing);
+			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			pass = AT_PASS_DROP;
+			break;
+		case AT_SetNotNull:		/* ALTER COLUMN SET NOT NULL */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			/* Need command-specific recursion decision */
+			ATPrepSetNotNull(wqueue, rel, cmd, recurse, recursing,
+							 lockmode, context);
+			pass = AT_PASS_COL_ATTRS;
+			break;
+		case AT_CheckNotNull:	/* check column is already marked NOT NULL */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			/* No command-specific prep needed */
+			pass = AT_PASS_COL_ATTRS;
+			break;
+		case AT_DropExpression: /* ALTER COLUMN DROP EXPRESSION */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			ATPrepDropExpression(rel, cmd, recurse, recursing, lockmode);
+			pass = AT_PASS_DROP;
+			break;
+		case AT_SetStatistics:	/* ALTER COLUMN SET STATISTICS */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | ATT_PARTITIONED_INDEX | ATT_FOREIGN_TABLE);
+			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_SetOptions:		/* ALTER COLUMN SET ( options ) */
+		case AT_ResetOptions:	/* ALTER COLUMN RESET ( options ) */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_AlterCollationRefreshVersion:	/* ALTER COLLATION ... REFRESH
+												 * VERSION */
+			ATSimplePermissions(rel, ATT_INDEX);
+			/* This command never recurses */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_SetStorage:		/* ALTER COLUMN SET STORAGE */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE);
+			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_SetCompression:	/* ALTER COLUMN SET COMPRESSION */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW);
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_DropColumn:		/* DROP COLUMN */
+			ATSimplePermissions(rel,
+								ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE);
+			ATPrepDropColumn(wqueue, rel, recurse, recursing, cmd,
+							 lockmode, context);
+			/* Recursion occurs during execution phase */
+			pass = AT_PASS_DROP;
+			break;
+		case AT_AddIndex:		/* ADD INDEX */
+			ATSimplePermissions(rel, ATT_TABLE);
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_ADD_INDEX;
+			break;
+		case AT_AddConstraint:	/* ADD CONSTRAINT */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			/* Recursion occurs during execution phase */
+			/* No command-specific prep needed except saving recurse flag */
+			if (recurse)
+				cmd->subtype = AT_AddConstraintRecurse;
+			pass = AT_PASS_ADD_CONSTR;
+			break;
+		case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */
+			ATSimplePermissions(rel, ATT_TABLE);
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_ADD_INDEXCONSTR;
+			break;
+		case AT_DropConstraint: /* DROP CONSTRAINT */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			ATCheckPartitionsNotInUse(rel, lockmode);
+			/* Other recursion occurs during execution phase */
+			/* No command-specific prep needed except saving recurse flag */
+			if (recurse)
+				cmd->subtype = AT_DropConstraintRecurse;
+			pass = AT_PASS_DROP;
+			break;
+		case AT_AlterColumnType:	/* ALTER COLUMN TYPE */
+			ATSimplePermissions(rel,
+								ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE);
+			/* See comments for ATPrepAlterColumnType */
+			cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, recurse, lockmode,
+									  AT_PASS_UNSET, context);
+			Assert(cmd != NULL);
+			/* Performs own recursion */
+			ATPrepAlterColumnType(wqueue, tab, rel, recurse, recursing, cmd,
+								  lockmode, context);
+			pass = AT_PASS_ALTER_TYPE;
+			break;
+		case AT_AlterColumnGenericOptions:
+			ATSimplePermissions(rel, ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_ChangeOwner:	/* ALTER OWNER */
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_ClusterOn:		/* CLUSTER ON */
+		case AT_DropCluster:	/* SET WITHOUT CLUSTER */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW);
+			/* These commands never recurse */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_SetLogged:		/* SET LOGGED */
+			ATSimplePermissions(rel, ATT_TABLE);
+			if (tab->chgPersistence)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot change persistence setting twice")));
+			tab->chgPersistence = ATPrepChangePersistence(rel, true);
+			/* force rewrite if necessary; see comment in ATRewriteTables */
+			if (tab->chgPersistence)
+			{
+				tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE;
+				tab->newrelpersistence = RELPERSISTENCE_PERMANENT;
+			}
+			pass = AT_PASS_MISC;
+			break;
+		case AT_SetUnLogged:	/* SET UNLOGGED */
+			ATSimplePermissions(rel, ATT_TABLE);
+			if (tab->chgPersistence)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot change persistence setting twice")));
+			tab->chgPersistence = ATPrepChangePersistence(rel, false);
+			/* force rewrite if necessary; see comment in ATRewriteTables */
+			if (tab->chgPersistence)
+			{
+				tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE;
+				tab->newrelpersistence = RELPERSISTENCE_UNLOGGED;
+			}
+			pass = AT_PASS_MISC;
+			break;
+		case AT_DropOids:		/* SET WITHOUT OIDS */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			pass = AT_PASS_DROP;
+			break;
+		case AT_SetTableSpace:	/* SET TABLESPACE */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX |
+								ATT_PARTITIONED_INDEX);
+			/* This command never recurses */
+			ATPrepSetTableSpace(tab, rel, cmd->name, lockmode);
+			pass = AT_PASS_MISC;	/* doesn't actually matter */
+			break;
+		case AT_SetRelOptions:	/* SET (...) */
+		case AT_ResetRelOptions:	/* RESET (...) */
+		case AT_ReplaceRelOptions:	/* reset them all, then set just these */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_VIEW | ATT_MATVIEW | ATT_INDEX);
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_AddInherit:		/* INHERIT */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			ATPrepAddInherit(rel);
+			pass = AT_PASS_MISC;
+			break;
+		case AT_DropInherit:	/* NO INHERIT */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_AlterConstraint:	/* ALTER CONSTRAINT */
+			ATSimplePermissions(rel, ATT_TABLE);
+			pass = AT_PASS_MISC;
+			break;
+		case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			/* Recursion occurs during execution phase */
+			/* No command-specific prep needed except saving recurse flag */
+			if (recurse)
+				cmd->subtype = AT_ValidateConstraintRecurse;
+			pass = AT_PASS_MISC;
+			break;
+		case AT_ReplicaIdentity:	/* REPLICA IDENTITY ... */
+			ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW);
+			pass = AT_PASS_MISC;
+			/* This command never recurses */
+			/* No command-specific prep needed */
+			break;
+		case AT_EnableTrig:		/* ENABLE TRIGGER variants */
+		case AT_EnableAlwaysTrig:
+		case AT_EnableReplicaTrig:
+		case AT_EnableTrigAll:
+		case AT_EnableTrigUser:
+		case AT_DisableTrig:	/* DISABLE TRIGGER variants */
+		case AT_DisableTrigAll:
+		case AT_DisableTrigUser:
+			ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+			if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+				ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+			pass = AT_PASS_MISC;
+			break;
+		case AT_EnableRule:		/* ENABLE/DISABLE RULE variants */
+		case AT_EnableAlwaysRule:
+		case AT_EnableReplicaRule:
+		case AT_DisableRule:
+		case AT_AddOf:			/* OF */
+		case AT_DropOf:			/* NOT OF */
+		case AT_EnableRowSecurity:
+		case AT_DisableRowSecurity:
+		case AT_ForceRowSecurity:
+		case AT_NoForceRowSecurity:
+			ATSimplePermissions(rel, ATT_TABLE);
+			/* These commands never recurse */
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_GenericOptions:
+			ATSimplePermissions(rel, ATT_FOREIGN_TABLE);
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_AttachPartition:
+			ATSimplePermissions(rel, ATT_TABLE | ATT_PARTITIONED_INDEX);
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_DetachPartition:
+			ATSimplePermissions(rel, ATT_TABLE);
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		case AT_DetachPartitionFinalize:
+			ATSimplePermissions(rel, ATT_TABLE);
+			/* No command-specific prep needed */
+			pass = AT_PASS_MISC;
+			break;
+		default:				/* oops */
+			elog(ERROR, "unrecognized alter table type: %d",
+				 (int) cmd->subtype);
+			pass = AT_PASS_UNSET;	/* keep compiler quiet */
+			break;
+	}
+	Assert(pass > AT_PASS_UNSET);	/* every case above must have chosen a pass */
+
+	/* Add the subcommand to the appropriate list for phase 2 */
+	tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd);
+}
+
+/*
+ * ATRewriteCatalogs
+ *
+ * Phase 2 driver for ALTER TABLE.  The queued subcommands are executed pass
+ * by pass, which yields a "safe" execution order (one designed to avoid
+ * unnecessary conflicts).
+ */
+static void
+ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode,
+				  AlterTableUtilityContext *context)
+{
+	ListCell   *cell;
+	int			cur_pass;
+
+	/*
+	 * All tables advance together, one pass at a time, rather than each
+	 * table being finished before the next is started.  That is required
+	 * because work may have to be propagated from one table to another
+	 * (specifically, ALTER TYPE on a foreign key's PK has to dispatch the
+	 * re-adding of the foreign key constraint to the other table).  Such
+	 * work can only be pushed into a later pass, though.
+	 */
+	for (cur_pass = 0; cur_pass < AT_NUM_PASSES; cur_pass++)
+	{
+		/* Visit every table that has a work queue entry */
+		foreach(cell, *wqueue)
+		{
+			AlteredTableInfo *entry = (AlteredTableInfo *) lfirst(cell);
+			List	   *pending = entry->subcmds[cur_pass];
+			ListCell   *cmdcell;
+
+			/* Nothing queued for this table in this pass */
+			if (pending == NIL)
+				continue;
+
+			/*
+			 * Open the relation and remember it in the queue entry, so that
+			 * subroutines can close and reopen it if they need to.  Phase 1
+			 * already took the appropriate lock, so none is requested here.
+			 */
+			entry->rel = relation_open(entry->relid, NoLock);
+
+			foreach(cmdcell, pending)
+			{
+				AlterTableCmd *subcmd = castNode(AlterTableCmd, lfirst(cmdcell));
+
+				ATExecCmd(wqueue, entry, subcmd, lockmode, cur_pass, context);
+			}
+
+			/*
+			 * Cleanup after the ALTER TYPE pass happens here rather than in
+			 * ATExecAlterColumnType, because it should be done only once
+			 * even when several columns of a table are altered.
+			 */
+			if (cur_pass == AT_PASS_ALTER_TYPE)
+				ATPostAlterTypeCleanup(wqueue, entry, lockmode);
+
+			/* Close the relation if it is still recorded as open */
+			if (entry->rel)
+			{
+				relation_close(entry->rel, NoLock);
+				entry->rel = NULL;
+			}
+		}
+	}
+
+	/* Finally, see whether any table now needs a toast table. */
+	foreach(cell, *wqueue)
+	{
+		AlteredTableInfo *entry = (AlteredTableInfo *) lfirst(cell);
+		bool		is_plain_or_partitioned;
+		bool		check_toast;
+
+		is_plain_or_partitioned = (entry->relkind == RELKIND_RELATION ||
+								   entry->relkind == RELKIND_PARTITIONED_TABLE);
+
+		/*
+		 * A table that is the source of an ATTACH PARTITION command had
+		 * nothing changed that could alter its toasting requirement, so it
+		 * is not checked.  Materialized views are always checked.
+		 */
+		check_toast = (entry->relkind == RELKIND_MATVIEW) ||
+			(is_plain_or_partitioned && entry->partition_constraint == NULL);
+
+		if (check_toast)
+			AlterTableCreateToastTable(entry->relid, (Datum) 0, lockmode);
+	}
+}
+
+/*
+ * ATExecCmd: dispatch a subcommand to appropriate execution routine
+ */
+static void
+ATExecCmd(List **wqueue, AlteredTableInfo *tab,
+		  AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass,
+		  AlterTableUtilityContext *context)
+{
+	ObjectAddress address = InvalidObjectAddress;
+	Relation	rel = tab->rel;
+
+	switch (cmd->subtype)
+	{
+		case AT_AddColumn:		/* ADD COLUMN */
+		case AT_AddColumnToView:	/* add column via CREATE OR REPLACE VIEW */
+			address = ATExecAddColumn(wqueue, tab, rel, &cmd,
+									  false, false,
+									  lockmode, cur_pass, context);
+			break;
+		case AT_AddColumnRecurse:
+			address = ATExecAddColumn(wqueue, tab, rel, &cmd,
+									  true, false,
+									  lockmode, cur_pass, context);
+			break;
+		case AT_ColumnDefault:	/* ALTER COLUMN DEFAULT */
+			address = ATExecColumnDefault(rel, cmd->name, cmd->def, lockmode);
+			break;
+		case AT_CookedColumnDefault:	/* add a pre-cooked default */
+			address = ATExecCookedColumnDefault(rel, cmd->num, cmd->def);
+			break;
+		case AT_AddIdentity:
+			cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+									  cur_pass, context);
+			Assert(cmd != NULL);
+			address = ATExecAddIdentity(rel, cmd->name, cmd->def, lockmode);
+			break;
+		case AT_SetIdentity:
+			cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+									  cur_pass, context);
+			Assert(cmd != NULL);
+			address = ATExecSetIdentity(rel, cmd->name, cmd->def, lockmode);
+			break;
+		case AT_DropIdentity:
+			address = ATExecDropIdentity(rel, cmd->name, cmd->missing_ok, lockmode);
+			break;
+		case AT_DropNotNull:	/* ALTER COLUMN DROP NOT NULL */
+			address = ATExecDropNotNull(rel, cmd->name, lockmode);
+			break;
+		case AT_SetNotNull:		/* ALTER COLUMN SET NOT NULL */
+			address = ATExecSetNotNull(tab, rel, cmd->name, lockmode);
+			break;
+		case AT_CheckNotNull:	/* check column is already marked NOT NULL */
+			ATExecCheckNotNull(tab, rel, cmd->name, lockmode);
+			break;
+		case AT_DropExpression:
+			address = ATExecDropExpression(rel, cmd->name, cmd->missing_ok, lockmode);
+			break;
+		case AT_SetStatistics:	/* ALTER COLUMN SET STATISTICS */
+			address = ATExecSetStatistics(rel, cmd->name, cmd->num, cmd->def, lockmode);
+			break;
+		case AT_SetOptions:		/* ALTER COLUMN SET ( options ) */
+			address = ATExecSetOptions(rel, cmd->name, cmd->def, false, lockmode);
+			break;
+		case AT_ResetOptions:	/* ALTER COLUMN RESET ( options ) */
+			address = ATExecSetOptions(rel, cmd->name, cmd->def, true, lockmode);
+			break;
+		case AT_SetStorage:		/* ALTER COLUMN SET STORAGE */
+			address = ATExecSetStorage(rel, cmd->name, cmd->def, lockmode);
+			break;
+		case AT_SetCompression:
+			address = ATExecSetCompression(tab, rel, cmd->name, cmd->def,
+										   lockmode);
+			break;
+		case AT_DropColumn:		/* DROP COLUMN */
+			address = ATExecDropColumn(wqueue, rel, cmd->name,
+									   cmd->behavior, false, false,
+									   cmd->missing_ok, lockmode,
+									   NULL);
+			break;
+		case AT_DropColumnRecurse:	/* DROP COLUMN with recursion */
+			address = ATExecDropColumn(wqueue, rel, cmd->name,
+									   cmd->behavior, true, false,
+									   cmd->missing_ok, lockmode,
+									   NULL);
+			break;
+		case AT_AddIndex:		/* ADD INDEX */
+			address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, false,
+									 lockmode);
+			break;
+		case AT_ReAddIndex:		/* ADD INDEX */
+			address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, true,
+									 lockmode);
+			break;
+		case AT_ReAddStatistics:	/* ADD STATISTICS */
+			address = ATExecAddStatistics(tab, rel, (CreateStatsStmt *) cmd->def,
+										  true, lockmode);
+			break;
+		case AT_AddConstraint:	/* ADD CONSTRAINT */
+			/* Transform the command only during initial examination */
+			if (cur_pass == AT_PASS_ADD_CONSTR)
+				cmd = ATParseTransformCmd(wqueue, tab, rel, cmd,
+										  false, lockmode,
+										  cur_pass, context);
+			/* Depending on constraint type, might be no more work to do now */
+			if (cmd != NULL)
+				address =
+					ATExecAddConstraint(wqueue, tab, rel,
+										(Constraint *) cmd->def,
+										false, false, lockmode);
+			break;
+		case AT_AddConstraintRecurse:	/* ADD CONSTRAINT with recursion */
+			/* Transform the command only during initial examination */
+			if (cur_pass == AT_PASS_ADD_CONSTR)
+				cmd = ATParseTransformCmd(wqueue, tab, rel, cmd,
+										  true, lockmode,
+										  cur_pass, context);
+			/* Depending on constraint type, might be no more work to do now */
+			if (cmd != NULL)
+				address =
+					ATExecAddConstraint(wqueue, tab, rel,
+										(Constraint *) cmd->def,
+										true, false, lockmode);
+			break;
+		case AT_ReAddConstraint:	/* Re-add pre-existing check constraint */
+			address =
+				ATExecAddConstraint(wqueue, tab, rel, (Constraint *) cmd->def,
+									true, true, lockmode);
+			break;
+		case AT_ReAddDomainConstraint:	/* Re-add pre-existing domain check
+										 * constraint */
+			address =
+				AlterDomainAddConstraint(((AlterDomainStmt *) cmd->def)->typeName,
+										 ((AlterDomainStmt *) cmd->def)->def,
+										 NULL);
+			break;
+		case AT_ReAddComment:	/* Re-add existing comment */
+			address = CommentObject((CommentStmt *) cmd->def);
+			break;
+		case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */
+			address = ATExecAddIndexConstraint(tab, rel, (IndexStmt *) cmd->def,
+											   lockmode);
+			break;
+		case AT_AlterConstraint:	/* ALTER CONSTRAINT */
+			address = ATExecAlterConstraint(rel, cmd, false, false, lockmode);
+			break;
+		case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
+			address = ATExecValidateConstraint(wqueue, rel, cmd->name, false,
+											   false, lockmode);
+			break;
+		case AT_ValidateConstraintRecurse:	/* VALIDATE CONSTRAINT with
+											 * recursion */
+			address = ATExecValidateConstraint(wqueue, rel, cmd->name, true,
+											   false, lockmode);
+			break;
+		case AT_DropConstraint: /* DROP CONSTRAINT */
+			ATExecDropConstraint(rel, cmd->name, cmd->behavior,
+								 false, false,
+								 cmd->missing_ok, lockmode);
+			break;
+		case AT_DropConstraintRecurse:	/* DROP CONSTRAINT with recursion */
+			ATExecDropConstraint(rel, cmd->name, cmd->behavior,
+								 true, false,
+								 cmd->missing_ok, lockmode);
+			break;
+		case AT_AlterColumnType:	/* ALTER COLUMN TYPE */
+			/* parse transformation was done earlier */
+			address = ATExecAlterColumnType(tab, rel, cmd, lockmode);
+			break;
+		case AT_AlterColumnGenericOptions:	/* ALTER COLUMN OPTIONS */
+			address =
+				ATExecAlterColumnGenericOptions(rel, cmd->name,
+												(List *) cmd->def, lockmode);
+			break;
+		case AT_ChangeOwner:	/* ALTER OWNER */
+			ATExecChangeOwner(RelationGetRelid(rel),
+							  get_rolespec_oid(cmd->newowner, false),
+							  false, lockmode);
+			break;
+		case AT_ClusterOn:		/* CLUSTER ON */
+			address = ATExecClusterOn(rel, cmd->name, lockmode);
+			break;
+		case AT_DropCluster:	/* SET WITHOUT CLUSTER */
+			ATExecDropCluster(rel, lockmode);
+			break;
+		case AT_SetLogged:		/* SET LOGGED */
+		case AT_SetUnLogged:	/* SET UNLOGGED */
+			break;
+		case AT_DropOids:		/* SET WITHOUT OIDS */
+			/* nothing to do here, oid columns don't exist anymore */
+			break;
+		case AT_SetTableSpace:	/* SET TABLESPACE */
+
+			/*
+			 * Only do this for partitioned tables and indexes, for which this
+			 * is just a catalog change.  Other relation types which have
+			 * storage are handled by Phase 3.
+			 */
+			if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+				rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+				ATExecSetTableSpaceNoStorage(rel, tab->newTableSpace);
+
+			break;
+		case AT_SetRelOptions:	/* SET (...) */
+		case AT_ResetRelOptions:	/* RESET (...) */
+		case AT_ReplaceRelOptions:	/* replace entire option list */
+			ATExecSetRelOptions(rel, (List *) cmd->def, cmd->subtype, lockmode);
+			break;
+		case AT_EnableTrig:		/* ENABLE TRIGGER name */
+			ATExecEnableDisableTrigger(rel, cmd->name,
+									   TRIGGER_FIRES_ON_ORIGIN, false, lockmode);
+			break;
+		case AT_EnableAlwaysTrig:	/* ENABLE ALWAYS TRIGGER name */
+			ATExecEnableDisableTrigger(rel, cmd->name,
+									   TRIGGER_FIRES_ALWAYS, false, lockmode);
+			break;
+		case AT_EnableReplicaTrig:	/* ENABLE REPLICA TRIGGER name */
+			ATExecEnableDisableTrigger(rel, cmd->name,
+									   TRIGGER_FIRES_ON_REPLICA, false, lockmode);
+			break;
+		case AT_DisableTrig:	/* DISABLE TRIGGER name */
+			ATExecEnableDisableTrigger(rel, cmd->name,
+									   TRIGGER_DISABLED, false, lockmode);
+			break;
+		case AT_EnableTrigAll:	/* ENABLE TRIGGER ALL */
+			ATExecEnableDisableTrigger(rel, NULL,
+									   TRIGGER_FIRES_ON_ORIGIN, false, lockmode);
+			break;
+		case AT_DisableTrigAll: /* DISABLE TRIGGER ALL */
+			ATExecEnableDisableTrigger(rel, NULL,
+									   TRIGGER_DISABLED, false, lockmode);
+			break;
+		case AT_EnableTrigUser: /* ENABLE TRIGGER USER */
+			ATExecEnableDisableTrigger(rel, NULL,
+									   TRIGGER_FIRES_ON_ORIGIN, true, lockmode);
+			break;
+		case AT_DisableTrigUser:	/* DISABLE TRIGGER USER */
+			ATExecEnableDisableTrigger(rel, NULL,
+									   TRIGGER_DISABLED, true, lockmode);
+			break;
+
+		case AT_EnableRule:		/* ENABLE RULE name */
+			ATExecEnableDisableRule(rel, cmd->name,
+									RULE_FIRES_ON_ORIGIN, lockmode);
+			break;
+		case AT_EnableAlwaysRule:	/* ENABLE ALWAYS RULE name */
+			ATExecEnableDisableRule(rel, cmd->name,
+									RULE_FIRES_ALWAYS, lockmode);
+			break;
+		case AT_EnableReplicaRule:	/* ENABLE REPLICA RULE name */
+			ATExecEnableDisableRule(rel, cmd->name,
+									RULE_FIRES_ON_REPLICA, lockmode);
+			break;
+		case AT_DisableRule:	/* DISABLE RULE name */
+			ATExecEnableDisableRule(rel, cmd->name,
+									RULE_DISABLED, lockmode);
+			break;
+
+		case AT_AddInherit:
+			address = ATExecAddInherit(rel, (RangeVar *) cmd->def, lockmode);
+			break;
+		case AT_DropInherit:
+			address = ATExecDropInherit(rel, (RangeVar *) cmd->def, lockmode);
+			break;
+		case AT_AddOf:
+			address = ATExecAddOf(rel, (TypeName *) cmd->def, lockmode);
+			break;
+		case AT_DropOf:
+			ATExecDropOf(rel, lockmode);
+			break;
+		case AT_ReplicaIdentity:
+			ATExecReplicaIdentity(rel, (ReplicaIdentityStmt *) cmd->def, lockmode);
+			break;
+		case AT_EnableRowSecurity:
+			ATExecSetRowSecurity(rel, true);
+			break;
+		case AT_DisableRowSecurity:
+			ATExecSetRowSecurity(rel, false);
+			break;
+		case AT_ForceRowSecurity:
+			ATExecForceNoForceRowSecurity(rel, true);
+			break;
+		case AT_NoForceRowSecurity:
+			ATExecForceNoForceRowSecurity(rel, false);
+			break;
+		case AT_GenericOptions:
+			ATExecGenericOptions(rel, (List *) cmd->def);
+			break;
+		case AT_AttachPartition:
+			cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+									  cur_pass, context);
+			Assert(cmd != NULL);
+			if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+				ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def,
+									  context);
+			else
+				ATExecAttachPartitionIdx(wqueue, rel,
+										 ((PartitionCmd *) cmd->def)->name);
+			break;
+		case AT_DetachPartition:
+			cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+									  cur_pass, context);
+			Assert(cmd != NULL);
+			/* ATPrepCmd ensures it must be a table */
+			Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+			ATExecDetachPartition(wqueue, tab, rel,
+								  ((PartitionCmd *) cmd->def)->name,
+								  ((PartitionCmd *) cmd->def)->concurrent);
+			break;
+		case AT_DetachPartitionFinalize:
+			ATExecDetachPartitionFinalize(rel, ((PartitionCmd *) cmd->def)->name);
+			break;
+		case AT_AlterCollationRefreshVersion:
+			/* ATPrepCmd ensured it must be an index */
+			Assert(rel->rd_rel->relkind == RELKIND_INDEX);
+			ATExecAlterCollationRefreshVersion(rel, cmd->object);
+			break;
+		default:				/* oops */
+			elog(ERROR, "unrecognized alter table type: %d",
+				 (int) cmd->subtype);
+			break;
+	}
+
+	/*
+	 * Report the subcommand to interested event triggers.
+	 */
+	if (cmd)
+		EventTriggerCollectAlterTableSubcmd((Node *) cmd, address);
+
+	/*
+	 * Bump the command counter to ensure the next subcommand in the sequence
+	 * can see the changes so far
+	 */
+	CommandCounterIncrement();
+}
+
+/*
+ * ATParseTransformCmd: perform parse transformation for one subcommand
+ *
+ * Returns the transformed subcommand tree, if there is one, else NULL.
+ *
+ * The parser may hand back additional AlterTableCmd(s) and/or other
+ * utility statements, either before or after the original subcommand.
+ * Other AlterTableCmds are scheduled into the appropriate slot of the
+ * AlteredTableInfo (they had better be for later passes than the current one).
+ * Utility statements that are supposed to happen before the AlterTableCmd
+ * are executed immediately.  Those that are supposed to happen afterwards
+ * are added to the tab->afterStmts list to be done at the very end.
+ *
+ * Arguments:
+ *	wqueue: ALTER TABLE work queue; here it is only handed on to
+ *		ATPrepSetNotNull
+ *	tab: work-queue entry whose subcmds[] and afterStmts lists are appended to
+ *	rel: relation being altered; supplies the schema name, relation name and
+ *		OID for the one-table AlterTableStmt that is handed to the parser
+ *	cmd: the original subcommand to transform
+ *	recurse: copied into the RangeVar's inh flag; also turns a returned
+ *		AT_AddConstraint into AT_AddConstraintRecurse
+ *	lockmode: here it is only handed on to ATPrepSetNotNull
+ *	cur_pass: the pass the caller is currently executing
+ *	context: supplies the query string, and is passed to
+ *		ProcessUtilityForAlterTable when running the "before" statements
+ *
+ * Raises elog(ERROR) if a returned subcommand belongs to a pass earlier than
+ * cur_pass, or if the current pass ends up with anything other than a single
+ * subcommand whose subtype matches that of "cmd".
+ */
+static AlterTableCmd *
+ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+					int cur_pass, AlterTableUtilityContext *context)
+{
+	AlterTableCmd *newcmd = NULL;
+	AlterTableStmt *atstmt = makeNode(AlterTableStmt);
+	List	   *beforeStmts;
+	List	   *afterStmts;
+	ListCell   *lc;
+
+	/* Gin up an AlterTableStmt with just this subcommand and this table */
+	atstmt->relation =
+		makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+					 pstrdup(RelationGetRelationName(rel)),
+					 -1);
+	atstmt->relation->inh = recurse;
+	atstmt->cmds = list_make1(cmd);
+	atstmt->objtype = OBJECT_TABLE; /* needn't be picky here */
+	atstmt->missing_ok = false;
+
+	/*
+	 * Transform the AlterTableStmt.  atstmt is replaced by the parser's
+	 * result, and the before/after utility statement lists are filled in.
+	 */
+	atstmt = transformAlterTableStmt(RelationGetRelid(rel),
+									 atstmt,
+									 context->queryString,
+									 &beforeStmts,
+									 &afterStmts);
+
+	/*
+	 * Execute any statements that should happen before these subcommand(s),
+	 * bumping the command counter after each one.
+	 */
+	foreach(lc, beforeStmts)
+	{
+		Node	   *stmt = (Node *) lfirst(lc);
+
+		ProcessUtilityForAlterTable(stmt, context);
+		CommandCounterIncrement();
+	}
+
+	/* Examine the transformed subcommands and schedule them appropriately */
+	foreach(lc, atstmt->cmds)
+	{
+		AlterTableCmd *cmd2 = lfirst_node(AlterTableCmd, lc);
+		int			pass;
+
+		/*
+		 * This switch need only cover the subcommand types that can be added
+		 * by parse_utilcmd.c; otherwise, we'll use the default strategy of
+		 * executing the subcommand immediately, as a substitute for the
+		 * original subcommand.  (Note, however, that this does cause
+		 * AT_AddConstraint subcommands to be rescheduled into later passes,
+		 * which is important for index and foreign key constraints.)
+		 *
+		 * We assume we needn't do any phase-1 checks for added subcommands.
+		 *
+		 * On exit from the switch, "pass" is the pass in which cmd2 should
+		 * run; it is compared against cur_pass below.
+		 */
+		switch (cmd2->subtype)
+		{
+			case AT_SetNotNull:
+				/* Need command-specific recursion decision */
+				ATPrepSetNotNull(wqueue, rel, cmd2,
+								 recurse, false,
+								 lockmode, context);
+				pass = AT_PASS_COL_ATTRS;
+				break;
+			case AT_AddIndex:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_ADD_INDEX;
+				break;
+			case AT_AddIndexConstraint:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_ADD_INDEXCONSTR;
+				break;
+			case AT_AddConstraint:
+				/* Recursion occurs during execution phase */
+				if (recurse)
+					cmd2->subtype = AT_AddConstraintRecurse;
+				switch (castNode(Constraint, cmd2->def)->contype)
+				{
+					case CONSTR_PRIMARY:
+					case CONSTR_UNIQUE:
+					case CONSTR_EXCLUSION:
+						pass = AT_PASS_ADD_INDEXCONSTR;
+						break;
+					default:
+						pass = AT_PASS_ADD_OTHERCONSTR;
+						break;
+				}
+				break;
+			case AT_AlterColumnGenericOptions:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_MISC;
+				break;
+			default:
+				pass = cur_pass;
+				break;
+		}
+
+		if (pass < cur_pass)
+		{
+			/* Cannot schedule into a pass we already finished */
+			elog(ERROR, "ALTER TABLE scheduling failure: too late for pass %d",
+				 pass);
+		}
+		else if (pass > cur_pass)
+		{
+			/* OK, queue it up for later */
+			tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd2);
+		}
+		else
+		{
+			/*
+			 * We should see at most one subcommand for the current pass,
+			 * which is the transformed version of the original subcommand.
+			 * A second one, or one of a different subtype, is reported as a
+			 * scheduling failure.
+			 */
+			if (newcmd == NULL && cmd->subtype == cmd2->subtype)
+			{
+				/* Found the transformed version of our subcommand */
+				newcmd = cmd2;
+			}
+			else
+				elog(ERROR, "ALTER TABLE scheduling failure: bogus item for pass %d",
+					 pass);
+		}
+	}
+
+	/* Queue up any after-statements to happen at the end */
+	tab->afterStmts = list_concat(tab->afterStmts, afterStmts);
+
+	return newcmd;
+}
+
+/*
+ * ATRewriteTables: ALTER TABLE phase 3
+ *
+ * Makes three passes over the work queue:
+ *	1. For each queued relation that has storage, either rewrite it into a
+ *	   new heap (when tab->rewrite > 0) or, if there is something to verify,
+ *	   scan it in place; a pending tablespace change is handled here too.
+ *	2. Validate the foreign-key entries found in tab->constraints.
+ *	3. Execute the utility statements queued in tab->afterStmts.
+ *
+ * parsetree is the originating statement and is used only to fire the
+ * table-rewrite event trigger; when it is NULL no event trigger is fired.
+ * wqueue points to the list of AlteredTableInfo entries to process.
+ * lockmode is passed down to make_new_heap, ATRewriteTable and
+ * ATExecSetTableSpace.  context is passed to ProcessUtilityForAlterTable
+ * when running the after-statements.
+ */
+static void
+ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode,
+				AlterTableUtilityContext *context)
+{
+	ListCell   *ltab;
+
+	/* Go through each table that needs to be checked or rewritten */
+	foreach(ltab, *wqueue)
+	{
+		AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+
+		/* Relations without storage may be ignored here */
+		if (!RELKIND_HAS_STORAGE(tab->relkind))
+			continue;
+
+		/*
+		 * If we change column data types, the operation has to be propagated
+		 * to tables that use this table's rowtype as a column type.
+		 * tab->newvals will also be non-NULL in the case where we're adding a
+		 * column with a default.  We choose to forbid that case as well,
+		 * since composite types might eventually support defaults.
+		 *
+		 * (Eventually we'll probably need to check for composite type
+		 * dependencies even when we're just scanning the table without a
+		 * rewrite, but at the moment a composite type does not enforce any
+		 * constraints, so it's not necessary/appropriate to enforce them just
+		 * during ALTER.)
+		 */
+		if (tab->newvals != NIL || tab->rewrite > 0)
+		{
+			Relation	rel;
+
+			rel = table_open(tab->relid, NoLock);
+			find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
+			table_close(rel, NoLock);
+		}
+
+		/*
+		 * We only need to rewrite the table if at least one column needs to
+		 * be recomputed, or we are changing its persistence.
+		 *
+		 * There are two reasons for requiring a rewrite when changing
+		 * persistence: on one hand, we need to ensure that the buffers
+		 * belonging to each of the two relations are marked with or without
+		 * BM_PERMANENT properly.  On the other hand, since rewriting creates
+		 * and assigns a new relfilenode, we automatically create or drop an
+		 * init fork for the relation as appropriate.
+		 */
+		if (tab->rewrite > 0)
+		{
+			/* Build a temporary relation and copy data */
+			Relation	OldHeap;
+			Oid			OIDNewHeap;
+			Oid			NewTableSpace;
+			char		persistence;
+
+			OldHeap = table_open(tab->relid, NoLock);
+
+			/*
+			 * We don't support rewriting of system catalogs; there are too
+			 * many corner cases and too little benefit.  In particular this
+			 * is certainly not going to work for mapped catalogs.
+			 */
+			if (IsSystemRelation(OldHeap))
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot rewrite system relation \"%s\"",
+								RelationGetRelationName(OldHeap))));
+
+			if (RelationIsUsedAsCatalogTable(OldHeap))
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot rewrite table \"%s\" used as a catalog table",
+								RelationGetRelationName(OldHeap))));
+
+			/*
+			 * Don't allow rewrite on temp tables of other backends ... their
+			 * local buffer manager is not going to cope.
+			 */
+			if (RELATION_IS_OTHER_TEMP(OldHeap))
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot rewrite temporary tables of other sessions")));
+
+			/*
+			 * Select destination tablespace (same as original unless user
+			 * requested a change)
+			 */
+			if (tab->newTableSpace)
+				NewTableSpace = tab->newTableSpace;
+			else
+				NewTableSpace = OldHeap->rd_rel->reltablespace;
+
+			/*
+			 * Select persistence of transient table (same as original unless
+			 * user requested a change)
+			 */
+			persistence = tab->chgPersistence ?
+				tab->newrelpersistence : OldHeap->rd_rel->relpersistence;
+
+			table_close(OldHeap, NoLock);
+
+			/*
+			 * Fire off an Event Trigger now, before actually rewriting the
+			 * table.
+			 *
+			 * We don't support Event Trigger for nested commands anywhere,
+			 * here included, and parsetree is given NULL when coming from
+			 * AlterTableInternal.
+			 *
+			 * And fire it only once.
+			 */
+			if (parsetree)
+				EventTriggerTableRewrite((Node *) parsetree,
+										 tab->relid,
+										 tab->rewrite);
+
+			/*
+			 * Create transient table that will receive the modified data.
+			 *
+			 * Ensure it is marked correctly as logged or unlogged.  We have
+			 * to do this here so that buffers for the new relfilenode will
+			 * have the right persistence set, and at the same time ensure
+			 * that the original filenode's buffers will get read in with the
+			 * correct setting (i.e. the original one).  Otherwise a rollback
+			 * after the rewrite would possibly result with buffers for the
+			 * original filenode having the wrong persistence setting.
+			 *
+			 * NB: This relies on swap_relation_files() also swapping the
+			 * persistence. That wouldn't work for pg_class, but that can't be
+			 * unlogged anyway.
+			 */
+			OIDNewHeap = make_new_heap(tab->relid, NewTableSpace, persistence,
+									   lockmode);
+
+			/*
+			 * Copy the heap data into the new table with the desired
+			 * modifications, and test the current data within the table
+			 * against new constraints generated by ALTER TABLE commands.
+			 */
+			ATRewriteTable(tab, OIDNewHeap, lockmode);
+
+			/*
+			 * Swap the physical files of the old and new heaps, then rebuild
+			 * indexes and discard the old heap.  We can use RecentXmin for
+			 * the table's new relfrozenxid because we rewrote all the tuples
+			 * in ATRewriteTable, so no older Xid remains in the table.  Also,
+			 * we never try to swap toast tables by content, since we have no
+			 * interest in letting this code work on system catalogs.
+			 */
+			finish_heap_swap(tab->relid, OIDNewHeap,
+							 false, false, true,
+							 !OidIsValid(tab->newTableSpace),
+							 RecentXmin,
+							 ReadNextMultiXactId(),
+							 persistence);
+		}
+		else
+		{
+			/*
+			 * If required, test the current data within the table against new
+			 * constraints generated by ALTER TABLE commands, but don't
+			 * rebuild data.  (Passing InvalidOid tells ATRewriteTable that no
+			 * rewrite is wanted.)
+			 */
+			if (tab->constraints != NIL || tab->verify_new_notnull ||
+				tab->partition_constraint != NULL)
+				ATRewriteTable(tab, InvalidOid, lockmode);
+
+			/*
+			 * If we had SET TABLESPACE but no reason to reconstruct tuples,
+			 * just do a block-by-block copy.
+			 */
+			if (tab->newTableSpace)
+				ATExecSetTableSpace(tab->relid, tab->newTableSpace, lockmode);
+		}
+	}
+
+	/*
+	 * Foreign key constraints are checked in a final pass, since (a) it's
+	 * generally best to examine each one separately, and (b) it's at least
+	 * theoretically possible that we have changed both relations of the
+	 * foreign key, and we'd better have finished both rewrites before we try
+	 * to read the tables.
+	 */
+	foreach(ltab, *wqueue)
+	{
+		AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+		Relation	rel = NULL;
+		ListCell   *lcon;
+
+		/* Relations without storage may be ignored here too */
+		if (!RELKIND_HAS_STORAGE(tab->relkind))
+			continue;
+
+		foreach(lcon, tab->constraints)
+		{
+			NewConstraint *con = lfirst(lcon);
+
+			if (con->contype == CONSTR_FOREIGN)
+			{
+				Constraint *fkconstraint = (Constraint *) con->qual;
+				Relation	refrel;
+
+				if (rel == NULL)
+				{
+					/*
+					 * Long since locked, no need for another lock.  The
+					 * relation is opened lazily, only once a foreign-key
+					 * entry is actually found, and is reused for the rest.
+					 */
+					rel = table_open(tab->relid, NoLock);
+				}
+
+				refrel = table_open(con->refrelid, RowShareLock);
+
+				validateForeignKeyConstraint(fkconstraint->conname, rel, refrel,
+											 con->refindid,
+											 con->conid);
+
+				/*
+				 * No need to mark the constraint row as validated, we did
+				 * that when we inserted the row earlier.
+				 */
+
+				table_close(refrel, NoLock);
+			}
+		}
+
+		if (rel)
+			table_close(rel, NoLock);
+	}
+
+	/*
+	 * Finally, run any afterStmts that were queued up, bumping the command
+	 * counter after each one.
+	 */
+	foreach(ltab, *wqueue)
+	{
+		AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+		ListCell   *lc;
+
+		foreach(lc, tab->afterStmts)
+		{
+			Node	   *stmt = (Node *) lfirst(lc);
+
+			ProcessUtilityForAlterTable(stmt, context);
+			CommandCounterIncrement();
+		}
+	}
+}
+
+/*
+ * ATRewriteTable: scan or rewrite one table
+ *
+ * OIDNewHeap is InvalidOid if we don't need to rewrite
+ *
+ * "tab" describes the work to do: tab->constraints (CHECK entries are
+ * evaluated for every row; FOREIGN entries are ignored here),
+ * tab->partition_constraint, tab->verify_new_notnull, and tab->newvals
+ * (expressions that compute replacement column values, used only when
+ * tab->rewrite > 0).  When OIDNewHeap is valid, that relation is opened with
+ * "lockmode" and every row that passes the checks is inserted into it;
+ * otherwise the existing table is merely scanned, and only if there is
+ * something to verify.
+ *
+ * A row that has a null in a not-null column, fails a CHECK constraint, or
+ * fails the partition constraint is reported with ereport(ERROR).
+ */
+static void
+ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
+{
+	Relation	oldrel;
+	Relation	newrel;
+	TupleDesc	oldTupDesc;
+	TupleDesc	newTupDesc;
+	bool		needscan = false;
+	List	   *notnull_attrs;
+	int			i;
+	ListCell   *l;
+	EState	   *estate;
+	CommandId	mycid;
+	BulkInsertState bistate;
+	int			ti_options;
+	ExprState  *partqualstate = NULL;
+
+	/*
+	 * Open the relation(s).  We have surely already locked the existing
+	 * table.  The "old" descriptor is the copy saved in the work-queue entry;
+	 * the "new" one is the relation's current descriptor.
+	 */
+	oldrel = table_open(tab->relid, NoLock);
+	oldTupDesc = tab->oldDesc;
+	newTupDesc = RelationGetDescr(oldrel);	/* includes all mods */
+
+	if (OidIsValid(OIDNewHeap))
+		newrel = table_open(OIDNewHeap, lockmode);
+	else
+		newrel = NULL;
+
+	/*
+	 * Prepare a BulkInsertState and options for table_tuple_insert.  The FSM
+	 * is empty, so don't bother using it.
+	 */
+	if (newrel)
+	{
+		mycid = GetCurrentCommandId(true);
+		bistate = GetBulkInsertState();
+		ti_options = TABLE_INSERT_SKIP_FSM;
+	}
+	else
+	{
+		/* keep compiler quiet about using these uninitialized */
+		mycid = 0;
+		bistate = NULL;
+		ti_options = 0;
+	}
+
+	/*
+	 * Generate the constraint and default execution states
+	 */
+
+	estate = CreateExecutorState();
+
+	/*
+	 * Build the needed expression execution states.  Any CHECK constraint
+	 * forces a scan of the table.
+	 */
+	foreach(l, tab->constraints)
+	{
+		NewConstraint *con = lfirst(l);
+
+		switch (con->contype)
+		{
+			case CONSTR_CHECK:
+				needscan = true;
+				con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate);
+				break;
+			case CONSTR_FOREIGN:
+				/* Nothing to do here */
+				break;
+			default:
+				elog(ERROR, "unrecognized constraint type: %d",
+					 (int) con->contype);
+		}
+	}
+
+	/* Build expression execution states for partition check quals */
+	if (tab->partition_constraint)
+	{
+		needscan = true;
+		partqualstate = ExecPrepareExpr(tab->partition_constraint, estate);
+	}
+
+	foreach(l, tab->newvals)
+	{
+		NewColumnValue *ex = lfirst(l);
+
+		/* expr already planned */
+		ex->exprstate = ExecInitExpr((Expr *) ex->expr, NULL);
+	}
+
+	notnull_attrs = NIL;
+	if (newrel || tab->verify_new_notnull)
+	{
+		/*
+		 * If we are rebuilding the tuples OR if we added any new but not
+		 * verified NOT NULL constraints, check all not-null constraints. This
+		 * is a bit of overkill but it minimizes risk of bugs, and
+		 * heap_attisnull is a pretty cheap test anyway.
+		 *
+		 * notnull_attrs collects zero-based attribute indexes of the new
+		 * tuple descriptor; dropped columns are skipped.
+		 */
+		for (i = 0; i < newTupDesc->natts; i++)
+		{
+			Form_pg_attribute attr = TupleDescAttr(newTupDesc, i);
+
+			if (attr->attnotnull && !attr->attisdropped)
+				notnull_attrs = lappend_int(notnull_attrs, i);
+		}
+		if (notnull_attrs)
+			needscan = true;
+	}
+
+	if (newrel || needscan)
+	{
+		ExprContext *econtext;
+		TupleTableSlot *oldslot;
+		TupleTableSlot *newslot;
+		TableScanDesc scan;
+		MemoryContext oldCxt;
+		List	   *dropped_attrs = NIL;
+		ListCell   *lc;
+		Snapshot	snapshot;
+
+		if (newrel)
+			ereport(DEBUG1,
+					(errmsg_internal("rewriting table \"%s\"",
+							RelationGetRelationName(oldrel))));
+		else
+			ereport(DEBUG1,
+					(errmsg_internal("verifying table \"%s\"",
+							RelationGetRelationName(oldrel))));
+
+		if (newrel)
+		{
+			/*
+			 * All predicate locks on the tuples or pages are about to be made
+			 * invalid, because we move tuples around.  Promote them to
+			 * relation locks.
+			 */
+			TransferPredicateLocksToHeapRelation(oldrel);
+		}
+
+		econtext = GetPerTupleExprContext(estate);
+
+		/*
+		 * Create necessary tuple slots. When rewriting, two slots are needed,
+		 * otherwise one suffices. In the case where one slot suffices, we
+		 * need to use the new tuple descriptor, otherwise some constraints
+		 * can't be evaluated.  Note that even when the tuple layout is the
+		 * same and no rewrite is required, the tupDescs might not be
+		 * (consider ADD COLUMN without a default).
+		 */
+		if (tab->rewrite)
+		{
+			Assert(newrel != NULL);
+			oldslot = MakeSingleTupleTableSlot(oldTupDesc,
+											   table_slot_callbacks(oldrel));
+			newslot = MakeSingleTupleTableSlot(newTupDesc,
+											   table_slot_callbacks(newrel));
+
+			/*
+			 * Set all columns in the new slot to NULL initially, to ensure
+			 * columns added as part of the rewrite are initialized to NULL.
+			 * That is necessary as tab->newvals will not contain an
+			 * expression for columns with a NULL default, e.g. when adding a
+			 * column without a default together with a column with a default
+			 * requiring an actual rewrite.
+			 */
+			ExecStoreAllNullTuple(newslot);
+		}
+		else
+		{
+			oldslot = MakeSingleTupleTableSlot(newTupDesc,
+											   table_slot_callbacks(oldrel));
+			newslot = NULL;
+		}
+
+		/*
+		 * Any attributes that are dropped according to the new tuple
+		 * descriptor can be set to NULL. We precompute the list of dropped
+		 * attributes to avoid needing to do so in the per-tuple loop.
+		 */
+		for (i = 0; i < newTupDesc->natts; i++)
+		{
+			if (TupleDescAttr(newTupDesc, i)->attisdropped)
+				dropped_attrs = lappend_int(dropped_attrs, i);
+		}
+
+		/*
+		 * Scan through the rows, generating a new row if needed and then
+		 * checking all the constraints.
+		 */
+		snapshot = RegisterSnapshot(GetLatestSnapshot());
+		scan = table_beginscan(oldrel, snapshot, 0, NULL);
+
+		/*
+		 * Switch to per-tuple memory context and reset it for each tuple
+		 * produced, so we don't leak memory.
+		 */
+		oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+
+		while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot))
+		{
+			TupleTableSlot *insertslot;
+
+			if (tab->rewrite > 0)
+			{
+				/* Extract data from old tuple */
+				slot_getallattrs(oldslot);
+				ExecClearTuple(newslot);
+
+				/*
+				 * copy attributes: the old tuple's deformed values and null
+				 * flags are copied verbatim; columns that have a newvals
+				 * expression are overwritten further down
+				 */
+				memcpy(newslot->tts_values, oldslot->tts_values,
+					   sizeof(Datum) * oldslot->tts_nvalid);
+				memcpy(newslot->tts_isnull, oldslot->tts_isnull,
+					   sizeof(bool) * oldslot->tts_nvalid);
+
+				/* Set dropped attributes to null in new tuple */
+				foreach(lc, dropped_attrs)
+					newslot->tts_isnull[lfirst_int(lc)] = true;
+
+				/*
+				 * Process supplied expressions to replace selected columns.
+				 *
+				 * First, evaluate expressions whose inputs come from the old
+				 * tuple.
+				 */
+				econtext->ecxt_scantuple = oldslot;
+
+				foreach(l, tab->newvals)
+				{
+					NewColumnValue *ex = lfirst(l);
+
+					if (ex->is_generated)
+						continue;
+
+					newslot->tts_values[ex->attnum - 1]
+						= ExecEvalExpr(ex->exprstate,
+									   econtext,
+									   &newslot->tts_isnull[ex->attnum - 1]);
+				}
+
+				ExecStoreVirtualTuple(newslot);
+
+				/*
+				 * Now, evaluate any expressions whose inputs come from the
+				 * new tuple.  We assume these columns won't reference each
+				 * other, so that there's no ordering dependency.
+				 */
+				econtext->ecxt_scantuple = newslot;
+
+				foreach(l, tab->newvals)
+				{
+					NewColumnValue *ex = lfirst(l);
+
+					if (!ex->is_generated)
+						continue;
+
+					newslot->tts_values[ex->attnum - 1]
+						= ExecEvalExpr(ex->exprstate,
+									   econtext,
+									   &newslot->tts_isnull[ex->attnum - 1]);
+				}
+
+				/*
+				 * Constraints might reference the tableoid column, so
+				 * initialize tts_tableOid before evaluating them.  Note that
+				 * it is set to the OID of the existing relation, not that of
+				 * the transient new heap.
+				 */
+				newslot->tts_tableOid = RelationGetRelid(oldrel);
+				insertslot = newslot;
+			}
+			else
+			{
+				/*
+				 * If there's no rewrite, old and new table are guaranteed to
+				 * have the same AM, so we can just use the old slot to verify
+				 * new constraints etc.
+				 */
+				insertslot = oldslot;
+			}
+
+			/* Now check any constraints on the possibly-changed tuple */
+			econtext->ecxt_scantuple = insertslot;
+
+			foreach(l, notnull_attrs)
+			{
+				int			attn = lfirst_int(l);
+
+				if (slot_attisnull(insertslot, attn + 1))
+				{
+					Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn);
+
+					ereport(ERROR,
+							(errcode(ERRCODE_NOT_NULL_VIOLATION),
+							 errmsg("column \"%s\" of relation \"%s\" contains null values",
+									NameStr(attr->attname),
+									RelationGetRelationName(oldrel)),
+							 errtablecol(oldrel, attn + 1)));
+				}
+			}
+
+			foreach(l, tab->constraints)
+			{
+				NewConstraint *con = lfirst(l);
+
+				switch (con->contype)
+				{
+					case CONSTR_CHECK:
+						if (!ExecCheck(con->qualstate, econtext))
+							ereport(ERROR,
+									(errcode(ERRCODE_CHECK_VIOLATION),
+									 errmsg("check constraint \"%s\" of relation \"%s\" is violated by some row",
+											con->name,
+											RelationGetRelationName(oldrel)),
+									 errtableconstraint(oldrel, con->name)));
+						break;
+					case CONSTR_FOREIGN:
+						/* Nothing to do here */
+						break;
+					default:
+						elog(ERROR, "unrecognized constraint type: %d",
+							 (int) con->contype);
+				}
+			}
+
+			if (partqualstate && !ExecCheck(partqualstate, econtext))
+			{
+				if (tab->validate_default)
+					ereport(ERROR,
+							(errcode(ERRCODE_CHECK_VIOLATION),
+							 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
+									RelationGetRelationName(oldrel)),
+							 errtable(oldrel)));
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_CHECK_VIOLATION),
+							 errmsg("partition constraint of relation \"%s\" is violated by some row",
+									RelationGetRelationName(oldrel)),
+							 errtable(oldrel)));
+			}
+
+			/* Write the tuple out to the new relation */
+			if (newrel)
+				table_tuple_insert(newrel, insertslot, mycid,
+								   ti_options, bistate);
+
+			ResetExprContext(econtext);
+
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		MemoryContextSwitchTo(oldCxt);
+		table_endscan(scan);
+		UnregisterSnapshot(snapshot);
+
+		ExecDropSingleTupleTableSlot(oldslot);
+		if (newslot)
+			ExecDropSingleTupleTableSlot(newslot);
+	}
+
+	FreeExecutorState(estate);
+
+	table_close(oldrel, NoLock);
+	if (newrel)
+	{
+		FreeBulkInsertState(bistate);
+
+		table_finish_bulk_insert(newrel, ti_options);
+
+		table_close(newrel, NoLock);
+	}
+}
+
+/*
+ * ATGetQueueEntry: find or create an entry in the ALTER TABLE work queue
+ *
+ * Looks through *wqueue for an AlteredTableInfo whose relid matches "rel"
+ * and returns it.  If there is none, a zero-initialized entry is allocated,
+ * filled in from "rel", appended to *wqueue, and returned.
+ */
+static AlteredTableInfo *
+ATGetQueueEntry(List **wqueue, Relation rel)
+{
+	Oid			target_relid = RelationGetRelid(rel);
+	AlteredTableInfo *entry;
+	ListCell   *cell;
+
+	/* Reuse the existing entry if this relation was queued before */
+	foreach(cell, *wqueue)
+	{
+		AlteredTableInfo *candidate = (AlteredTableInfo *) lfirst(cell);
+
+		if (candidate->relid == target_relid)
+			return candidate;
+	}
+
+	/*
+	 * No entry yet, so build one.  The relation's current tuple descriptor
+	 * is copied right away, before anything interesting can happen to it.
+	 */
+	entry = (AlteredTableInfo *) palloc0(sizeof(AlteredTableInfo));
+	entry->oldDesc = CreateTupleDescCopyConstr(RelationGetDescr(rel));
+	entry->relkind = rel->rd_rel->relkind;
+	entry->relid = target_relid;
+	entry->rel = NULL;			/* set later */
+	entry->chgPersistence = false;
+	entry->newrelpersistence = RELPERSISTENCE_PERMANENT;
+
+	*wqueue = lappend(*wqueue, entry);
+
+	return entry;
+}
+
+/*
+ * ATSimplePermissions
+ *
+ * Common checks applied to the target of an ALTER TABLE subcommand:
+ * - its relkind must map to one of the ATT_* flags set in allowed_targets,
+ *   else ATWrongRelkindError is called
+ * - the current user must be its owner
+ * - it must not be a system catalog, unless allowSystemTableMods is set
+ */
+static void
+ATSimplePermissions(Relation rel, int allowed_targets)
+{
+	char		relkind = rel->rd_rel->relkind;
+	int			target_bit;
+
+	/* Map the relkind to its ATT_* flag; 0 matches no allowed target */
+	if (relkind == RELKIND_RELATION ||
+		relkind == RELKIND_PARTITIONED_TABLE)
+		target_bit = ATT_TABLE;
+	else if (relkind == RELKIND_VIEW)
+		target_bit = ATT_VIEW;
+	else if (relkind == RELKIND_MATVIEW)
+		target_bit = ATT_MATVIEW;
+	else if (relkind == RELKIND_INDEX)
+		target_bit = ATT_INDEX;
+	else if (relkind == RELKIND_PARTITIONED_INDEX)
+		target_bit = ATT_PARTITIONED_INDEX;
+	else if (relkind == RELKIND_COMPOSITE_TYPE)
+		target_bit = ATT_COMPOSITE_TYPE;
+	else if (relkind == RELKIND_FOREIGN_TABLE)
+		target_bit = ATT_FOREIGN_TABLE;
+	else
+		target_bit = 0;
+
+	/* Reject relation kinds the caller did not ask for */
+	if (!(target_bit & allowed_targets))
+		ATWrongRelkindError(rel, allowed_targets);
+
+	/* Only the owner may proceed */
+	if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER,
+					   get_relkind_objtype(rel->rd_rel->relkind),
+					   RelationGetRelationName(rel));
+
+	/* System catalogs are off limits unless explicitly allowed */
+	if (!allowSystemTableMods)
+	{
+		if (IsSystemRelation(rel))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied: \"%s\" is a system catalog",
+							RelationGetRelationName(rel))));
+	}
+}
+
+/*
+ * ATWrongRelkindError
+ *
+ * Report, via ereport(ERROR), that "rel" is not one of the relation kinds
+ * permitted by allowed_targets.  The message wording is picked by exact
+ * match on the ATT_* combination; combinations not listed here fall back to
+ * a generic message.
+ */
+static void
+ATWrongRelkindError(Relation rel, int allowed_targets)
+{
+	const char *fmt;
+
+	if (allowed_targets == ATT_TABLE)
+		fmt = _("\"%s\" is not a table");
+	else if (allowed_targets == (ATT_TABLE | ATT_VIEW))
+		fmt = _("\"%s\" is not a table or view");
+	else if (allowed_targets == (ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE))
+		fmt = _("\"%s\" is not a table, view, or foreign table");
+	else if (allowed_targets == (ATT_TABLE | ATT_VIEW | ATT_MATVIEW | ATT_INDEX))
+		fmt = _("\"%s\" is not a table, view, materialized view, or index");
+	else if (allowed_targets == (ATT_TABLE | ATT_MATVIEW))
+		fmt = _("\"%s\" is not a table or materialized view");
+	else if (allowed_targets == (ATT_TABLE | ATT_MATVIEW | ATT_INDEX))
+		fmt = _("\"%s\" is not a table, materialized view, or index");
+	else if (allowed_targets == (ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE))
+		fmt = _("\"%s\" is not a table, materialized view, or foreign table");
+	else if (allowed_targets == (ATT_TABLE | ATT_FOREIGN_TABLE))
+		fmt = _("\"%s\" is not a table or foreign table");
+	else if (allowed_targets == (ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE))
+		fmt = _("\"%s\" is not a table, composite type, or foreign table");
+	else if (allowed_targets == (ATT_TABLE | ATT_MATVIEW | ATT_INDEX | ATT_FOREIGN_TABLE))
+		fmt = _("\"%s\" is not a table, materialized view, index, or foreign table");
+	else if (allowed_targets == ATT_VIEW)
+		fmt = _("\"%s\" is not a view");
+	else if (allowed_targets == ATT_FOREIGN_TABLE)
+		fmt = _("\"%s\" is not a foreign table");
+	else
+	{
+		/* shouldn't get here, add all necessary cases above */
+		fmt = _("\"%s\" is of the wrong type");
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+			 errmsg(fmt, RelationGetRelationName(rel))));
+}
+
+/*
+ * ATSimpleRecursion
+ *
+ * Generic recursion to inheritance children, good enough for most ALTER
+ * TABLE subcommands.  Every direct or indirect child is handed to ATPrepCmd
+ * exactly once (even if it is reachable through several inheritance
+ * paths); the order in which children are visited is unspecified.
+ */
+static void
+ATSimpleRecursion(List **wqueue, Relation rel,
+				  AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+				  AlterTableUtilityContext *context)
+{
+	Oid			parentoid;
+	List	   *inheritors;
+	ListCell   *lc;
+
+	/*
+	 * Nothing to do unless recursion was requested and the relation has (or
+	 * at least might have) children.
+	 */
+	if (!recurse || !rel->rd_rel->relhassubclass)
+		return;
+
+	parentoid = RelationGetRelid(rel);
+
+	/*
+	 * find_all_inheritors walks the whole inheritance tree for us and locks
+	 * each member, so a flat loop over its result is sufficient.
+	 */
+	inheritors = find_all_inheritors(parentoid, lockmode, NULL);
+
+	foreach(lc, inheritors)
+	{
+		Oid			inheritoroid = lfirst_oid(lc);
+
+		/* the list includes the parent itself; skip that entry */
+		if (inheritoroid != parentoid)
+		{
+			/* lock was already taken by find_all_inheritors */
+			Relation	inheritor = relation_open(inheritoroid, NoLock);
+
+			CheckTableNotInUse(inheritor, "ALTER TABLE");
+			ATPrepCmd(wqueue, inheritor, cmd, false, true, lockmode, context);
+			relation_close(inheritor, NoLock);
+		}
+	}
+}
+
+/*
+ * ATCheckPartitionsNotInUse
+ *
+ * Lock every partition of the given partitioned table at "lockmode" and
+ * verify with CheckTableNotInUse that none of them is in use.
+ *
+ * Relations that are not partitioned tables are left alone; in particular
+ * a legacy inheritance parent is not processed at all.
+ */
+static void
+ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode)
+{
+	List	   *partitions;
+	ListCell   *lc;
+
+	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		return;
+
+	partitions = find_all_inheritors(RelationGetRelid(rel), lockmode, NULL);
+
+	/* element 0 is the parent rel itself, so start from element 1 */
+	for_each_from(lc, partitions, 1)
+	{
+		/* lock was already taken by find_all_inheritors */
+		Relation	partrel = table_open(lfirst_oid(lc), NoLock);
+
+		CheckTableNotInUse(partrel, "ALTER TABLE");
+		table_close(partrel, NoLock);
+	}
+
+	list_free(partitions);
+}
+
+/*
+ * ATTypedTableRecursion
+ *
+ * Pass an ALTER TYPE subcommand on to every typed table built on the
+ * composite type "rel".  The RESTRICT/CASCADE check is performed by
+ * find_typed_table_dependencies using cmd->behavior.  Each typed table is
+ * prepared with recursion enabled, so under CASCADE its inheritance
+ * children are processed as well.
+ */
+static void
+ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd,
+					  LOCKMODE lockmode, AlterTableUtilityContext *context)
+{
+	List	   *typed_tables;
+	ListCell   *lc;
+
+	Assert(rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);
+
+	typed_tables = find_typed_table_dependencies(rel->rd_rel->reltype,
+												 RelationGetRelationName(rel),
+												 cmd->behavior);
+
+	foreach(lc, typed_tables)
+	{
+		/* open and lock the typed table; the lock is kept after closing */
+		Relation	typedrel = relation_open(lfirst_oid(lc), lockmode);
+
+		CheckTableNotInUse(typedrel, "ALTER TABLE");
+		ATPrepCmd(wqueue, typedrel, cmd, true, true, lockmode, context);
+		relation_close(typedrel, NoLock);
+	}
+}
+
+
+/*
+ * find_composite_type_dependencies
+ *
+ * Raise an error if the type "typeOid" is used as the type of a user column
+ * in some plain table, materialized view, or partitioned table, whether
+ * directly or buried inside other container types (composites, arrays,
+ * etc).  Someday we would like to propagate the check or rewrite into such
+ * tables instead, but for the moment we simply refuse.
+ *
+ * For the error message, the caller passes either the relation that owns
+ * the rowtype (origRelation) or a type name (origTypeName), never both.
+ *
+ * Despite the (historical) name, "typeOid" need not be a composite type: it
+ * may equally be another container type such as an array or range, or a
+ * domain over one of those.
+ *
+ * Functions and views that depend on the type are assumed not to be a
+ * reason to reject the ALTER.  (How safe is this really?)
+ */
+void
+find_composite_type_dependencies(Oid typeOid, Relation origRelation,
+								 const char *origTypeName)
+{
+	Relation	pg_depend_rel;
+	ScanKeyData skey[2];
+	SysScanDesc scan;
+	HeapTuple	tup;
+
+	/* we call ourselves recursively, so guard against stack overflow */
+	check_stack_depth();
+
+	/*
+	 * Visit every pg_depend row whose referenced object is the given type.
+	 * (refobjsubid is assumed to be irrelevant for a type.)
+	 */
+	pg_depend_rel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(TypeRelationId));
+	ScanKeyInit(&skey[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(typeOid));
+
+	scan = systable_beginscan(pg_depend_rel, DependReferenceIndexId, true,
+							  NULL, 2, skey);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(tup);
+		Relation	deprel;
+		Form_pg_attribute depatt;
+		char		deprelkind;
+
+		if (dep->classid == TypeRelationId)
+		{
+			/*
+			 * A type depending on our type must be an array, domain, or
+			 * range wrapped around it; look for users of that container
+			 * too.  Any error raised down there deliberately names the
+			 * original type rather than the container.
+			 */
+			find_composite_type_dependencies(dep->objid,
+											 origRelation, origTypeName);
+			continue;
+		}
+
+		/*
+		 * Anything else is only interesting if it is a user column of a
+		 * relation (system columns are assumed never to have interesting
+		 * types).
+		 */
+		if (dep->classid != RelationRelationId || dep->objsubid <= 0)
+			continue;
+
+		deprel = relation_open(dep->objid, AccessShareLock);
+		depatt = TupleDescAttr(deprel->rd_att, dep->objsubid - 1);
+		deprelkind = deprel->rd_rel->relkind;
+
+		if (deprelkind == RELKIND_RELATION ||
+			deprelkind == RELKIND_MATVIEW ||
+			deprelkind == RELKIND_PARTITIONED_TABLE)
+		{
+			/*
+			 * Found a column using the type.  Word the complaint according
+			 * to what the caller is altering: a named type or composite
+			 * type, a foreign table, or some other relation.
+			 */
+			if (origTypeName ||
+				origRelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it",
+								origTypeName ? origTypeName :
+								RelationGetRelationName(origRelation),
+								RelationGetRelationName(deprel),
+								NameStr(depatt->attname))));
+			else if (origRelation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter foreign table \"%s\" because column \"%s.%s\" uses its row type",
+								RelationGetRelationName(origRelation),
+								RelationGetRelationName(deprel),
+								NameStr(depatt->attname))));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter table \"%s\" because column \"%s.%s\" uses its row type",
+								RelationGetRelationName(origRelation),
+								RelationGetRelationName(deprel),
+								NameStr(depatt->attname))));
+		}
+		else if (OidIsValid(deprel->rd_rel->reltype))
+		{
+			/*
+			 * Views and composite types are harmless in themselves, but
+			 * their rowtypes may in turn be used elsewhere, so chase those.
+			 */
+			find_composite_type_dependencies(deprel->rd_rel->reltype,
+											 origRelation, origTypeName);
+		}
+
+		relation_close(deprel, AccessShareLock);
+	}
+
+	systable_endscan(scan);
+
+	relation_close(pg_depend_rel, AccessShareLock);
+}
+
+
+/*
+ * find_typed_table_dependencies
+ *
+ * Collect the OIDs of all typed tables built on the composite type
+ * "typeOid" (found by scanning pg_class for a matching reloftype).
+ *
+ * If there is at least one such table and behavior is DROP_RESTRICT, an
+ * error mentioning "typeName" is raised; otherwise the list of table OIDs
+ * (possibly NIL) is returned.
+ */
+static List *
+find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior behavior)
+{
+	List	   *typed_tables = NIL;
+	Relation	pg_class_rel;
+	ScanKeyData skey;
+	TableScanDesc classScan;
+	HeapTuple	classTup;
+
+	pg_class_rel = table_open(RelationRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey,
+				Anum_pg_class_reloftype,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(typeOid));
+
+	classScan = table_beginscan_catalog(pg_class_rel, 1, &skey);
+
+	while ((classTup = heap_getnext(classScan, ForwardScanDirection)) != NULL)
+	{
+		/* under RESTRICT, the first typed table found is already fatal */
+		if (behavior == DROP_RESTRICT)
+			ereport(ERROR,
+					(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+					 errmsg("cannot alter type \"%s\" because it is the type of a typed table",
+							typeName),
+					 errhint("Use ALTER ... CASCADE to alter the typed tables too.")));
+
+		typed_tables = lappend_oid(typed_tables,
+								   ((Form_pg_class) GETSTRUCT(classTup))->oid);
+	}
+
+	table_endscan(classScan);
+	table_close(pg_class_rel, AccessShareLock);
+
+	return typed_tables;
+}
+
+
+/*
+ * check_of_type
+ *
+ * Verify that a type may be used in CREATE TABLE OF/ALTER TABLE OF, raising
+ * an error if not.  For now only types that originated with CREATE TYPE AS
+ * are accepted, i.e. composite types whose underlying relation has relkind
+ * RELKIND_COMPOSITE_TYPE.  Any row type could be supported in principle,
+ * but that would mean dealing with a number of extra corner cases in the
+ * DDL commands.  (Allowing domain-over-composite would also raise awkward
+ * questions about whether and how the domain's constraints should apply to
+ * derived tables.)
+ */
+void
+check_of_type(HeapTuple typetuple)
+{
+	Form_pg_type typ = (Form_pg_type) GETSTRUCT(typetuple);
+
+	if (typ->typtype == TYPTYPE_COMPOSITE)
+	{
+		Relation	typrel;
+		char		typrelkind;
+
+		Assert(OidIsValid(typ->typrelid));
+		typrel = relation_open(typ->typrelid, AccessShareLock);
+		typrelkind = typrel->rd_rel->relkind;
+
+		/*
+		 * Drop our reference to the type's relation but hold on to the
+		 * AccessShareLock until transaction commit, so that nobody can drop
+		 * or ALTER the type before the typed table creation/conversion
+		 * commits.
+		 */
+		relation_close(typrel, NoLock);
+
+		if (typrelkind == RELKIND_COMPOSITE_TYPE)
+			return;				/* acceptable */
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+			 errmsg("type %s is not a composite type",
+					format_type_be(typ->oid))));
+}
+
+
+/*
+ * ALTER TABLE ADD COLUMN
+ *
+ * Preparation step for adding a column.  It is assumed that parse_utilcmd.c
+ * strips CHECK, NOT NULL, and FOREIGN KEY constraints out of the
+ * AT_AddColumn AlterTableCmd and queues them as separate AlterTableCmd's.
+ *
+ * The usual ALTER TABLE recursion machinery is not usable for ADD COLUMN:
+ * whether to recurse can only be decided at execution time, depending on
+ * whether a column is really added or merely merged with an existing one.
+ * (A static pre-pass could not get multiple-inheritance cases right.)  So
+ * all we do here is reject typed tables, forward the command to typed
+ * tables of a composite type, and mark the command for recursion.
+ */
+static void
+ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+				bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode,
+				AlterTableUtilityContext *context)
+{
+	/* a typed table may gain a column only when we are recursing to it */
+	if (!recursing && rel->rd_rel->reloftype)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot add column to typed table")));
+
+	/* altering a composite type: pass the command on to its typed tables */
+	if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+		ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
+
+	/* request recursion at execution time, except for views */
+	if (!is_view && recurse)
+		cmd->subtype = AT_AddColumnRecurse;
+}
+
+/*
+ * ATExecAddColumn
+ *
+ * Add a column to a table.  The return value is the address of the
+ * new column in the parent relation.  InvalidObjectAddress is returned
+ * instead when no column is created here: either the definition was merged
+ * into an already-existing column of a recursion child, or the column
+ * already exists and IF NOT EXISTS was specified.
+ *
+ * cmd is pass-by-ref so that we can replace it with the parse-transformed
+ * copy (but that happens only after we check for IF NOT EXISTS).
+ *
+ * wqueue: work queue; entries for child tables are looked up or created
+ *		via ATGetQueueEntry before recursing to them.
+ * tab: work queue entry for rel; its rewrite, newvals and
+ *		verify_new_notnull fields may be updated here for Phase 3.
+ * recurse: whether children are to be processed too.  If false and the
+ *		table has inheritance children, an error is raised.
+ * recursing: true when this function calls itself for a child table; in
+ *		that case the permission check is done here and the command is not
+ *		parse-transformed again.
+ * lockmode: lock level used when locking the direct children.
+ * cur_pass: passed through to ATParseTransformCmd.
+ * context: may be NULL, in which case no parse transformation is done.
+ */
+static ObjectAddress
+ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
+				AlterTableCmd **cmd,
+				bool recurse, bool recursing,
+				LOCKMODE lockmode, int cur_pass,
+				AlterTableUtilityContext *context)
+{
+	Oid			myrelid = RelationGetRelid(rel);
+	ColumnDef  *colDef = castNode(ColumnDef, (*cmd)->def);
+	bool		if_not_exists = (*cmd)->missing_ok;
+	Relation	pgclass,
+				attrdesc;
+	HeapTuple	reltup;
+	FormData_pg_attribute attribute;
+	int			newattnum;
+	char		relkind;
+	HeapTuple	typeTuple;
+	Oid			typeOid;
+	int32		typmod;
+	Oid			collOid;
+	Form_pg_type tform;
+	Expr	   *defval;
+	List	   *children;
+	ListCell   *child;
+	AlterTableCmd *childcmd;
+	AclResult	aclresult;
+	ObjectAddress address;
+	TupleDesc	tupdesc;
+	FormData_pg_attribute *aattr[] = {&attribute};
+
+	/* At top level, permission check was done in ATPrepCmd, else do it */
+	if (recursing)
+		ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	if (rel->rd_rel->relispartition && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot add column to a partition")));
+
+	attrdesc = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/*
+	 * Are we adding the column to a recursion child?  If so, check whether to
+	 * merge with an existing definition for the column.  If we do merge, we
+	 * must not recurse.  Children will already have the column, and recursing
+	 * into them would mess up attinhcount.
+	 */
+	if (colDef->inhcount > 0)
+	{
+		HeapTuple	tuple;
+
+		/* Does child already have a column by this name? */
+		tuple = SearchSysCacheCopyAttName(myrelid, colDef->colname);
+		if (HeapTupleIsValid(tuple))
+		{
+			Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);
+			Oid			ctypeId;
+			int32		ctypmod;
+			Oid			ccollid;
+
+			/* Child column must match on type, typmod, and collation */
+			typenameTypeIdAndMod(NULL, colDef->typeName, &ctypeId, &ctypmod);
+			if (ctypeId != childatt->atttypid ||
+				ctypmod != childatt->atttypmod)
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("child table \"%s\" has different type for column \"%s\"",
+								RelationGetRelationName(rel), colDef->colname)));
+			ccollid = GetColumnDefCollation(NULL, colDef, ctypeId);
+			if (ccollid != childatt->attcollation)
+				ereport(ERROR,
+						(errcode(ERRCODE_COLLATION_MISMATCH),
+						 errmsg("child table \"%s\" has different collation for column \"%s\"",
+								RelationGetRelationName(rel), colDef->colname),
+						 errdetail("\"%s\" versus \"%s\"",
+								   get_collation_name(ccollid),
+								   get_collation_name(childatt->attcollation))));
+
+			/* Bump the existing child att's inhcount */
+			childatt->attinhcount++;
+			CatalogTupleUpdate(attrdesc, &tuple->t_self, tuple);
+
+			heap_freetuple(tuple);
+
+			/* Inform the user about the merge */
+			ereport(NOTICE,
+					(errmsg("merging definition of column \"%s\" for child \"%s\"",
+							colDef->colname, RelationGetRelationName(rel))));
+
+			table_close(attrdesc, RowExclusiveLock);
+			return InvalidObjectAddress;
+		}
+	}
+
+	/* skip if the name already exists and if_not_exists is true */
+	if (!check_for_column_name_collision(rel, colDef->colname, if_not_exists))
+	{
+		table_close(attrdesc, RowExclusiveLock);
+		return InvalidObjectAddress;
+	}
+
+	/*
+	 * Okay, we need to add the column, so go ahead and do parse
+	 * transformation.  This can result in queueing up, or even immediately
+	 * executing, subsidiary operations (such as creation of unique indexes);
+	 * so we mustn't do it until we have made the if_not_exists check.
+	 *
+	 * When recursing, the command was already transformed and we needn't do
+	 * so again.  Also, if context isn't given we can't transform.  (That
+	 * currently happens only for AT_AddColumnToView; we expect that view.c
+	 * passed us a ColumnDef that doesn't need work.)
+	 */
+	if (context != NULL && !recursing)
+	{
+		*cmd = ATParseTransformCmd(wqueue, tab, rel, *cmd, recurse, lockmode,
+								   cur_pass, context);
+		Assert(*cmd != NULL);
+		colDef = castNode(ColumnDef, (*cmd)->def);
+	}
+
+	/*
+	 * Cannot add identity column if table has children, because identity does
+	 * not inherit.  (Adding column and identity separately will work.)
+	 */
+	if (colDef->identity &&
+		recurse &&
+		find_inheritance_children(myrelid, false, NoLock) != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot recursively add identity column to table that has child tables")));
+
+	pgclass = table_open(RelationRelationId, RowExclusiveLock);
+
+	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
+	if (!HeapTupleIsValid(reltup))
+		elog(ERROR, "cache lookup failed for relation %u", myrelid);
+	relkind = ((Form_pg_class) GETSTRUCT(reltup))->relkind;
+
+	/* Determine the new attribute's number: one past the current relnatts */
+	newattnum = ((Form_pg_class) GETSTRUCT(reltup))->relnatts + 1;
+	if (newattnum > MaxHeapAttributeNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_TOO_MANY_COLUMNS),
+				 errmsg("tables can have at most %d columns",
+						MaxHeapAttributeNumber)));
+
+	typeTuple = typenameType(NULL, colDef->typeName, &typmod);
+	tform = (Form_pg_type) GETSTRUCT(typeTuple);
+	typeOid = tform->oid;
+
+	aclresult = pg_type_aclcheck(typeOid, GetUserId(), ACL_USAGE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error_type(aclresult, typeOid);
+
+	collOid = GetColumnDefCollation(NULL, colDef, typeOid);
+
+	/* make sure datatype is legal for a column */
+	CheckAttributeType(colDef->colname, typeOid, collOid,
+					   list_make1_oid(rel->rd_rel->reltype),
+					   0);
+
+	/* construct new attribute's pg_attribute entry */
+	attribute.attrelid = myrelid;
+	namestrcpy(&(attribute.attname), colDef->colname);
+	attribute.atttypid = typeOid;
+	attribute.attstattarget = (newattnum > 0) ? -1 : 0;
+	attribute.attlen = tform->typlen;
+	attribute.atttypmod = typmod;
+	attribute.attnum = newattnum;
+	attribute.attbyval = tform->typbyval;
+	attribute.attndims = list_length(colDef->typeName->arrayBounds);
+	attribute.attstorage = tform->typstorage;
+	attribute.attalign = tform->typalign;
+	attribute.attnotnull = colDef->is_not_null;
+	attribute.atthasdef = false;
+	attribute.atthasmissing = false;
+	attribute.attidentity = colDef->identity;
+	attribute.attgenerated = colDef->generated;
+	attribute.attisdropped = false;
+	attribute.attislocal = colDef->is_local;
+	attribute.attinhcount = colDef->inhcount;
+	attribute.attcollation = collOid;
+
+	/*
+	 * lookup attribute's compression method and store it in the
+	 * attr->attcompression.  Only plain and partitioned tables get one; for
+	 * any other relkind InvalidCompressionMethod is stored.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_RELATION ||
+		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		attribute.attcompression = GetAttributeCompression(&attribute,
+														   colDef->compression);
+	else
+		attribute.attcompression = InvalidCompressionMethod;
+
+	/* attribute.attacl is handled by InsertPgAttributeTuples() */
+
+	ReleaseSysCache(typeTuple);
+
+	tupdesc = CreateTupleDesc(lengthof(aattr), (FormData_pg_attribute **) &aattr);
+
+	InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);
+
+	table_close(attrdesc, RowExclusiveLock);
+
+	/*
+	 * Update pg_class tuple as appropriate
+	 */
+	((Form_pg_class) GETSTRUCT(reltup))->relnatts = newattnum;
+
+	CatalogTupleUpdate(pgclass, &reltup->t_self, reltup);
+
+	heap_freetuple(reltup);
+
+	/* Post creation hook for new attribute */
+	InvokeObjectPostCreateHook(RelationRelationId, myrelid, newattnum);
+
+	table_close(pgclass, RowExclusiveLock);
+
+	/* Make the attribute's catalog entry visible */
+	CommandCounterIncrement();
+
+	/*
+	 * Store the DEFAULT, if any, in the catalogs
+	 */
+	if (colDef->raw_default)
+	{
+		RawColumnDefault *rawEnt;
+
+		rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
+		rawEnt->attnum = attribute.attnum;
+		rawEnt->raw_default = copyObject(colDef->raw_default);
+
+		/*
+		 * Attempt to skip a complete table rewrite by storing the specified
+		 * DEFAULT value outside of the heap.  This may be disabled inside
+		 * AddRelationNewConstraints if the optimization cannot be applied.
+		 * It is not attempted at all for generated columns.
+		 */
+		rawEnt->missingMode = (!colDef->generated);
+
+		rawEnt->generated = colDef->generated;
+
+		/*
+		 * This function is intended for CREATE TABLE, so it processes a
+		 * _list_ of defaults, but we just do one.
+		 */
+		AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
+								  false, true, false, NULL);
+
+		/* Make the additional catalog changes visible */
+		CommandCounterIncrement();
+
+		/*
+		 * Did the request for a missing value work? If not we'll have to do a
+		 * rewrite
+		 */
+		if (!rawEnt->missingMode)
+			tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
+	}
+
+	/*
+	 * Tell Phase 3 to fill in the default expression, if there is one.
+	 *
+	 * If there is no default, Phase 3 doesn't have to do anything, because
+	 * that effectively means that the default is NULL.  The heap tuple access
+	 * routines always check for attnum > # of attributes in tuple, and return
+	 * NULL if so, so without any modification of the tuple data we will get
+	 * the effect of NULL values in the new column.
+	 *
+	 * An exception occurs when the new column is of a domain type: the domain
+	 * might have a NOT NULL constraint, or a check constraint that indirectly
+	 * rejects nulls.  If there are any domain constraints then we construct
+	 * an explicit NULL default value that will be passed through
+	 * CoerceToDomain processing.  (This is a tad inefficient, since it causes
+	 * rewriting the table which we really don't have to do, but the present
+	 * design of domain processing doesn't offer any simple way of checking
+	 * the constraints more directly.)
+	 *
+	 * Note: we use build_column_default, and not just the cooked default
+	 * returned by AddRelationNewConstraints, so that the right thing happens
+	 * when a datatype's default applies.
+	 *
+	 * Note: it might seem that this should happen at the end of Phase 2, so
+	 * that the effects of subsequent subcommands can be taken into account.
+	 * It's intentional that we do it now, though.  The new column should be
+	 * filled according to what is said in the ADD COLUMN subcommand, so that
+	 * the effects are the same as if this subcommand had been run by itself
+	 * and the later subcommands had been issued in new ALTER TABLE commands.
+	 *
+	 * We can skip this entirely for relations without storage, since Phase 3
+	 * is certainly not going to touch them.  System attributes don't have
+	 * interesting defaults, either.
+	 */
+	if (RELKIND_HAS_STORAGE(relkind) && attribute.attnum > 0)
+	{
+		/*
+		 * For an identity column, we can't use build_column_default(),
+		 * because the sequence ownership isn't set yet.  So do it manually.
+		 */
+		if (colDef->identity)
+		{
+			NextValueExpr *nve = makeNode(NextValueExpr);
+
+			nve->seqid = RangeVarGetRelid(colDef->identitySequence, NoLock, false);
+			nve->typeId = typeOid;
+
+			defval = (Expr *) nve;
+
+			/* must do a rewrite for identity columns */
+			tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
+		}
+		else
+			defval = (Expr *) build_column_default(rel, attribute.attnum);
+
+		if (!defval && DomainHasConstraints(typeOid))
+		{
+			Oid			baseTypeId;
+			int32		baseTypeMod;
+			Oid			baseTypeColl;
+
+			baseTypeMod = typmod;
+			baseTypeId = getBaseTypeAndTypmod(typeOid, &baseTypeMod);
+			baseTypeColl = get_typcollation(baseTypeId);
+			defval = (Expr *) makeNullConst(baseTypeId, baseTypeMod, baseTypeColl);
+			defval = (Expr *) coerce_to_target_type(NULL,
+													(Node *) defval,
+													baseTypeId,
+													typeOid,
+													typmod,
+													COERCION_ASSIGNMENT,
+													COERCE_IMPLICIT_CAST,
+													-1);
+			if (defval == NULL) /* should not happen */
+				elog(ERROR, "failed to coerce base type to domain");
+		}
+
+		if (defval)
+		{
+			NewColumnValue *newval;
+
+			newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
+			newval->attnum = attribute.attnum;
+			newval->expr = expression_planner(defval);
+			newval->is_generated = (colDef->generated != '\0');
+
+			tab->newvals = lappend(tab->newvals, newval);
+		}
+
+		if (DomainHasConstraints(typeOid))
+			tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
+
+		if (!TupleDescAttr(rel->rd_att, attribute.attnum - 1)->atthasmissing)
+		{
+			/*
+			 * If the new column is NOT NULL, and there is no missing value,
+			 * tell Phase 3 it needs to check for NULLs.
+			 */
+			tab->verify_new_notnull |= colDef->is_not_null;
+		}
+	}
+
+	/*
+	 * Add needed dependency entries for the new column.
+	 */
+	add_column_datatype_dependency(myrelid, newattnum, attribute.atttypid);
+	add_column_collation_dependency(myrelid, newattnum, attribute.attcollation);
+
+	/*
+	 * Propagate to children as appropriate.  Unlike most other ALTER
+	 * routines, we have to do this one level of recursion at a time; we can't
+	 * use find_all_inheritors to do it in one pass.
+	 */
+	children =
+		find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+
+	/*
+	 * If we are told not to recurse, there had better not be any child
+	 * tables; else the addition would put them out of step.
+	 */
+	if (children && !recurse)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("column must be added to child tables too")));
+
+	/* Children should see column as singly inherited (inhcount 1, not local) */
+	if (!recursing)
+	{
+		childcmd = copyObject(*cmd);
+		colDef = castNode(ColumnDef, childcmd->def);
+		colDef->inhcount = 1;
+		colDef->is_local = false;
+	}
+	else
+		childcmd = *cmd;		/* no need to copy again */
+
+	foreach(child, children)
+	{
+		Oid			childrelid = lfirst_oid(child);
+		Relation	childrel;
+		AlteredTableInfo *childtab;
+
+		/* find_inheritance_children already got lock */
+		childrel = table_open(childrelid, NoLock);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+
+		/* Find or create work queue entry for this table */
+		childtab = ATGetQueueEntry(wqueue, childrel);
+
+		/* Recurse to child; return value is ignored */
+		ATExecAddColumn(wqueue, childtab, childrel,
+						&childcmd, recurse, true,
+						lockmode, cur_pass, context);
+
+		table_close(childrel, NoLock);
+	}
+
+	ObjectAddressSubSet(address, RelationRelationId, myrelid, newattnum);
+	return address;
+}
+
+/*
+ * check_for_column_name_collision
+ *
+ * Check whether "colname" is already taken by a column of "rel".
+ *
+ * Returns true if no column of that name exists.  If a user column of that
+ * name exists, a NOTICE is emitted and false is returned when if_not_exists
+ * is set; otherwise an ERROR is raised.  A clash with a system column name
+ * always raises an ERROR, regardless of if_not_exists.
+ */
+static bool
+check_for_column_name_collision(Relation rel, const char *colname,
+								bool if_not_exists)
+{
+	HeapTuple	tup;
+	bool		is_system_col;
+
+	/*
+	 * Dropped columns are deliberately not special-cased in this lookup:
+	 * trying to add a column whose name matches a dropped column's name is
+	 * going to fail anyway.
+	 */
+	tup = SearchSysCache2(ATTNAME,
+						  ObjectIdGetDatum(RelationGetRelid(rel)),
+						  PointerGetDatum(colname));
+	if (!HeapTupleIsValid(tup))
+		return true;			/* name is free */
+
+	is_system_col = (((Form_pg_attribute) GETSTRUCT(tup))->attnum <= 0);
+	ReleaseSysCache(tup);
+
+	/*
+	 * Clashes with system column names get their own message: those columns
+	 * are normally hidden, so the user could otherwise be puzzled about
+	 * where the conflict comes from.
+	 */
+	if (is_system_col)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_COLUMN),
+				 errmsg("column name \"%s\" conflicts with a system column name",
+						colname)));
+
+	/* A user column of that name exists; IF NOT EXISTS makes that benign */
+	if (if_not_exists)
+	{
+		ereport(NOTICE,
+				(errcode(ERRCODE_DUPLICATE_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" already exists, skipping",
+						colname, RelationGetRelationName(rel))));
+		return false;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_DUPLICATE_COLUMN),
+			 errmsg("column \"%s\" of relation \"%s\" already exists",
+					colname, RelationGetRelationName(rel))));
+
+	return true;				/* not reached; keeps the compiler quiet */
+}
+
+/*
+ * add_column_datatype_dependency
+ *
+ * Record a normal dependency of column "attnum" of relation "relid" on the
+ * data type "typid".
+ */
+static void
+add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid)
+{
+	ObjectAddress column_addr;
+	ObjectAddress type_addr;
+
+	/* the depending object: the column itself */
+	column_addr.classId = RelationRelationId;
+	column_addr.objectId = relid;
+	column_addr.objectSubId = attnum;
+
+	/* the referenced object: the type as a whole */
+	type_addr.classId = TypeRelationId;
+	type_addr.objectId = typid;
+	type_addr.objectSubId = 0;
+
+	recordDependencyOn(&column_addr, &type_addr, DEPENDENCY_NORMAL);
+}
+
+/*
+ * add_column_collation_dependency
+ *
+ * Record a normal dependency of column "attnum" of relation "relid" on the
+ * collation "collid".  Nothing is recorded when collid is invalid or is the
+ * default collation.
+ */
+static void
+add_column_collation_dependency(Oid relid, int32 attnum, Oid collid)
+{
+	ObjectAddress column_addr;
+	ObjectAddress coll_addr;
+
+	/* The default collation is known to be pinned; no entry needed for it */
+	if (!OidIsValid(collid) || collid == DEFAULT_COLLATION_OID)
+		return;
+
+	/* the depending object: the column itself */
+	column_addr.classId = RelationRelationId;
+	column_addr.objectId = relid;
+	column_addr.objectSubId = attnum;
+
+	/* the referenced object: the collation */
+	coll_addr.classId = CollationRelationId;
+	coll_addr.objectId = collid;
+	coll_addr.objectSubId = 0;
+
+	recordDependencyOn(&column_addr, &coll_addr, DEPENDENCY_NORMAL);
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP NOT NULL
+ *
+ * Preparation step: refuse to drop NOT NULL from only a partitioned table
+ * (that is, neither recursing nor asked to recurse) while it has partitions.
+ */
+
+static void
+ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
+{
+	PartitionDesc pdesc;
+
+	/* only partitioned tables need any checking here */
+	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		return;
+
+	/*
+	 * As with check constraints, removing the NOT NULL from just the
+	 * partitioned parent is not supported while partitions exist.
+	 */
+	pdesc = RelationGetPartitionDesc(rel, false);
+	Assert(pdesc != NULL);
+
+	if (pdesc->nparts > 0 && !recurse && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
+				 errhint("Do not specify the ONLY keyword.")));
+}
+
+/*
+ * ATExecDropNotNull
+ *		Clear the attnotnull flag of column "colName" of relation "rel".
+ *
+ * An error is raised if the column does not exist, is a system column, is an
+ * identity column, is a key column of a primary-key index, or if rel is a
+ * partition whose parent marks the same column NOT NULL.
+ *
+ * Return the address of the modified column.  If the column was already
+ * nullable, InvalidObjectAddress is returned.  The post-alter hook is invoked
+ * in both cases.
+ *
+ * "lockmode" is accepted for signature symmetry but is not used here.
+ */
+static ObjectAddress
+ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Relation	attr_rel;
+	List	   *indexoidlist;
+	ListCell   *indexoidscan;
+	ObjectAddress address;
+
+	/*
+	 * lookup the attribute; we work on a modifiable copy of the catalog
+	 * tuple, which is freed before returning
+	 */
+	attr_rel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	/* Prevent them from altering a system attribute */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/* Identity columns are rejected outright */
+	if (attTup->attidentity)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("column \"%s\" of relation \"%s\" is an identity column",
+						colName, RelationGetRelationName(rel))));
+
+	/*
+	 * Check that the attribute is not in a primary key
+	 *
+	 * Note: we'll throw error even if the pkey index is not valid.
+	 */
+
+	/* Loop over all indexes on the relation */
+	indexoidlist = RelationGetIndexList(rel);
+
+	foreach(indexoidscan, indexoidlist)
+	{
+		Oid			indexoid = lfirst_oid(indexoidscan);
+		HeapTuple	indexTuple;
+		Form_pg_index indexStruct;
+		int			i;
+
+		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
+		if (!HeapTupleIsValid(indexTuple))
+			elog(ERROR, "cache lookup failed for index %u", indexoid);
+		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
+
+		/* Only primary-key indexes are of interest */
+		if (indexStruct->indisprimary)
+		{
+			/*
+			 * Loop over each key attribute in the primary key and see if it
+			 * matches the to-be-altered attribute
+			 */
+			for (i = 0; i < indexStruct->indnkeyatts; i++)
+			{
+				if (indexStruct->indkey.values[i] == attnum)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+							 errmsg("column \"%s\" is in a primary key",
+									colName)));
+			}
+		}
+
+		ReleaseSysCache(indexTuple);
+	}
+
+	list_free(indexoidlist);
+
+	/* If rel is partition, shouldn't drop NOT NULL if parent has the same */
+	if (rel->rd_rel->relispartition)
+	{
+		Oid			parentId = get_partition_parent(RelationGetRelid(rel), false);
+		Relation	parent = table_open(parentId, AccessShareLock);
+		TupleDesc	tupDesc = RelationGetDescr(parent);
+		AttrNumber	parent_attnum;
+
+		/* The column is looked up by name in the parent */
+		parent_attnum = get_attnum(parentId, colName);
+		if (TupleDescAttr(tupDesc, parent_attnum - 1)->attnotnull)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("column \"%s\" is marked NOT NULL in parent table",
+							colName)));
+		table_close(parent, AccessShareLock);
+	}
+
+	/*
+	 * Okay, actually perform the catalog change ... if needed
+	 */
+	if (attTup->attnotnull)
+	{
+		attTup->attnotnull = false;
+
+		CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);
+
+		ObjectAddressSubSet(address, RelationRelationId,
+							RelationGetRelid(rel), attnum);
+	}
+	else
+		address = InvalidObjectAddress;
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel), attnum);
+
+	/* Done with our private copy of the attribute tuple */
+	heap_freetuple(tuple);
+
+	table_close(attr_rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET NOT NULL
+ *
+ * Preparation step: work out how (and whether) the command has to be
+ * propagated to the children of "rel".
+ */
+
+static void
+ATPrepSetNotNull(List **wqueue, Relation rel,
+				 AlterTableCmd *cmd, bool recurse, bool recursing,
+				 LOCKMODE lockmode, AlterTableUtilityContext *context)
+{
+	bool		is_partitioned;
+
+	/*
+	 * A recursive invocation has nothing left to do: the topmost call of
+	 * ATSimpleRecursion has already visited every child.
+	 */
+	if (recursing)
+		return;
+
+	is_partitioned = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/*
+	 * When the column is already NOT NULL on a partitioned table that has
+	 * children, the children's columns should be NOT NULL too, so recursion
+	 * can be skipped entirely.  That avoids taking per-child locks for
+	 * nothing, which helps concurrency in some parallel restore scenarios.
+	 * Without children the check is pointless here; execution time will do.
+	 *
+	 * The shortcut is limited to partitioned tables because traditional
+	 * inheritance does not enforce that a child column be NOT NULL when the
+	 * parent's is.  (That's a bug that should get fixed someday.)
+	 */
+	if (is_partitioned && rel->rd_rel->relhassubclass)
+	{
+		HeapTuple	atttup;
+		bool		already_notnull;
+
+		atttup = SearchSysCacheAttName(RelationGetRelid(rel), cmd->name);
+
+		/* A bad column name may as well be reported right away */
+		if (!HeapTupleIsValid(atttup))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							cmd->name, RelationGetRelationName(rel))));
+
+		already_notnull = ((Form_pg_attribute) GETSTRUCT(atttup))->attnotnull;
+		ReleaseSysCache(atttup);
+
+		if (already_notnull)
+			return;
+	}
+
+	/*
+	 * ALTER TABLE ONLY ... SET NOT NULL on a partitioned table: queue an
+	 * ALTER TABLE ... CHECK NOT NULL for every child instead of the original
+	 * command.
+	 */
+	if (is_partitioned && !recurse)
+	{
+		AlterTableCmd *checkcmd = makeNode(AlterTableCmd);
+
+		checkcmd->subtype = AT_CheckNotNull;
+		checkcmd->name = pstrdup(cmd->name);
+		ATSimpleRecursion(wqueue, rel, checkcmd, true, lockmode, context);
+		return;
+	}
+
+	/* Everything else goes through the normal recursion logic */
+	ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+}
+
+/*
+ * ATExecSetNotNull
+ *		Set the attnotnull flag of column "colName" of relation "rel".
+ *
+ * An error is raised if the column does not exist or is a system column.
+ * When the flag is newly set and the relation's existing constraints do not
+ * already prove the column free of NULLs, tab->verify_new_notnull is set so
+ * that Phase 3 tests the constraint.
+ *
+ * Return the address of the modified column.  If the column was already NOT
+ * NULL, InvalidObjectAddress is returned.  The post-alter hook is invoked in
+ * both cases.
+ *
+ * "lockmode" is not used here.
+ */
+static ObjectAddress
+ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
+				 const char *colName, LOCKMODE lockmode)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Relation	attr_rel;
+	ObjectAddress address;
+
+	/*
+	 * lookup the attribute; we work on a modifiable copy of the catalog
+	 * tuple, which is freed before returning
+	 */
+	attr_rel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	/* Prevent them from altering a system attribute */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/*
+	 * Okay, actually perform the catalog change ... if needed
+	 */
+	if (!attTup->attnotnull)
+	{
+		attTup->attnotnull = true;
+
+		CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);
+
+		/*
+		 * Ordinarily phase 3 must ensure that no NULLs exist in columns that
+		 * are set NOT NULL; however, if we can find a constraint which proves
+		 * this then we can skip that.  We needn't bother looking if we've
+		 * already found that we must verify some other NOT NULL constraint.
+		 */
+		if (!tab->verify_new_notnull &&
+			!NotNullImpliedByRelConstraints(rel, attTup))
+		{
+			/* Tell Phase 3 it needs to test the constraint */
+			tab->verify_new_notnull = true;
+		}
+
+		ObjectAddressSubSet(address, RelationRelationId,
+							RelationGetRelid(rel), attnum);
+	}
+	else
+		address = InvalidObjectAddress;
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel), attnum);
+
+	/* Done with our private copy of the attribute tuple */
+	heap_freetuple(tuple);
+
+	table_close(attr_rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN CHECK NOT NULL
+ *
+ * There is no grammar for this.  AT_CheckNotNull commands are generated
+ * against the partitions of a partitioned table when the user writes
+ * ALTER TABLE ONLY ... SET NOT NULL on the partitioned table, or tries to
+ * create a primary key on it (which internally creates AT_SetNotNull on
+ * the partitioned table).  Such a command may not modify any partition,
+ * but it is allowed to succeed when the partitions are already properly
+ * marked.
+ *
+ * In future, this might need to adjust the child table's state, likely
+ * by incrementing an inheritance count for the attnotnull constraint.
+ * For now only the presence of the flag is checked.
+ */
+static void
+ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel,
+				   const char *colName, LOCKMODE lockmode)
+{
+	HeapTuple	atttup = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+	Form_pg_attribute attform;
+
+	/* The column has to exist ... */
+	if (!HeapTupleIsValid(atttup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attform = (Form_pg_attribute) GETSTRUCT(atttup);
+
+	/* ... and must already carry the NOT NULL marking */
+	if (!attform->attnotnull)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("constraint must be added to child tables too"),
+				 errdetail("Column \"%s\" of relation \"%s\" is not already NOT NULL.",
+						   colName, RelationGetRelationName(rel)),
+				 errhint("Do not specify the ONLY keyword.")));
+
+	ReleaseSysCache(atttup);
+}
+
+/*
+ * NotNullImpliedByRelConstraints
+ *		Do the constraints already present on rel imply NOT NULL for the
+ *		given attribute?
+ *
+ * Builds an "attr IS NOT NULL" test and asks
+ * ConstraintImpliedByRelConstraint whether it follows from rel's
+ * constraints.  A DEBUG1 message is logged when it does.
+ */
+static bool
+NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr)
+{
+	NullTest   *isnotnull = makeNode(NullTest);
+	Var		   *colvar;
+
+	colvar = makeVar(1,
+					 attr->attnum,
+					 attr->atttypid,
+					 attr->atttypmod,
+					 attr->attcollation,
+					 0);
+
+	isnotnull->arg = (Expr *) colvar;
+	isnotnull->nulltesttype = IS_NOT_NULL;
+
+	/*
+	 * argisrow stays false even for a composite column: attnotnull does not
+	 * stand for a SQL-spec IS NOT NULL test in that case, only for IS
+	 * DISTINCT FROM NULL.
+	 */
+	isnotnull->argisrow = false;
+	isnotnull->location = -1;
+
+	if (!ConstraintImpliedByRelConstraint(rel, list_make1(isnotnull), NIL))
+		return false;
+
+	ereport(DEBUG1,
+			(errmsg_internal("existing constraints on column \"%s.%s\" are sufficient to prove that it does not contain nulls",
+					RelationGetRelationName(rel), NameStr(attr->attname))));
+
+	return true;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
+ *
+ * A non-NULL newDefault means SET DEFAULT; NULL means DROP DEFAULT.
+ * Identity and generated columns are rejected.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecColumnDefault(Relation rel, const char *colName,
+					Node *newDefault, LOCKMODE lockmode)
+{
+	TupleDesc	rel_desc = RelationGetDescr(rel);
+	bool		is_set = (newDefault != NULL);
+	Form_pg_attribute att;
+	AttrNumber	attnum;
+	ObjectAddress address;
+
+	/* Resolve the column name to an attribute number */
+	attnum = get_attnum(RelationGetRelid(rel), colName);
+	if (attnum == InvalidAttrNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	/* System attributes may not be altered */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	att = TupleDescAttr(rel_desc, attnum - 1);
+
+	/* The DROP IDENTITY hint is only offered when dropping a default */
+	if (att->attidentity)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("column \"%s\" of relation \"%s\" is an identity column",
+						colName, RelationGetRelationName(rel)),
+				 is_set ? 0 : errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP IDENTITY instead.")));
+
+	/*
+	 * The DROP EXPRESSION hint is only offered when dropping a default from a
+	 * stored generated column.
+	 */
+	if (att->attgenerated)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("column \"%s\" of relation \"%s\" is a generated column",
+						colName, RelationGetRelationName(rel)),
+				 (is_set || att->attgenerated != ATTRIBUTE_GENERATED_STORED) ? 0 :
+				 errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP EXPRESSION instead.")));
+
+	/*
+	 * Get rid of any existing default first.  RESTRICT is used for safety,
+	 * although nothing is expected to depend on the default at present.
+	 *
+	 * The removal counts as an internal operation when it merely prepares
+	 * for a new default, and as a user-initiated one when the user asked
+	 * for a drop.
+	 */
+	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
+					  is_set);
+
+	if (is_set)
+	{
+		/* SET DEFAULT */
+		RawColumnDefault *newEnt;
+
+		newEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
+		newEnt->attnum = attnum;
+		newEnt->raw_default = newDefault;
+		newEnt->missingMode = false;
+		newEnt->generated = '\0';
+
+		/*
+		 * AddRelationNewConstraints is meant for CREATE TABLE and so takes a
+		 * _list_ of defaults; we hand it a list of one.
+		 */
+		AddRelationNewConstraints(rel, list_make1(newEnt), NIL,
+								  false, true, false, NULL);
+	}
+
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	return address;
+}
+
+/*
+ * Add a pre-cooked default expression.
+ *
+ * The caller supplies the attribute number directly; no validation of the
+ * column is done here.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecCookedColumnDefault(Relation rel, AttrNumber attnum,
+						  Node *newDefault)
+{
+	Oid			relid = RelationGetRelid(rel);
+	ObjectAddress result;
+
+	/* We assume no checking is required */
+
+	/*
+	 * Drop whatever default is currently attached to the column.  RESTRICT
+	 * is used for safety, although nothing is expected to depend on the
+	 * default at present.  (Ordinarily no default can be in place at this
+	 * point, but combining LIKE with inheritance makes it possible.)
+	 */
+	RemoveAttrDefault(relid, attnum, DROP_RESTRICT, false, true);
+
+	/* Store the new default; the return value is deliberately ignored */
+	(void) StoreAttrDefault(rel, attnum, newDefault, true, false);
+
+	ObjectAddressSubSet(result, RelationRelationId, relid, attnum);
+
+	return result;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN ADD IDENTITY
+ *
+ * "def" must be a ColumnDef; its identity field is stored in the column's
+ * attidentity.  The column must exist, be a user column, be NOT NULL, and
+ * have neither an identity nor a default yet.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecAddIdentity(Relation rel, const char *colName,
+				  Node *def, LOCKMODE lockmode)
+{
+	ColumnDef  *coldef = castNode(ColumnDef, def);
+	Relation	attrel;
+	HeapTuple	atttuple;
+	Form_pg_attribute attform;
+	AttrNumber	attnum;
+	ObjectAddress result;
+
+	attrel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Fetch a modifiable copy of the column's pg_attribute row */
+	atttuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(atttuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+	attnum = attform->attnum;
+
+	/* System attributes are off limits */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/*
+	 * A column created as identity is implicitly NOT NULL.  Attaching an
+	 * identity to an existing nullable column would therefore produce a
+	 * state that cannot be reproduced without contortions, so refuse.
+	 */
+	if (!attform->attnotnull)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" must be declared NOT NULL before identity can be added",
+						colName, RelationGetRelationName(rel))));
+
+	/* No second identity on the same column */
+	if (attform->attidentity)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" is already an identity column",
+						colName, RelationGetRelationName(rel))));
+
+	/* Nor an identity on top of an existing default */
+	if (attform->atthasdef)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" already has a default value",
+						colName, RelationGetRelationName(rel))));
+
+	/* All checks passed: record the identity kind in the catalog */
+	attform->attidentity = coldef->identity;
+	CatalogTupleUpdate(attrel, &atttuple->t_self, atttuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attnum);
+	ObjectAddressSubSet(result, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	heap_freetuple(atttuple);
+	table_close(attrel, RowExclusiveLock);
+
+	return result;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET { GENERATED or sequence options }
+ *
+ * "def" is a List of DefElem; only the "generated" option is recognized
+ * here, and it may appear at most once.
+ *
+ * Return the address of the affected column, or InvalidObjectAddress when
+ * no "generated" option was given and hence nothing was changed.
+ */
+static ObjectAddress
+ATExecSetIdentity(Relation rel, const char *colName, Node *def, LOCKMODE lockmode)
+{
+	DefElem    *generatedEl = NULL;
+	ListCell   *cell;
+	Relation	attrel;
+	HeapTuple	atttuple;
+	Form_pg_attribute attform;
+	AttrNumber	attnum;
+	ObjectAddress result;
+
+	/* Pick out the "generated" option, rejecting anything else */
+	foreach(cell, castNode(List, def))
+	{
+		DefElem    *elem = lfirst_node(DefElem, cell);
+
+		if (strcmp(elem->defname, "generated") != 0)
+			elog(ERROR, "option \"%s\" not recognized",
+				 elem->defname);
+
+		if (generatedEl)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("conflicting or redundant options")));
+
+		generatedEl = elem;
+	}
+
+	/*
+	 * All checks below run even when there turns out to be nothing to
+	 * change: a subsequent ALTER SEQUENCE relies on everything being there.
+	 */
+
+	attrel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	atttuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(atttuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+	attnum = attform->attnum;
+
+	/* System attributes are off limits */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/* Only an existing identity column can be altered this way */
+	if (!attform->attidentity)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
+						colName, RelationGetRelationName(rel))));
+
+	result = InvalidObjectAddress;
+
+	if (generatedEl)
+	{
+		/* Store the new identity kind and report the column as changed */
+		attform->attidentity = defGetInt32(generatedEl);
+		CatalogTupleUpdate(attrel, &atttuple->t_self, atttuple);
+
+		InvokeObjectPostAlterHook(RelationRelationId,
+								  RelationGetRelid(rel),
+								  attnum);
+		ObjectAddressSubSet(result, RelationRelationId,
+							RelationGetRelid(rel), attnum);
+	}
+
+	heap_freetuple(atttuple);
+	table_close(attrel, RowExclusiveLock);
+
+	return result;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP IDENTITY
+ *
+ * Clears attidentity for the column and then drops the identity's internal
+ * sequence.  If the column is not an identity column, this is an error
+ * unless missing_ok is set, in which case a NOTICE is issued instead.
+ *
+ * Return the address of the affected column, or InvalidObjectAddress when
+ * the column was skipped under missing_ok.
+ */
+static ObjectAddress
+ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
+{
+	Relation	attrel;
+	HeapTuple	atttuple;
+	Form_pg_attribute attform;
+	AttrNumber	attnum;
+	ObjectAddress result;
+	ObjectAddress seqaddress;
+	Oid			seqid;
+
+	attrel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	atttuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(atttuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+	attnum = attform->attnum;
+
+	/* System attributes are off limits */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	if (!attform->attidentity)
+	{
+		/* Not an identity column: fail, unless told to tolerate that */
+		if (!missing_ok)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
+							colName, RelationGetRelationName(rel))));
+
+		ereport(NOTICE,
+				(errmsg("column \"%s\" of relation \"%s\" is not an identity column, skipping",
+						colName, RelationGetRelationName(rel))));
+		heap_freetuple(atttuple);
+		table_close(attrel, RowExclusiveLock);
+		return InvalidObjectAddress;
+	}
+
+	/* Clear the identity marking in pg_attribute */
+	attform->attidentity = '\0';
+	CatalogTupleUpdate(attrel, &atttuple->t_self, atttuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attnum);
+	ObjectAddressSubSet(result, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	heap_freetuple(atttuple);
+	table_close(attrel, RowExclusiveLock);
+
+	/*
+	 * Now drop the internal sequence: remove its internal dependency
+	 * records first, then delete the sequence itself.
+	 */
+	seqid = getIdentitySequence(RelationGetRelid(rel), attnum, false);
+	deleteDependencyRecordsForClass(RelationRelationId, seqid,
+									RelationRelationId, DEPENDENCY_INTERNAL);
+	CommandCounterIncrement();
+
+	seqaddress.classId = RelationRelationId;
+	seqaddress.objectId = seqid;
+	seqaddress.objectSubId = 0;
+	performDeletion(&seqaddress, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+	return result;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP EXPRESSION
+ *
+ * Preparation step: reject the cases we do not support.  An error is raised
+ * if ONLY was specified (recurse is false) while rel has inheritance
+ * children, or, at the top level of the command (recursing is false), if the
+ * column does not exist or is inherited.
+ */
+static void
+ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode)
+{
+	/*
+	 * Reject ONLY if there are child tables.  We could implement this, but it
+	 * is a bit complicated.  GENERATED clauses must be attached to the column
+	 * definition and cannot be added later like DEFAULT, so if a child table
+	 * has a generation expression that the parent does not have, the child
+	 * column will necessarily be an attlocal column.  So to implement ONLY
+	 * here, we'd need extra code to update attislocal of the direct child
+	 * tables, somewhat similar to how DROP COLUMN does it, so that the
+	 * resulting state can be properly dumped and restored.
+	 */
+	if (!recurse &&
+		find_inheritance_children(RelationGetRelid(rel), false, lockmode))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
+
+	/*
+	 * Cannot drop generation expression from inherited columns.
+	 */
+	if (!recursing)
+	{
+		HeapTuple	tuple;
+		bool		inherited;
+
+		/*
+		 * The tuple is only read, so a plain syscache reference is enough;
+		 * no private copy is needed.
+		 */
+		tuple = SearchSysCacheAttName(RelationGetRelid(rel), cmd->name);
+		if (!HeapTupleIsValid(tuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							cmd->name, RelationGetRelationName(rel))));
+
+		inherited = ((Form_pg_attribute) GETSTRUCT(tuple))->attinhcount > 0;
+		ReleaseSysCache(tuple);
+
+		if (inherited)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot drop generation expression from inherited column")));
+	}
+}
+
+/*
+ * ATExecDropExpression
+ *		Execution step of ALTER TABLE ALTER COLUMN DROP EXPRESSION.
+ *
+ * Clears attgenerated for the column, removes its default entry via
+ * RemoveAttrDefault, and deletes the DEPENDENCY_AUTO pg_depend entries that
+ * link this column to other columns of the same table.
+ *
+ * If the column is not a stored generated column, this is an error unless
+ * missing_ok is set, in which case a NOTICE is issued and
+ * InvalidObjectAddress is returned.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Relation	attrelation;
+	ObjectAddress address;
+
+	/* Fetch a modifiable copy of the column's pg_attribute row */
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	/* System attributes are off limits */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/* Only stored generated columns have an expression to drop */
+	if (attTup->attgenerated != ATTRIBUTE_GENERATED_STORED)
+	{
+		if (!missing_ok)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("column \"%s\" of relation \"%s\" is not a stored generated column",
+							colName, RelationGetRelationName(rel))));
+		else
+		{
+			/* Tolerated: report, release everything, and do nothing */
+			ereport(NOTICE,
+					(errmsg("column \"%s\" of relation \"%s\" is not a stored generated column, skipping",
+							colName, RelationGetRelationName(rel))));
+			heap_freetuple(tuple);
+			table_close(attrelation, RowExclusiveLock);
+			return InvalidObjectAddress;
+		}
+	}
+
+	/* Mark the column as no longer generated */
+	attTup->attgenerated = '\0';
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attTup->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	heap_freetuple(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	/*
+	 * NOTE(review): presumably needed so that the pg_attribute update above
+	 * is visible to RemoveAttrDefault below -- confirm.
+	 */
+	CommandCounterIncrement();
+
+	/* Remove the default entry that held the generation expression */
+	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false, false);
+
+	/*
+	 * Remove all dependencies of this (formerly generated) column on other
+	 * columns in the same table.  (See StoreAttrDefault() for which
+	 * dependencies are created.)  We don't expect there to be dependencies
+	 * between columns of the same table for other reasons, so it's okay to
+	 * remove all of them.
+	 */
+	{
+		Relation	depRel;
+		ScanKeyData key[3];
+		SysScanDesc scan;
+		HeapTuple	tup;
+
+		depRel = table_open(DependRelationId, RowExclusiveLock);
+
+		/* Scan pg_depend for entries whose depender is this very column */
+		ScanKeyInit(&key[0],
+					Anum_pg_depend_classid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(RelationRelationId));
+		ScanKeyInit(&key[1],
+					Anum_pg_depend_objid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(RelationGetRelid(rel)));
+		ScanKeyInit(&key[2],
+					Anum_pg_depend_objsubid,
+					BTEqualStrategyNumber, F_INT4EQ,
+					Int32GetDatum(attnum));
+
+		scan = systable_beginscan(depRel, DependDependerIndexId, true,
+								  NULL, 3, key);
+
+		while (HeapTupleIsValid(tup = systable_getnext(scan)))
+		{
+			Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
+
+			/*
+			 * Delete only AUTO dependencies that reference a column
+			 * (refobjsubid != 0) of this same relation.
+			 */
+			if (depform->refclassid == RelationRelationId &&
+				depform->refobjid == RelationGetRelid(rel) &&
+				depform->refobjsubid != 0 &&
+				depform->deptype == DEPENDENCY_AUTO)
+			{
+				CatalogTupleDelete(depRel, &tup->t_self);
+			}
+		}
+
+		systable_endscan(scan);
+
+		table_close(depRel, RowExclusiveLock);
+	}
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ *
+ * The column is identified by colName when that is non-NULL, otherwise by
+ * colNum; referencing by number is only allowed for indexes.  newValue must
+ * be an Integer node: values below -1 are rejected and values above 10000
+ * are lowered to 10000 with a WARNING.  For indexes, only expression key
+ * columns may have their statistics target changed.
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newValue, LOCKMODE lockmode)
+{
+	int			newtarget;
+	Relation	attrelation;
+	HeapTuple	tuple;
+	Form_pg_attribute attrtuple;
+	AttrNumber	attnum;
+	ObjectAddress address;
+
+	/*
+	 * We allow referencing columns by numbers only for indexes, since table
+	 * column numbers could contain gaps if columns are later dropped.
+	 */
+	if (rel->rd_rel->relkind != RELKIND_INDEX &&
+		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX &&
+		!colName)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot refer to non-index column by number")));
+
+	Assert(IsA(newValue, Integer));
+	newtarget = intVal(newValue);
+
+	/*
+	 * Limit target to a sane range
+	 */
+	if (newtarget < -1)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("statistics target %d is too low",
+						newtarget)));
+	}
+	else if (newtarget > 10000)
+	{
+		newtarget = 10000;
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("lowering statistics target to %d",
+						newtarget)));
+	}
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Fetch a modifiable copy of the pg_attribute row, by name or number */
+	if (colName)
+	{
+		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+
+		if (!HeapTupleIsValid(tuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							colName, RelationGetRelationName(rel))));
+	}
+	else
+	{
+		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(rel), colNum);
+
+		if (!HeapTupleIsValid(tuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column number %d of relation \"%s\" does not exist",
+							colNum, RelationGetRelationName(rel))));
+	}
+
+	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	/*
+	 * Prevent them from altering a system attribute.  The name is taken from
+	 * the catalog tuple rather than from colName, because colName is NULL
+	 * when the column was referenced by number.
+	 */
+	attnum = attrtuple->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						NameStr(attrtuple->attname))));
+
+	if (rel->rd_rel->relkind == RELKIND_INDEX ||
+		rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		/* Included (non-key) columns are rejected ... */
+		if (attnum > rel->rd_index->indnkeyatts)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot alter statistics on included column \"%s\" of index \"%s\"",
+							NameStr(attrtuple->attname), RelationGetRelationName(rel))));
+		/* ... and so are key columns with a nonzero indkey entry */
+		else if (rel->rd_index->indkey.values[attnum - 1] != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot alter statistics on non-expression column \"%s\" of index \"%s\"",
+							NameStr(attrtuple->attname), RelationGetRelationName(rel)),
+					 errhint("Alter statistics on table column instead.")));
+	}
+
+	attrtuple->attstattarget = newtarget;
+
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attrtuple->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	heap_freetuple(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ATExecSetOptions
+ *		Set or reset per-attribute options (pg_attribute.attoptions) of
+ *		column "colName" of "rel".
+ *
+ * "options" must be a List; "isReset" is passed through to
+ * transformRelOptions.  The resulting option array is validated with
+ * attribute_reloptions before being stored.
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetOptions(Relation rel, const char *colName, Node *options,
+				 bool isReset, LOCKMODE lockmode)
+{
+	Relation	attrel;
+	HeapTuple	oldtup;
+	HeapTuple	updtup;
+	Form_pg_attribute attform;
+	AttrNumber	attnum;
+	Datum		oldOptions;
+	Datum		newOptions;
+	bool		oldIsNull;
+	ObjectAddress result;
+	Datum		repl_val[Natts_pg_attribute];
+	bool		repl_null[Natts_pg_attribute];
+	bool		repl_repl[Natts_pg_attribute];
+
+	attrel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Look the column up; the cache reference is released at the end */
+	oldtup = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+
+	if (!HeapTupleIsValid(oldtup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attform = (Form_pg_attribute) GETSTRUCT(oldtup);
+	attnum = attform->attnum;
+
+	/* System attributes are off limits */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/* Compute the proposed attoptions (text array) from the current ones */
+	oldOptions = SysCacheGetAttr(ATTNAME, oldtup, Anum_pg_attribute_attoptions,
+								 &oldIsNull);
+	newOptions = transformRelOptions(oldIsNull ? (Datum) 0 : oldOptions,
+									 castNode(List, options), NULL, NULL,
+									 false, isReset);
+
+	/* Have the new options validated */
+	(void) attribute_reloptions(newOptions, true);
+
+	/* Prepare a replacement tuple in which only attoptions changes */
+	memset(repl_repl, false, sizeof(repl_repl));
+	memset(repl_null, false, sizeof(repl_null));
+	repl_repl[Anum_pg_attribute_attoptions - 1] = true;
+	if (newOptions == (Datum) 0)
+		repl_null[Anum_pg_attribute_attoptions - 1] = true;
+	else
+		repl_val[Anum_pg_attribute_attoptions - 1] = newOptions;
+
+	updtup = heap_modify_tuple(oldtup, RelationGetDescr(attrel),
+							   repl_val, repl_null, repl_repl);
+
+	/* Write the change to the catalog */
+	CatalogTupleUpdate(attrel, &updtup->t_self, updtup);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attform->attnum);
+	ObjectAddressSubSet(result, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	heap_freetuple(updtup);
+
+	ReleaseSysCache(oldtup);
+
+	table_close(attrel, RowExclusiveLock);
+
+	return result;
+}
+
+/*
+ * Helper function for ATExecSetStorage and ATExecSetCompression
+ *
+ * Set the attcompression and/or attstorage for the respective index attribute
+ * if the respective input values are valid.
+ *
+ * For every index of "rel" that has column "attnum" among its indkey
+ * entries, the matching index attribute's pg_attribute row is updated.
+ * newcompression is applied only when CompressionMethodIsValid() accepts
+ * it, and newstorage only when it is not '\0'.  "attrelation" is the
+ * already-opened pg_attribute relation; each index is opened and closed with
+ * "lockmode".
+ */
+static void
+SetIndexStorageProperties(Relation rel, Relation attrelation,
+						  AttrNumber attnum, char newcompression,
+						  char newstorage, LOCKMODE lockmode)
+{
+	HeapTuple	tuple;
+	List	   *indexoidlist;
+	ListCell   *lc;
+	Form_pg_attribute attrtuple;
+
+	/* Keep hold of the list so that it can be freed once we are done */
+	indexoidlist = RelationGetIndexList(rel);
+
+	foreach(lc, indexoidlist)
+	{
+		Oid			indexoid = lfirst_oid(lc);
+		Relation	indrel;
+		AttrNumber	indattnum = 0;
+
+		indrel = index_open(indexoid, lockmode);
+
+		/* Find the (1-based) index column that refers to the table column */
+		for (int i = 0; i < indrel->rd_index->indnatts; i++)
+		{
+			if (indrel->rd_index->indkey.values[i] == attnum)
+			{
+				indattnum = i + 1;
+				break;
+			}
+		}
+
+		/* This index does not use the column: nothing to do for it */
+		if (indattnum == 0)
+		{
+			index_close(indrel, lockmode);
+			continue;
+		}
+
+		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum);
+
+		if (HeapTupleIsValid(tuple))
+		{
+			attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+			if (CompressionMethodIsValid(newcompression))
+				attrtuple->attcompression = newcompression;
+
+			if (newstorage != '\0')
+				attrtuple->attstorage = newstorage;
+
+			CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+			/*
+			 * NOTE(review): the hook receives the table's OID together with
+			 * the index's attribute number; confirm whether the index's OID
+			 * was intended here.
+			 */
+			InvokeObjectPostAlterHook(RelationRelationId,
+									  RelationGetRelid(rel),
+									  attrtuple->attnum);
+
+			heap_freetuple(tuple);
+		}
+
+		index_close(indrel, lockmode);
+	}
+
+	list_free(indexoidlist);
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET STORAGE
+ *
+ * 'newValue' must be a String node naming the storage mode.  The name is
+ * matched case-insensitively against "plain", "external", "extended" and
+ * "main"; anything else is rejected with an error.  The column's
+ * pg_attribute row is updated, and the new mode is then handed to
+ * SetIndexStorageProperties() so that indexes on the column follow suit.
+ *
+ * Errors are also raised when the column does not exist, when it is a
+ * system column (attnum <= 0), or when a mode other than PLAIN is requested
+ * for a data type that is not toastable.
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE lockmode)
+{
+	char	   *storagemode;
+	char		newstorage;
+	Relation	attrelation;
+	HeapTuple	tuple;
+	Form_pg_attribute attrtuple;
+	AttrNumber	attnum;
+	ObjectAddress address;
+
+	Assert(IsA(newValue, String));
+	storagemode = strVal(newValue);
+
+	/* Translate the user-supplied mode name into its TYPSTORAGE_* code */
+	if (pg_strcasecmp(storagemode, "plain") == 0)
+		newstorage = TYPSTORAGE_PLAIN;
+	else if (pg_strcasecmp(storagemode, "external") == 0)
+		newstorage = TYPSTORAGE_EXTERNAL;
+	else if (pg_strcasecmp(storagemode, "extended") == 0)
+		newstorage = TYPSTORAGE_EXTENDED;
+	else if (pg_strcasecmp(storagemode, "main") == 0)
+		newstorage = TYPSTORAGE_MAIN;
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid storage type \"%s\"",
+						storagemode)));
+		newstorage = 0;			/* keep compiler quiet */
+	}
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Work on a copy of the column's pg_attribute row so it can be modified */
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	/* Remember attnum now: it is still needed after the tuple is freed */
+	attnum = attrtuple->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/*
+	 * safety check: do not allow toasted storage modes unless column datatype
+	 * is TOAST-aware.
+	 */
+	if (newstorage == TYPSTORAGE_PLAIN || TypeIsToastable(attrtuple->atttypid))
+		attrtuple->attstorage = newstorage;
+	else
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("column data type %s can only have storage PLAIN",
+						format_type_be(attrtuple->atttypid))));
+
+	/* Write the modified row back to pg_attribute */
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attrtuple->attnum);
+
+	heap_freetuple(tuple);
+
+	/*
+	 * Apply the change to indexes as well (only for simple index columns,
+	 * matching behavior of index.c ConstructTupleDescriptor()).
+	 *
+	 * InvalidCompressionMethod is passed so that only attstorage is changed
+	 * in the index columns.
+	 */
+	SetIndexStorageProperties(rel, attrelation, attnum,
+							  InvalidCompressionMethod,
+							  newstorage, lockmode);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	return address;
+}
+
+
+/*
+ * ALTER TABLE DROP COLUMN
+ *
+ * The normal ALTER TABLE recursion mechanism cannot be used for DROP COLUMN:
+ * whether to recurse has to be decided at runtime, depending on whether
+ * attinhcount goes to zero or not.  (A static pre-pass would not handle
+ * multiple inheritance situations correctly.)
+ *
+ * This prep step rejects dropping a column from a typed table unless we are
+ * recursing, invokes ATTypedTableRecursion() when 'rel' is a composite type,
+ * and switches the subcommand to AT_DropColumnRecurse when 'recurse' is set.
+ */
+static void
+ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+				 AlterTableCmd *cmd, LOCKMODE lockmode,
+				 AlterTableUtilityContext *context)
+{
+	bool		is_typed_table = OidIsValid(rel->rd_rel->reloftype);
+	bool		is_composite = (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);
+
+	if (is_typed_table && !recursing)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot drop column from typed table")));
+	}
+
+	if (is_composite)
+	{
+		ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
+	}
+
+	/* Record the request to recurse in the subcommand type itself */
+	if (recurse)
+	{
+		cmd->subtype = AT_DropColumnRecurse;
+	}
+}
+
+/*
+ * Drops column 'colName' from relation 'rel' and returns the address of the
+ * dropped column.  The column is also dropped (or marked as no longer
+ * inherited from relation) from the relation's inheritance children, if any.
+ *
+ * In the recursive invocations for inheritance child relations, instead of
+ * dropping the column directly (if to be dropped at all), its object address
+ * is added to 'addrs', which must be non-NULL in such invocations.  All
+ * columns are dropped at the same time after all the children have been
+ * checked recursively.
+ *
+ * If the column does not exist, an error is raised unless 'missing_ok' is
+ * true, in which case a NOTICE is emitted and InvalidObjectAddress is
+ * returned.  'recurse' selects between dropping the column in the children
+ * as well and merely marking the children's columns as locally defined.
+ * 'recursing' is passed as true by the recursive calls made below; when it
+ * is false, 'addrs' is ignored and a fresh address list is created here.
+ * 'behavior' is handed unchanged to performMultipleDeletions().
+ */
+static ObjectAddress
+ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
+				 DropBehavior behavior,
+				 bool recurse, bool recursing,
+				 bool missing_ok, LOCKMODE lockmode,
+				 ObjectAddresses *addrs)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute targetatt;
+	AttrNumber	attnum;
+	List	   *children;
+	ObjectAddress object;
+	bool		is_expr;
+
+	/* At top level, permission check was done in ATPrepCmd, else do it */
+	if (recursing)
+		ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	/* Initialize addrs on the first invocation */
+	Assert(!recursing || addrs != NULL);
+	if (!recursing)
+		addrs = new_object_addresses();
+
+	/*
+	 * get the number of the attribute
+	 */
+	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+	{
+		if (!missing_ok)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							colName, RelationGetRelationName(rel))));
+		}
+		else
+		{
+			/* Missing column was declared acceptable: report and do nothing */
+			ereport(NOTICE,
+					(errmsg("column \"%s\" of relation \"%s\" does not exist, skipping",
+							colName, RelationGetRelationName(rel))));
+			return InvalidObjectAddress;
+		}
+	}
+	targetatt = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	attnum = targetatt->attnum;
+
+	/* Can't drop a system attribute */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot drop system column \"%s\"",
+						colName)));
+
+	/*
+	 * Don't drop inherited columns, unless recursing (presumably from a drop
+	 * of the parent column)
+	 */
+	if (targetatt->attinhcount > 0 && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot drop inherited column \"%s\"",
+						colName)));
+
+	/*
+	 * Don't drop columns used in the partition key, either.  (If we let this
+	 * go through, the key column's dependencies would cause a cascaded drop
+	 * of the whole table, which is surely not what the user expected.)
+	 */
+	if (has_partition_attrs(rel,
+							bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
+							&is_expr))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot drop column \"%s\" because it is part of the partition key of relation \"%s\"",
+						colName, RelationGetRelationName(rel))));
+
+	/* targetatt is not referenced past this point; attnum was saved above */
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Propagate to children as appropriate.  Unlike most other ALTER
+	 * routines, we have to do this one level of recursion at a time; we can't
+	 * use find_all_inheritors to do it in one pass.
+	 */
+	children =
+		find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+
+	if (children)
+	{
+		Relation	attr_rel;
+		ListCell   *child;
+
+		/*
+		 * In case of a partitioned table, the column must be dropped from the
+		 * partitions as well.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot drop column from only the partitioned table when partitions exist"),
+					 errhint("Do not specify the ONLY keyword.")));
+
+		attr_rel = table_open(AttributeRelationId, RowExclusiveLock);
+		foreach(child, children)
+		{
+			Oid			childrelid = lfirst_oid(child);
+			Relation	childrel;
+			Form_pg_attribute childatt;
+
+			/* find_inheritance_children already got lock */
+			childrel = table_open(childrelid, NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+
+			/* Fetch a copy of the child's pg_attribute row, so it can be modified */
+			tuple = SearchSysCacheCopyAttName(childrelid, colName);
+			if (!HeapTupleIsValid(tuple))	/* shouldn't happen */
+				elog(ERROR, "cache lookup failed for attribute \"%s\" of relation %u",
+					 colName, childrelid);
+			childatt = (Form_pg_attribute) GETSTRUCT(tuple);
+
+			if (childatt->attinhcount <= 0) /* shouldn't happen */
+				elog(ERROR, "relation %u has non-inherited attribute \"%s\"",
+					 childrelid, colName);
+
+			if (recurse)
+			{
+				/*
+				 * If the child column has other definition sources, just
+				 * decrement its inheritance count; if not, recurse to delete
+				 * it.
+				 */
+				if (childatt->attinhcount == 1 && !childatt->attislocal)
+				{
+					/*
+					 * Time to delete this child column, too.  The call
+					 * passes recurse = true, recursing = true and
+					 * missing_ok = false, and shares our 'addrs' list.
+					 */
+					ATExecDropColumn(wqueue, childrel, colName,
+									 behavior, true, true,
+									 false, lockmode, addrs);
+				}
+				else
+				{
+					/* Child column must survive my deletion */
+					childatt->attinhcount--;
+
+					CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);
+
+					/* Make update visible */
+					CommandCounterIncrement();
+				}
+			}
+			else
+			{
+				/*
+				 * If we were told to drop ONLY in this table (no recursion),
+				 * we need to mark the inheritors' attributes as locally
+				 * defined rather than inherited.
+				 */
+				childatt->attinhcount--;
+				childatt->attislocal = true;
+
+				CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);
+
+				/* Make update visible */
+				CommandCounterIncrement();
+			}
+
+			heap_freetuple(tuple);
+
+			/* Closed with NoLock, so the child's lock is not released here */
+			table_close(childrel, NoLock);
+		}
+		table_close(attr_rel, RowExclusiveLock);
+	}
+
+	/* Add object to delete */
+	object.classId = RelationRelationId;
+	object.objectId = RelationGetRelid(rel);
+	object.objectSubId = attnum;
+	add_exact_object_address(&object, addrs);
+
+	if (!recursing)
+	{
+		/* Recursion has ended, drop everything that was collected */
+		performMultipleDeletions(addrs, behavior, 0);
+		free_object_addresses(addrs);
+	}
+
+	return object;
+}
+
+/*
+ * ALTER TABLE ADD INDEX
+ *
+ * The grammar has no such command; parse_utilcmd.c turns UNIQUE and PRIMARY
+ * KEY constraints into AT_AddIndex subcommands, which lets the index be
+ * created at the appropriate time during ALTER.
+ *
+ * 'stmt' must already have been through transformIndexStmt and must not ask
+ * for a concurrent build.  When 'is_rebuild' is true, the schema rights check
+ * and notices are suppressed, because an existing index is being rebuilt.
+ *
+ * Return value is the address of the new index.
+ */
+static ObjectAddress
+ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
+			   IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
+{
+	ObjectAddress address;
+	Relation	irel;
+
+	Assert(IsA(stmt, IndexStmt));
+	Assert(!stmt->concurrent);
+	Assert(stmt->transformed);	/* already through transformIndexStmt */
+
+	/*
+	 * The index build is skipped if phase 3 will do it (table rewrite
+	 * pending) or if we're reusing the storage of an old index.
+	 */
+	address = DefineIndex(RelationGetRelid(rel),
+						  stmt,
+						  InvalidOid,	/* no predefined OID */
+						  InvalidOid,	/* no parent index */
+						  InvalidOid,	/* no parent constraint */
+						  true, /* is_alter_table */
+						  !is_rebuild,	/* check_rights */
+						  false,	/* check_not_in_use - we did it already */
+						  tab->rewrite > 0 || OidIsValid(stmt->oldNode),	/* skip_build */
+						  is_rebuild);	/* quiet */
+
+	/* Nothing more to do unless an old relfilenode is being reused */
+	if (!OidIsValid(stmt->oldNode))
+		return address;
+
+	/*
+	 * TryReuseIndex() stashed a relfilenode for us, and it was used for the
+	 * new index instead of building from scratch.  Restore associated fields.
+	 * This may store InvalidSubTransactionId in both fields, in which case
+	 * relcache.c will assume it can rebuild the relcache entry.  Hence, do
+	 * this after the CCI that made catalog rows visible to any rebuild.  The
+	 * DROP of the old edition of this index will have scheduled the storage
+	 * for deletion at commit, so cancel that pending deletion.
+	 */
+	irel = index_open(address.objectId, NoLock);
+	irel->rd_createSubid = stmt->oldCreateSubid;
+	irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
+	RelationPreserveStorage(irel->rd_node, true);
+	index_close(irel, NoLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ADD STATISTICS
+ *
+ * There is no such command in the grammar, but we use this internally to add
+ * AT_ReAddStatistics subcommands to rebuild extended statistics after a table
+ * column type change.
+ *
+ * 'stmt' must already have been through transformStatsStmt.  The return
+ * value is whatever address CreateStatistics() reports.
+ */
+static ObjectAddress
+ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+					CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
+{
+	/* Sanity checks: right node type, and already transformed */
+	Assert(IsA(stmt, CreateStatsStmt));
+	Assert(stmt->transformed);
+
+	return CreateStatistics(stmt);
+}
+
+/*
+ * ALTER TABLE ADD CONSTRAINT USING INDEX
+ *
+ * Creates a PRIMARY KEY constraint (if stmt->primary) or a UNIQUE constraint
+ * (otherwise) on top of the already-existing index identified by
+ * stmt->indexOid.  The constraint takes its name from stmt->idxname, or from
+ * the index when no name was given; if the two names differ, the index is
+ * renamed to match and a NOTICE is issued.
+ *
+ * Not supported on partitioned tables, for which an error is raised.
+ *
+ * Returns the address of the new constraint.
+ */
+static ObjectAddress
+ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel,
+						 IndexStmt *stmt, LOCKMODE lockmode)
+{
+	Oid			index_oid = stmt->indexOid;
+	Relation	indexRel;
+	char	   *indexName;
+	IndexInfo  *indexInfo;
+	char	   *constraintName;
+	char		constraintType;
+	ObjectAddress address;
+	bits16		flags;
+
+	Assert(IsA(stmt, IndexStmt));
+	Assert(OidIsValid(index_oid));
+	Assert(stmt->isconstraint);
+
+	/*
+	 * Doing this on partitioned tables is not a simple feature to implement,
+	 * so let's punt for now.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX is not supported on partitioned tables")));
+
+	indexRel = index_open(index_oid, AccessShareLock);
+
+	/* Keep a private copy of the index's current name */
+	indexName = pstrdup(RelationGetRelationName(indexRel));
+
+	indexInfo = BuildIndexInfo(indexRel);
+
+	/* this should have been checked at parse time */
+	if (!indexInfo->ii_Unique)
+		elog(ERROR, "index \"%s\" is not unique", indexName);
+
+	/*
+	 * Determine name to assign to constraint.  We require a constraint to
+	 * have the same name as the underlying index; therefore, use the index's
+	 * existing name as the default constraint name, and if the user
+	 * explicitly gives some other name for the constraint, rename the index
+	 * to match.
+	 */
+	constraintName = stmt->idxname;
+	if (constraintName == NULL)
+		constraintName = indexName;
+	else if (strcmp(constraintName, indexName) != 0)
+	{
+		ereport(NOTICE,
+				(errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX will rename index \"%s\" to \"%s\"",
+						indexName, constraintName)));
+		RenameRelationInternal(index_oid, constraintName, false, true);
+	}
+
+	/* Extra checks needed if making primary key */
+	if (stmt->primary)
+		index_check_primary_key(rel, indexInfo, true, stmt);
+
+	/* Note we currently don't support EXCLUSION constraints here */
+	if (stmt->primary)
+		constraintType = CONSTRAINT_PRIMARY;
+	else
+		constraintType = CONSTRAINT_UNIQUE;
+
+	/*
+	 * Create the catalog entries for the constraint.  The first two flags
+	 * are always set; the deferrability and primary-key flags mirror the
+	 * corresponding fields of the statement.
+	 */
+	flags = INDEX_CONSTR_CREATE_UPDATE_INDEX |
+		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS |
+		(stmt->initdeferred ? INDEX_CONSTR_CREATE_INIT_DEFERRED : 0) |
+		(stmt->deferrable ? INDEX_CONSTR_CREATE_DEFERRABLE : 0) |
+		(stmt->primary ? INDEX_CONSTR_CREATE_MARK_AS_PRIMARY : 0);
+
+	address = index_constraint_create(rel,
+									  index_oid,
+									  InvalidOid,
+									  indexInfo,
+									  constraintName,
+									  constraintType,
+									  flags,
+									  allowSystemTableMods,
+									  false);	/* is_internal */
+
+	/* Closed with NoLock, so the lock taken by index_open is not released here */
+	index_close(indexRel, NoLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ADD CONSTRAINT
+ *
+ * Dispatches on the constraint type: CHECK constraints are handed to
+ * ATAddCheckConstraint, foreign keys to ATAddForeignKeyConstraint (after a
+ * name has been validated or chosen).  Any other type is an internal error.
+ *
+ * Return value is the address of the new constraint; if no constraint was
+ * added, InvalidObjectAddress is returned.
+ */
+static ObjectAddress
+ATExecAddConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					Constraint *newConstraint, bool recurse, bool is_readd,
+					LOCKMODE lockmode)
+{
+	ObjectAddress address = InvalidObjectAddress;
+
+	Assert(IsA(newConstraint, Constraint));
+
+	/*
+	 * Only CONSTR_CHECK and CONSTR_FOREIGN nodes are currently expected to
+	 * arrive here (see the preprocessing done in parse_utilcmd.c).
+	 */
+	if (newConstraint->contype == CONSTR_CHECK)
+	{
+		address = ATAddCheckConstraint(wqueue, tab, rel,
+									   newConstraint, recurse, false, is_readd,
+									   lockmode);
+	}
+	else if (newConstraint->contype == CONSTR_FOREIGN)
+	{
+		/*
+		 * Pick a name if none was supplied; otherwise make sure the supplied
+		 * one is not already taken on this relation.
+		 */
+		if (newConstraint->conname == NULL)
+		{
+			newConstraint->conname =
+				ChooseConstraintName(RelationGetRelationName(rel),
+									 ChooseForeignKeyConstraintNameAddition(newConstraint->fk_attrs),
+									 "fkey",
+									 RelationGetNamespace(rel),
+									 NIL);
+		}
+		else if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+									  RelationGetRelid(rel),
+									  newConstraint->conname))
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("constraint \"%s\" for relation \"%s\" already exists",
+							newConstraint->conname,
+							RelationGetRelationName(rel))));
+		}
+
+		address = ATAddForeignKeyConstraint(wqueue, tab, rel,
+											newConstraint, InvalidOid,
+											recurse, false,
+											lockmode);
+	}
+	else
+	{
+		elog(ERROR, "unrecognized constraint type: %d",
+			 (int) newConstraint->contype);
+	}
+
+	return address;
+}
+
+/*
+ * Build the column-name part of the name of a new foreign key constraint:
+ * the given column names (a list of String nodes), joined with underscores.
+ * The caller passes the result to ChooseConstraintName along with the parent
+ * table name and the "fkey" suffix.
+ *
+ * Fewer than NAMEDATALEN characters will actually be used, so we stop adding
+ * names once that many have been generated.  The result is palloc'd.
+ *
+ * XXX see also ChooseExtendedStatisticNameAddition and
+ * ChooseIndexNameAddition.
+ */
+static char *
+ChooseForeignKeyConstraintNameAddition(List *colnames)
+{
+	char		namebuf[NAMEDATALEN * 2];
+	int			used = 0;
+	ListCell   *cell;
+
+	namebuf[0] = '\0';
+	foreach(cell, colnames)
+	{
+		const char *colname = strVal(lfirst(cell));
+		char	   *dest;
+
+		/* separate this name from the previous one with an underscore */
+		if (used > 0)
+		{
+			namebuf[used] = '_';
+			used++;
+		}
+
+		/*
+		 * Here used <= NAMEDATALEN, so copying at most NAMEDATALEN bytes
+		 * (terminator included) stays within namebuf.  colname should be
+		 * shorter than NAMEDATALEN already; strlcpy is used for paranoia.
+		 */
+		dest = namebuf + used;
+		strlcpy(dest, colname, NAMEDATALEN);
+		used += strlen(dest);
+
+		if (used >= NAMEDATALEN)
+			break;
+	}
+
+	return pstrdup(namebuf);
+}
+
+/*
+ * Add a check constraint to a single table and its children.  Returns the
+ * address of the constraint added to the parent relation, if one gets added,
+ * or InvalidObjectAddress otherwise.
+ *
+ * Subroutine for ATExecAddConstraint.
+ *
+ * We must recurse to child tables during execution, rather than using
+ * ALTER TABLE's normal prep-time recursion.  The reason is that all the
+ * constraints *must* be given the same name, else they won't be seen as
+ * related later.  If the user didn't explicitly specify a name, then
+ * AddRelationNewConstraints would normally assign different names to the
+ * child constraints.  To fix that, we must capture the name assigned at
+ * the parent table and pass that down.
+ *
+ * 'recurse' is false when ONLY was specified; in that case an error is
+ * raised if the table has children (unless the constraint is NO INHERIT or
+ * was merged into an existing one, in which cases children are never looked
+ * at).  'recursing' is passed as true by the recursive calls made below.
+ * 'is_readd' is forwarded to AddRelationNewConstraints as is_internal and
+ * also enables merging with an existing constraint.  Note that
+ * constr->conname is filled in here if it was NULL on entry.
+ */
+static ObjectAddress
+ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					 Constraint *constr, bool recurse, bool recursing,
+					 bool is_readd, LOCKMODE lockmode)
+{
+	List	   *newcons;
+	ListCell   *lcon;
+	List	   *children;
+	ListCell   *child;
+	ObjectAddress address = InvalidObjectAddress;
+
+	/* At top level, permission check was done in ATPrepCmd, else do it */
+	if (recursing)
+		ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	/*
+	 * Call AddRelationNewConstraints to do the work, making sure it works on
+	 * a copy of the Constraint so transformExpr can't modify the original. It
+	 * returns a list of cooked constraints.
+	 *
+	 * If the constraint ends up getting merged with a pre-existing one, it's
+	 * omitted from the returned list, which is what we want: we do not need
+	 * to do any validation work.  That can only happen at child tables,
+	 * though, since we disallow merging at the top level.
+	 */
+	newcons = AddRelationNewConstraints(rel, NIL,
+										list_make1(copyObject(constr)),
+										recursing | is_readd,	/* allow_merge */
+										!recursing, /* is_local */
+										is_readd,	/* is_internal */
+										NULL);	/* queryString not available
+												 * here */
+
+	/* we don't expect more than one constraint here */
+	Assert(list_length(newcons) <= 1);
+
+	/* Add each to-be-validated constraint to Phase 3's queue */
+	foreach(lcon, newcons)
+	{
+		CookedConstraint *ccon = (CookedConstraint *) lfirst(lcon);
+
+		if (!ccon->skip_validation)
+		{
+			NewConstraint *newcon;
+
+			/* palloc0 leaves every field not assigned below zeroed */
+			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+			newcon->name = ccon->name;
+			newcon->contype = ccon->contype;
+			newcon->qual = ccon->expr;
+
+			tab->constraints = lappend(tab->constraints, newcon);
+		}
+
+		/* Save the actually assigned name if it was defaulted */
+		if (constr->conname == NULL)
+			constr->conname = ccon->name;
+
+		ObjectAddressSet(address, ConstraintRelationId, ccon->conoid);
+	}
+
+	/* At this point we must have a locked-down name to use */
+	Assert(constr->conname != NULL);
+
+	/* Advance command counter in case same table is visited multiple times */
+	CommandCounterIncrement();
+
+	/*
+	 * If the constraint got merged with an existing constraint, we're done.
+	 * We mustn't recurse to child tables in this case, because they've
+	 * already got the constraint, and visiting them again would lead to an
+	 * incorrect value for coninhcount.
+	 */
+	if (newcons == NIL)
+		return address;
+
+	/*
+	 * If adding a NO INHERIT constraint, no need to find our children.
+	 */
+	if (constr->is_no_inherit)
+		return address;
+
+	/*
+	 * Propagate to children as appropriate.  Unlike most other ALTER
+	 * routines, we have to do this one level of recursion at a time; we can't
+	 * use find_all_inheritors to do it in one pass.
+	 */
+	children =
+		find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+
+	/*
+	 * Check if ONLY was specified with ALTER TABLE.  If so, allow the
+	 * constraint creation only if there are no children currently.  Error out
+	 * otherwise.
+	 */
+	if (!recurse && children != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("constraint must be added to child tables too")));
+
+	foreach(child, children)
+	{
+		Oid			childrelid = lfirst_oid(child);
+		Relation	childrel;
+		AlteredTableInfo *childtab;
+
+		/* find_inheritance_children already got lock */
+		childrel = table_open(childrelid, NoLock);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+
+		/* Find or create work queue entry for this table */
+		childtab = ATGetQueueEntry(wqueue, childrel);
+
+		/*
+		 * Recurse to child, with recursing = true.  The child's own result
+		 * is discarded: the address returned is that of the constraint on
+		 * 'rel'.
+		 */
+		ATAddCheckConstraint(wqueue, childtab, childrel,
+							 constr, recurse, true, is_readd, lockmode);
+
+		/* Closed with NoLock, so the child's lock is not released here */
+		table_close(childrel, NoLock);
+	}
+
+	return address;
+}
+
+/*
+ * Add a foreign-key constraint to a single table; return the new constraint's
+ * address.
+ *
+ * Subroutine for ATExecAddConstraint.  Must already hold exclusive
+ * lock on the rel, and have done appropriate validity checks for it.
+ * We do permissions checks here, however.
+ *
+ * When the referenced or referencing tables (or both) are partitioned,
+ * multiple pg_constraint rows are required -- one for each partitioned table
+ * and each partition on each side (fortunately, not one for every combination
+ * thereof).  We also need action triggers on each leaf partition on the
+ * referenced side, and check triggers on each leaf partition on the
+ * referencing side.
+ */
+static ObjectAddress
+ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
+						  Constraint *fkconstraint, Oid parentConstr,
+						  bool recurse, bool recursing, LOCKMODE lockmode)
+{
+	Relation	pkrel;
+	int16		pkattnum[INDEX_MAX_KEYS];
+	int16		fkattnum[INDEX_MAX_KEYS];
+	Oid			pktypoid[INDEX_MAX_KEYS];
+	Oid			fktypoid[INDEX_MAX_KEYS];
+	Oid			opclasses[INDEX_MAX_KEYS];
+	Oid			pfeqoperators[INDEX_MAX_KEYS];
+	Oid			ppeqoperators[INDEX_MAX_KEYS];
+	Oid			ffeqoperators[INDEX_MAX_KEYS];
+	int			i;
+	int			numfks,
+				numpks;
+	Oid			indexOid;
+	bool		old_check_ok;
+	ObjectAddress address;
+	ListCell   *old_pfeqop_item = list_head(fkconstraint->old_conpfeqop);
+
+	/*
+	 * Grab ShareRowExclusiveLock on the pk table, so that someone doesn't
+	 * delete rows out from under us.
+	 */
+	if (OidIsValid(fkconstraint->old_pktable_oid))
+		pkrel = table_open(fkconstraint->old_pktable_oid, ShareRowExclusiveLock);
+	else
+		pkrel = table_openrv(fkconstraint->pktable, ShareRowExclusiveLock);
+
+	/*
+	 * Validity checks (permission checks wait till we have the column
+	 * numbers)
+	 */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		if (!recurse)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot use ONLY for foreign key on partitioned table \"%s\" referencing relation \"%s\"",
+							RelationGetRelationName(rel),
+							RelationGetRelationName(pkrel))));
+		if (fkconstraint->skip_validation && !fkconstraint->initially_valid)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot add NOT VALID foreign key on partitioned table \"%s\" referencing relation \"%s\"",
+							RelationGetRelationName(rel),
+							RelationGetRelationName(pkrel)),
+					 errdetail("This feature is not yet supported on partitioned tables.")));
+	}
+
+	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
+		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("referenced relation \"%s\" is not a table",
+						RelationGetRelationName(pkrel))));
+
+	if (!allowSystemTableMods && IsSystemRelation(pkrel))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						RelationGetRelationName(pkrel))));
+
+	/*
+	 * References from permanent or unlogged tables to temp tables, and from
+	 * permanent tables to unlogged tables, are disallowed because the
+	 * referenced data can vanish out from under us.  References from temp
+	 * tables to any other table type are also disallowed, because other
+	 * backends might need to run the RI triggers on the perm table, but they
+	 * can't reliably see tuples in the local buffers of other backends.
+	 */
+	switch (rel->rd_rel->relpersistence)
+	{
+		case RELPERSISTENCE_PERMANENT:
+			if (!RelationIsPermanent(pkrel))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on permanent tables may reference only permanent tables")));
+			break;
+		case RELPERSISTENCE_UNLOGGED:
+			if (!RelationIsPermanent(pkrel)
+				&& pkrel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on unlogged tables may reference only permanent or unlogged tables")));
+			break;
+		case RELPERSISTENCE_TEMP:
+			if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on temporary tables may reference only temporary tables")));
+			if (!pkrel->rd_islocaltemp || !rel->rd_islocaltemp)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on temporary tables must involve temporary tables of this session")));
+			break;
+	}
+
+	/*
+	 * Look up the referencing attributes to make sure they exist, and record
+	 * their attnums and type OIDs.
+	 */
+	MemSet(pkattnum, 0, sizeof(pkattnum));
+	MemSet(fkattnum, 0, sizeof(fkattnum));
+	MemSet(pktypoid, 0, sizeof(pktypoid));
+	MemSet(fktypoid, 0, sizeof(fktypoid));
+	MemSet(opclasses, 0, sizeof(opclasses));
+	MemSet(pfeqoperators, 0, sizeof(pfeqoperators));
+	MemSet(ppeqoperators, 0, sizeof(ppeqoperators));
+	MemSet(ffeqoperators, 0, sizeof(ffeqoperators));
+
+	numfks = transformColumnNameList(RelationGetRelid(rel),
+									 fkconstraint->fk_attrs,
+									 fkattnum, fktypoid);
+
+	/*
+	 * If the attribute list for the referenced table was omitted, lookup the
+	 * definition of the primary key and use it.  Otherwise, validate the
+	 * supplied attribute list.  In either case, discover the index OID and
+	 * index opclasses, and the attnums and type OIDs of the attributes.
+	 */
+	if (fkconstraint->pk_attrs == NIL)
+	{
+		numpks = transformFkeyGetPrimaryKey(pkrel, &indexOid,
+											&fkconstraint->pk_attrs,
+											pkattnum, pktypoid,
+											opclasses);
+	}
+	else
+	{
+		numpks = transformColumnNameList(RelationGetRelid(pkrel),
+										 fkconstraint->pk_attrs,
+										 pkattnum, pktypoid);
+		/* Look for an index matching the column list */
+		indexOid = transformFkeyCheckAttrs(pkrel, numpks, pkattnum,
+										   opclasses);
+	}
+
+	/*
+	 * Now we can check permissions.
+	 */
+	checkFkeyPermissions(pkrel, pkattnum, numpks);
+
+	/*
+	 * Check some things for generated columns.
+	 */
+	for (i = 0; i < numfks; i++)
+	{
+		char		attgenerated = TupleDescAttr(RelationGetDescr(rel), fkattnum[i] - 1)->attgenerated;
+
+		if (attgenerated)
+		{
+			/*
+			 * Check restrictions on UPDATE/DELETE actions, per SQL standard
+			 */
+			if (fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETNULL ||
+				fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETDEFAULT ||
+				fkconstraint->fk_upd_action == FKCONSTR_ACTION_CASCADE)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid %s action for foreign key constraint containing generated column",
+								"ON UPDATE")));
+			if (fkconstraint->fk_del_action == FKCONSTR_ACTION_SETNULL ||
+				fkconstraint->fk_del_action == FKCONSTR_ACTION_SETDEFAULT)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid %s action for foreign key constraint containing generated column",
+								"ON DELETE")));
+		}
+	}
+
+	/*
+	 * Look up the equality operators to use in the constraint.
+	 *
+	 * Note that we have to be careful about the difference between the actual
+	 * PK column type and the opclass' declared input type, which might be
+	 * only binary-compatible with it.  The declared opcintype is the right
+	 * thing to probe pg_amop with.
+	 */
+	if (numfks != numpks)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FOREIGN_KEY),
+				 errmsg("number of referencing and referenced columns for foreign key disagree")));
+
+	/*
+	 * On the strength of a previous constraint, we might avoid scanning
+	 * tables to validate this one.  See below.
+	 */
+	old_check_ok = (fkconstraint->old_conpfeqop != NIL);
+	Assert(!old_check_ok || numfks == list_length(fkconstraint->old_conpfeqop));
+
+	for (i = 0; i < numpks; i++)
+	{
+		Oid			pktype = pktypoid[i];
+		Oid			fktype = fktypoid[i];
+		Oid			fktyped;
+		HeapTuple	cla_ht;
+		Form_pg_opclass cla_tup;
+		Oid			amid;
+		Oid			opfamily;
+		Oid			opcintype;
+		Oid			pfeqop;
+		Oid			ppeqop;
+		Oid			ffeqop;
+		int16		eqstrategy;
+		Oid			pfeqop_right;
+
+		/* We need several fields out of the pg_opclass entry */
+		cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclasses[i]));
+		if (!HeapTupleIsValid(cla_ht))
+			elog(ERROR, "cache lookup failed for opclass %u", opclasses[i]);
+		cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht);
+		amid = cla_tup->opcmethod;
+		opfamily = cla_tup->opcfamily;
+		opcintype = cla_tup->opcintype;
+		ReleaseSysCache(cla_ht);
+
+		/*
+		 * Check it's a btree; currently this can never fail since no other
+		 * index AMs support unique indexes.  If we ever did have other types
+		 * of unique indexes, we'd need a way to determine which operator
+		 * strategy number is equality.  (Is it reasonable to insist that
+		 * every such index AM use btree's number for equality?)
+		 */
+		if (amid != BTREE_AM_OID)
+			elog(ERROR, "only b-tree indexes are supported for foreign keys");
+		eqstrategy = BTEqualStrategyNumber;
+
+		/*
+		 * There had better be a primary equality operator for the index.
+		 * We'll use it for PK = PK comparisons.
+		 */
+		ppeqop = get_opfamily_member(opfamily, opcintype, opcintype,
+									 eqstrategy);
+
+		if (!OidIsValid(ppeqop))
+			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+				 eqstrategy, opcintype, opcintype, opfamily);
+
+		/*
+		 * Are there equality operators that take exactly the FK type? Assume
+		 * we should look through any domain here.
+		 */
+		fktyped = getBaseType(fktype);
+
+		pfeqop = get_opfamily_member(opfamily, opcintype, fktyped,
+									 eqstrategy);
+		if (OidIsValid(pfeqop))
+		{
+			pfeqop_right = fktyped;
+			ffeqop = get_opfamily_member(opfamily, fktyped, fktyped,
+										 eqstrategy);
+		}
+		else
+		{
+			/* keep compiler quiet */
+			pfeqop_right = InvalidOid;
+			ffeqop = InvalidOid;
+		}
+
+		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
+		{
+			/*
+			 * Otherwise, look for an implicit cast from the FK type to the
+			 * opcintype, and if found, use the primary equality operator.
+			 * This is a bit tricky because opcintype might be a polymorphic
+			 * type such as ANYARRAY or ANYENUM; so what we have to test is
+			 * whether the two actual column types can be concurrently cast to
+			 * that type.  (Otherwise, we'd fail to reject combinations such
+			 * as int[] and point[].)
+			 */
+			Oid			input_typeids[2];
+			Oid			target_typeids[2];
+
+			input_typeids[0] = pktype;
+			input_typeids[1] = fktype;
+			target_typeids[0] = opcintype;
+			target_typeids[1] = opcintype;
+			if (can_coerce_type(2, input_typeids, target_typeids,
+								COERCION_IMPLICIT))
+			{
+				pfeqop = ffeqop = ppeqop;
+				pfeqop_right = opcintype;
+			}
+		}
+
+		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("foreign key constraint \"%s\" cannot be implemented",
+							fkconstraint->conname),
+					 errdetail("Key columns \"%s\" and \"%s\" "
+							   "are of incompatible types: %s and %s.",
+							   strVal(list_nth(fkconstraint->fk_attrs, i)),
+							   strVal(list_nth(fkconstraint->pk_attrs, i)),
+							   format_type_be(fktype),
+							   format_type_be(pktype))));
+
+		if (old_check_ok)
+		{
+			/*
+			 * When a pfeqop changes, revalidate the constraint.  We could
+			 * permit intra-opfamily changes, but that adds subtle complexity
+			 * without any concrete benefit for core types.  We need not
+			 * assess ppeqop or ffeqop, which RI_Initial_Check() does not use.
+			 */
+			old_check_ok = (pfeqop == lfirst_oid(old_pfeqop_item));
+			old_pfeqop_item = lnext(fkconstraint->old_conpfeqop,
+									old_pfeqop_item);
+		}
+		if (old_check_ok)
+		{
+			Oid			old_fktype;
+			Oid			new_fktype;
+			CoercionPathType old_pathtype;
+			CoercionPathType new_pathtype;
+			Oid			old_castfunc;
+			Oid			new_castfunc;
+			Form_pg_attribute attr = TupleDescAttr(tab->oldDesc,
+												   fkattnum[i] - 1);
+
+			/*
+			 * Identify coercion pathways from each of the old and new FK-side
+			 * column types to the right (foreign) operand type of the pfeqop.
+			 * We may assume that pg_constraint.conkey is not changing.
+			 */
+			old_fktype = attr->atttypid;
+			new_fktype = fktype;
+			old_pathtype = findFkeyCast(pfeqop_right, old_fktype,
+										&old_castfunc);
+			new_pathtype = findFkeyCast(pfeqop_right, new_fktype,
+										&new_castfunc);
+
+			/*
+			 * Upon a change to the cast from the FK column to its pfeqop
+			 * operand, revalidate the constraint.  For this evaluation, a
+			 * binary coercion cast is equivalent to no cast at all.  While
+			 * type implementors should design implicit casts with an eye
+			 * toward consistency of operations like equality, we cannot
+			 * assume here that they have done so.
+			 *
+			 * A function with a polymorphic argument could change behavior
+			 * arbitrarily in response to get_fn_expr_argtype().  Therefore,
+			 * when the cast destination is polymorphic, we only avoid
+			 * revalidation if the input type has not changed at all.  Given
+			 * just the core data types and operator classes, this requirement
+			 * prevents no would-be optimizations.
+			 *
+			 * If the cast converts from a base type to a domain thereon, then
+			 * that domain type must be the opcintype of the unique index.
+			 * Necessarily, the primary key column must then be of the domain
+			 * type.  Since the constraint was previously valid, all values on
+			 * the foreign side necessarily exist on the primary side and in
+			 * turn conform to the domain.  Consequently, we need not treat
+			 * domains specially here.
+			 *
+			 * Since we require that all collations share the same notion of
+			 * equality (which they do, because texteq reduces to bitwise
+			 * equality), we don't compare collation here.
+			 *
+			 * We need not directly consider the PK type.  It's necessarily
+			 * binary coercible to the opcintype of the unique index column,
+			 * and ri_triggers.c will only deal with PK datums in terms of
+			 * that opcintype.  Changing the opcintype also changes pfeqop.
+			 */
+			old_check_ok = (new_pathtype == old_pathtype &&
+							new_castfunc == old_castfunc &&
+							(!IsPolymorphicType(pfeqop_right) ||
+							 new_fktype == old_fktype));
+		}
+
+		pfeqoperators[i] = pfeqop;
+		ppeqoperators[i] = ppeqop;
+		ffeqoperators[i] = ffeqop;
+	}
+
+	/*
+	 * Create all the constraint and trigger objects, recursing to partitions
+	 * as necessary.  First handle the referenced side.
+	 */
+	address = addFkRecurseReferenced(wqueue, fkconstraint, rel, pkrel,
+									 indexOid,
+									 InvalidOid,	/* no parent constraint */
+									 numfks,
+									 pkattnum,
+									 fkattnum,
+									 pfeqoperators,
+									 ppeqoperators,
+									 ffeqoperators,
+									 old_check_ok);
+
+	/* Now handle the referencing side. */
+	addFkRecurseReferencing(wqueue, fkconstraint, rel, pkrel,
+							indexOid,
+							address.objectId,
+							numfks,
+							pkattnum,
+							fkattnum,
+							pfeqoperators,
+							ppeqoperators,
+							ffeqoperators,
+							old_check_ok,
+							lockmode);
+
+	/*
+	 * Done.  Close pk table, but keep lock until we've committed.
+	 */
+	table_close(pkrel, NoLock);
+
+	return address;
+}
+
+/*
+ * addFkRecurseReferenced
+ *		subroutine for ATAddForeignKeyConstraint; recurses on the referenced
+ *		side of the constraint
+ *
+ * Create pg_constraint rows for the referenced side of the constraint,
+ * referencing the parent of the referencing side; also create action triggers
+ * on leaf partitions.  If the table is partitioned, recurse to handle each
+ * partition.
+ *
+ * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
+ * of an ALTER TABLE sequence.  This routine only hands it on to its own
+ * recursive calls.
+ * fkconstraint is the constraint being added.
+ * rel is the root referencing relation.
+ * pkrel is the referenced relation; might be a partition, if recursing.
+ * indexOid is the OID of the index (on pkrel) implementing this constraint.
+ * parentConstr is the OID of a parent constraint; InvalidOid if this is a
+ * top-level constraint.
+ * numfks is the number of columns in the foreign key
+ * pkattnum is the attnum array of referenced attributes.
+ * fkattnum is the attnum array of referencing attributes.
+ * pf/pp/ffeqoperators are OID array of operators between columns.
+ * old_check_ok signals that this constraint replaces an existing one that
+ * was already validated (thus this one doesn't need validation).  Like
+ * wqueue, it is only passed down the recursion by this routine.
+ *
+ * Returns the address of the pg_constraint row created for pkrel at this
+ * level of the recursion; the addresses produced by the recursive calls for
+ * individual partitions are discarded.
+ */
+static ObjectAddress
+addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, Relation rel,
+					   Relation pkrel, Oid indexOid, Oid parentConstr,
+					   int numfks,
+					   int16 *pkattnum, int16 *fkattnum, Oid *pfeqoperators,
+					   Oid *ppeqoperators, Oid *ffeqoperators, bool old_check_ok)
+{
+	ObjectAddress address;
+	Oid			constrOid;
+	char	   *conname;
+	bool		conislocal;
+	int			coninhcount;
+	bool		connoinherit;
+
+	/*
+	 * Verify relkind for each referenced partition.  At the top level, this
+	 * is redundant with a previous check, but we need it when recursing.
+	 */
+	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
+		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("referenced relation \"%s\" is not a table",
+						RelationGetRelationName(pkrel))));
+
+	/*
+	 * Caller supplies us with a constraint name; however, it may be used in
+	 * this partition, so come up with a different one in that case.
+	 *
+	 * Note that the uniqueness check and the generated name are both based
+	 * on rel (the referencing relation), not on pkrel.
+	 */
+	if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+							 RelationGetRelid(rel),
+							 fkconstraint->conname))
+		conname = ChooseConstraintName(RelationGetRelationName(rel),
+									   ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
+									   "fkey",
+									   RelationGetNamespace(rel), NIL);
+	else
+		conname = fkconstraint->conname;
+
+	/*
+	 * A valid parentConstr means this row is the child of another constraint
+	 * (we are being called for a partition of the referenced table), so mark
+	 * it as inherited rather than locally defined.
+	 */
+	if (OidIsValid(parentConstr))
+	{
+		conislocal = false;
+		coninhcount = 1;
+		connoinherit = false;
+	}
+	else
+	{
+		conislocal = true;
+		coninhcount = 0;
+
+		/*
+		 * always inherit for partitioned tables, never for legacy inheritance
+		 */
+		connoinherit = rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE;
+	}
+
+	/*
+	 * Record the FK constraint in pg_constraint.
+	 */
+	constrOid = CreateConstraintEntry(conname,
+									  RelationGetNamespace(rel),
+									  CONSTRAINT_FOREIGN,
+									  fkconstraint->deferrable,
+									  fkconstraint->initdeferred,
+									  fkconstraint->initially_valid,
+									  parentConstr,
+									  RelationGetRelid(rel),
+									  fkattnum,
+									  numfks,
+									  numfks,
+									  InvalidOid,	/* not a domain constraint */
+									  indexOid,
+									  RelationGetRelid(pkrel),
+									  pkattnum,
+									  pfeqoperators,
+									  ppeqoperators,
+									  ffeqoperators,
+									  numfks,
+									  fkconstraint->fk_upd_action,
+									  fkconstraint->fk_del_action,
+									  fkconstraint->fk_matchtype,
+									  NULL, /* no exclusion constraint */
+									  NULL, /* no check constraint */
+									  NULL,
+									  conislocal,	/* islocal */
+									  coninhcount,	/* inhcount */
+									  connoinherit, /* conNoInherit */
+									  false);	/* is_internal */
+
+	/* This is the address we hand back to the caller */
+	ObjectAddressSet(address, ConstraintRelationId, constrOid);
+
+	/*
+	 * Mark the child constraint as part of the parent constraint; it must not
+	 * be dropped on its own.  (This constraint is deleted when the partition
+	 * is detached, but a special check needs to occur that the partition
+	 * contains no referenced values.)
+	 */
+	if (OidIsValid(parentConstr))
+	{
+		ObjectAddress referenced;
+
+		ObjectAddressSet(referenced, ConstraintRelationId, parentConstr);
+		recordDependencyOn(&address, &referenced, DEPENDENCY_INTERNAL);
+	}
+
+	/* make new constraint visible, in case we add more */
+	CommandCounterIncrement();
+
+	/*
+	 * If the referenced table is a plain relation, create the action triggers
+	 * that enforce the constraint.
+	 */
+	if (pkrel->rd_rel->relkind == RELKIND_RELATION)
+	{
+		createForeignKeyActionTriggers(rel, RelationGetRelid(pkrel),
+									   fkconstraint,
+									   constrOid, indexOid);
+	}
+
+	/*
+	 * If the referenced table is partitioned, recurse on ourselves to handle
+	 * each partition.  We need one pg_constraint row created for each
+	 * partition in addition to the pg_constraint row for the parent table.
+	 */
+	if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionDesc pd = RelationGetPartitionDesc(pkrel, false);
+
+		for (int i = 0; i < pd->nparts; i++)
+		{
+			Relation	partRel;
+			AttrMap    *map;
+			AttrNumber *mapped_pkattnum;
+			Oid			partIndexId;
+
+			partRel = table_open(pd->oids[i], ShareRowExclusiveLock);
+
+			/*
+			 * Map the attribute numbers in the referenced side of the FK
+			 * definition to match the partition's column layout.
+			 */
+			map = build_attrmap_by_name_if_req(RelationGetDescr(partRel),
+											   RelationGetDescr(pkrel));
+			if (map)
+			{
+				mapped_pkattnum = palloc(sizeof(AttrNumber) * numfks);
+				for (int j = 0; j < numfks; j++)
+					mapped_pkattnum[j] = map->attnums[pkattnum[j] - 1];
+			}
+			else
+				mapped_pkattnum = pkattnum; /* no map was needed; reuse as-is */
+
+			/*
+			 * do the deed: the row just created (constrOid) becomes the
+			 * parent constraint of the row created for this partition
+			 */
+			partIndexId = index_get_partition(partRel, indexOid);
+			if (!OidIsValid(partIndexId))
+				elog(ERROR, "index for %u not found in partition %s",
+					 indexOid, RelationGetRelationName(partRel));
+			addFkRecurseReferenced(wqueue, fkconstraint, rel, partRel,
+								   partIndexId, constrOid, numfks,
+								   mapped_pkattnum, fkattnum,
+								   pfeqoperators, ppeqoperators, ffeqoperators,
+								   old_check_ok);
+
+			/* Done -- clean up (but keep the lock) */
+			table_close(partRel, NoLock);
+			if (map)
+			{
+				/* mapped_pkattnum was palloc'd only when a map existed */
+				pfree(mapped_pkattnum);
+				free_attrmap(map);
+			}
+		}
+	}
+
+	return address;
+}
+
+/*
+ * addFkRecurseReferencing
+ *		subroutine for ATAddForeignKeyConstraint and CloneFkReferencing
+ *
+ * If the referencing relation is a plain relation, create the necessary check
+ * triggers that implement the constraint, and set up for Phase 3 constraint
+ * verification.  If the referencing relation is a partitioned table, then
+ * we create a pg_constraint row for it and recurse on this routine for each
+ * partition.
+ *
+ * We assume that the referenced relation is locked against concurrent
+ * deletions.  If it's a partitioned relation, every partition must be so
+ * locked.
+ *
+ * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
+ * of an ALTER TABLE sequence.  When it is NULL, no Phase 3 verification is
+ * queued.
+ * fkconstraint is the constraint being added.
+ * rel is the referencing relation; might be a partition, if recursing.
+ * pkrel is the root referenced relation.
+ * indexOid is the OID of the index (on pkrel) implementing this constraint.
+ * parentConstr is the OID of the parent constraint (there is always one).
+ * numfks is the number of columns in the foreign key
+ * pkattnum is the attnum array of referenced attributes.
+ * fkattnum is the attnum array of referencing attributes.
+ * pf/pp/ffeqoperators are OID array of operators between columns.
+ * old_check_ok signals that this constraint replaces an existing one that
+ *		was already validated (thus this one doesn't need validation).
+ * lockmode is the lockmode to acquire on partitions when recursing.
+ *
+ * Raises an error if rel is a foreign table.  A rel that is neither a plain
+ * table nor a partitioned table is otherwise left untouched.
+ */
+static void
+addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel,
+						Relation pkrel, Oid indexOid, Oid parentConstr,
+						int numfks, int16 *pkattnum, int16 *fkattnum,
+						Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators,
+						bool old_check_ok, LOCKMODE lockmode)
+{
+	AssertArg(OidIsValid(parentConstr));
+
+	if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("foreign key constraints are not supported on foreign tables")));
+
+	/*
+	 * If the referencing relation is a plain table, add the check triggers to
+	 * it and, if necessary, schedule it to be checked in Phase 3.
+	 *
+	 * If the relation is partitioned, drill down to do it to its partitions.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_RELATION)
+	{
+		createForeignKeyCheckTriggers(RelationGetRelid(rel),
+									  RelationGetRelid(pkrel),
+									  fkconstraint,
+									  parentConstr,
+									  indexOid);
+
+		/*
+		 * Tell Phase 3 to check that the constraint is satisfied by existing
+		 * rows. We can skip this during table creation, when requested
+		 * explicitly by specifying NOT VALID in an ADD FOREIGN KEY command,
+		 * and when we're recreating a constraint following a SET DATA TYPE
+		 * operation that did not impugn its validity.
+		 */
+		if (wqueue && !old_check_ok && !fkconstraint->skip_validation)
+		{
+			NewConstraint *newcon;
+			AlteredTableInfo *tab;
+
+			tab = ATGetQueueEntry(wqueue, rel);
+
+			/* queue an entry describing the constraint to be verified */
+			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+			newcon->name = get_constraint_name(parentConstr);
+			newcon->contype = CONSTR_FOREIGN;
+			newcon->refrelid = RelationGetRelid(pkrel);
+			newcon->refindid = indexOid;
+			newcon->conid = parentConstr;
+			newcon->qual = (Node *) fkconstraint;
+
+			tab->constraints = lappend(tab->constraints, newcon);
+		}
+	}
+	else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionDesc pd = RelationGetPartitionDesc(rel, false);
+
+		/*
+		 * Recurse to take appropriate action on each partition; either we
+		 * find an existing constraint to reparent to ours, or we create a new
+		 * one.
+		 */
+		for (int i = 0; i < pd->nparts; i++)
+		{
+			Oid			partitionId = pd->oids[i];
+			Relation	partition = table_open(partitionId, lockmode);
+			List	   *partFKs;
+			AttrMap    *attmap;
+			AttrNumber	mapped_fkattnum[INDEX_MAX_KEYS];
+			bool		attached;
+			char	   *conname;
+			Oid			constrOid;
+			ObjectAddress address,
+						referenced;
+			ListCell   *cell;
+
+			CheckTableNotInUse(partition, "ALTER TABLE");
+
+			/* translate the referencing attnums to the partition's columns */
+			attmap = build_attrmap_by_name(RelationGetDescr(partition),
+										   RelationGetDescr(rel));
+			for (int j = 0; j < numfks; j++)
+				mapped_fkattnum[j] = attmap->attnums[fkattnum[j] - 1];
+
+			/* Check whether an existing constraint can be repurposed */
+			partFKs = copyObject(RelationGetFKeyList(partition));
+			attached = false;
+			foreach(cell, partFKs)
+			{
+				ForeignKeyCacheInfo *fk;
+
+				fk = lfirst_node(ForeignKeyCacheInfo, cell);
+				if (tryAttachPartitionForeignKey(fk,
+												 partitionId,
+												 parentConstr,
+												 numfks,
+												 mapped_fkattnum,
+												 pkattnum,
+												 pfeqoperators))
+				{
+					attached = true;
+					break;
+				}
+			}
+			if (attached)
+			{
+				/* existing constraint was linked to ours; nothing to create */
+				table_close(partition, NoLock);
+				continue;
+			}
+
+			/*
+			 * No luck finding a good constraint to reuse; create our own.
+			 */
+			if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+									 RelationGetRelid(partition),
+									 fkconstraint->conname))
+				conname = ChooseConstraintName(RelationGetRelationName(partition),
+											   ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
+											   "fkey",
+											   RelationGetNamespace(partition), NIL);
+			else
+				conname = fkconstraint->conname;
+			constrOid =
+				CreateConstraintEntry(conname,
+									  RelationGetNamespace(partition),
+									  CONSTRAINT_FOREIGN,
+									  fkconstraint->deferrable,
+									  fkconstraint->initdeferred,
+									  fkconstraint->initially_valid,
+									  parentConstr,
+									  partitionId,
+									  mapped_fkattnum,
+									  numfks,
+									  numfks,
+									  InvalidOid,	/* not a domain constraint */
+									  indexOid,
+									  RelationGetRelid(pkrel),
+									  pkattnum,
+									  pfeqoperators,
+									  ppeqoperators,
+									  ffeqoperators,
+									  numfks,
+									  fkconstraint->fk_upd_action,
+									  fkconstraint->fk_del_action,
+									  fkconstraint->fk_matchtype,
+									  NULL, /* no exclusion constraint */
+									  NULL, /* no check constraint */
+									  NULL,
+									  false,	/* islocal */
+									  1,	/* inhcount */
+									  false,	/* conNoInherit */
+									  false);	/* is_internal */
+
+			/*
+			 * Give this constraint partition-type dependencies on the parent
+			 * constraint as well as the table.
+			 */
+			ObjectAddressSet(address, ConstraintRelationId, constrOid);
+			ObjectAddressSet(referenced, ConstraintRelationId, parentConstr);
+			recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI);
+			ObjectAddressSet(referenced, RelationRelationId, partitionId);
+			recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC);
+
+			/* Make all this visible before recursing */
+			CommandCounterIncrement();
+
+			/*
+			 * call ourselves to finalize the creation and we're done; the
+			 * row just created is the parent constraint for that call
+			 */
+			addFkRecurseReferencing(wqueue, fkconstraint, partition, pkrel,
+									indexOid,
+									constrOid,
+									numfks,
+									pkattnum,
+									mapped_fkattnum,
+									pfeqoperators,
+									ppeqoperators,
+									ffeqoperators,
+									old_check_ok,
+									lockmode);
+
+			/* close the partition but keep the lock taken by table_open */
+			table_close(partition, NoLock);
+		}
+	}
+}
+
+/*
+ * CloneForeignKeyConstraints
+ *		Clone foreign keys from a partitioned table to a newly acquired
+ *		partition.
+ *
+ * partitionRel is a partition of parentRel, so we can be certain that it has
+ * the same columns with the same datatypes.  The columns may be in different
+ * order, though.
+ *
+ * wqueue must be passed to set up phase 3 constraint checking, unless the
+ * referencing-side partition is known to be empty (such as in CREATE TABLE /
+ * PARTITION OF).  It is only consulted for constraints in which the parent
+ * is the referencing relation; cloning on the referenced side does not take
+ * a work queue.
+ *
+ * parentRel must be a partitioned table (asserted below).
+ */
+static void
+CloneForeignKeyConstraints(List **wqueue, Relation parentRel,
+						   Relation partitionRel)
+{
+	/* This only works for declarative partitioning */
+	Assert(parentRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/*
+	 * Clone constraints for which the parent is on the referenced side.
+	 */
+	CloneFkReferenced(parentRel, partitionRel);
+
+	/*
+	 * Now clone constraints where the parent is on the referencing side.
+	 */
+	CloneFkReferencing(wqueue, parentRel, partitionRel);
+}
+
+/*
+ * CloneFkReferenced
+ *		Subroutine for CloneForeignKeyConstraints
+ *
+ * Find all the FKs that have the parent relation on the referenced side;
+ * clone those constraints to the given partition.  This is to be called
+ * when the partition is being created or attached.
+ *
+ * This recurses to partitions, if the relation being attached is partitioned.
+ * Recursion is done by calling addFkRecurseReferenced.
+ *
+ * parentRel is the partitioned table that appears on the referenced side.
+ * partitionRel is the partition receiving the cloned constraints.
+ *
+ * Each cloned constraint becomes a child of the corresponding constraint on
+ * parentRel.  No Phase 3 verification is queued.
+ */
+static void
+CloneFkReferenced(Relation parentRel, Relation partitionRel)
+{
+	Relation	pg_constraint;
+	AttrMap    *attmap;
+	ListCell   *cell;
+	SysScanDesc scan;
+	ScanKeyData key[2];
+	HeapTuple	tuple;
+	List	   *clone = NIL;
+
+	/*
+	 * Search for any constraints where this partition's parent is in the
+	 * referenced side.  However, we must not clone any constraint whose
+	 * parent constraint is also going to be cloned, to avoid duplicates.  So
+	 * do it in two steps: first construct the list of constraints to clone,
+	 * then go over that list cloning those whose parents are not in the list.
+	 * (We must not rely on the parent being seen first, since the catalog
+	 * scan could return children first.)
+	 */
+	pg_constraint = table_open(ConstraintRelationId, RowShareLock);
+	ScanKeyInit(&key[0],
+				Anum_pg_constraint_confrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parentRel)));
+	ScanKeyInit(&key[1],
+				Anum_pg_constraint_contype, BTEqualStrategyNumber,
+				F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN));
+	/* This is a seqscan, as we don't have a usable index ... */
+	scan = systable_beginscan(pg_constraint, InvalidOid, true,
+							  NULL, 2, key);
+	while ((tuple = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		clone = lappend_oid(clone, constrForm->oid);
+	}
+	systable_endscan(scan);
+	table_close(pg_constraint, RowShareLock);
+
+	/* Map parentRel's attribute numbers onto partitionRel's column layout */
+	attmap = build_attrmap_by_name(RelationGetDescr(partitionRel),
+								   RelationGetDescr(parentRel));
+	foreach(cell, clone)
+	{
+		Oid			constrOid = lfirst_oid(cell);
+		Form_pg_constraint constrForm;
+		Relation	fkRel;
+		Oid			indexOid;
+		Oid			partIndexId;
+		int			numfks;
+		AttrNumber	conkey[INDEX_MAX_KEYS];
+		AttrNumber	mapped_confkey[INDEX_MAX_KEYS];
+		AttrNumber	confkey[INDEX_MAX_KEYS];
+		Oid			conpfeqop[INDEX_MAX_KEYS];
+		Oid			conppeqop[INDEX_MAX_KEYS];
+		Oid			conffeqop[INDEX_MAX_KEYS];
+		Constraint *fkconstraint;
+
+		/* Syscache keys are Datums, so convert the OID explicitly */
+		tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for constraint %u", constrOid);
+		constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/*
+		 * As explained above: don't try to clone a constraint for which we're
+		 * going to clone the parent.
+		 */
+		if (list_member_oid(clone, constrForm->conparentid))
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/*
+		 * Because we're only expanding the key space at the referenced side,
+		 * we don't need to prevent any operation in the referencing table, so
+		 * AccessShareLock suffices (assumes that dropping the constraint
+		 * acquires AEL).
+		 */
+		fkRel = table_open(constrForm->conrelid, AccessShareLock);
+
+		indexOid = constrForm->conindid;
+		DeconstructFkConstraintRow(tuple,
+								   &numfks,
+								   conkey,
+								   confkey,
+								   conpfeqop,
+								   conppeqop,
+								   conffeqop);
+
+		/* Referenced-side attnums, expressed in the partition's layout */
+		for (int i = 0; i < numfks; i++)
+			mapped_confkey[i] = attmap->attnums[confkey[i] - 1];
+
+		fkconstraint = makeNode(Constraint);
+		/* for now this is all we need */
+
+		/*
+		 * conname points into the syscache tuple, which we keep until after
+		 * the addFkRecurseReferenced call below.
+		 */
+		fkconstraint->conname = NameStr(constrForm->conname);
+		fkconstraint->fk_upd_action = constrForm->confupdtype;
+		fkconstraint->fk_del_action = constrForm->confdeltype;
+		fkconstraint->deferrable = constrForm->condeferrable;
+		fkconstraint->initdeferred = constrForm->condeferred;
+		fkconstraint->initially_valid = true;
+		fkconstraint->fk_matchtype = constrForm->confmatchtype;
+
+		/* set up colnames that are used to generate the constraint name */
+		for (int i = 0; i < numfks; i++)
+		{
+			Form_pg_attribute att;
+
+			att = TupleDescAttr(RelationGetDescr(fkRel),
+								conkey[i] - 1);
+			fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs,
+											 makeString(NameStr(att->attname)));
+		}
+
+		/*
+		 * Add the new foreign key constraint pointing to the new partition.
+		 * Because this new partition appears in the referenced side of the
+		 * constraint, we don't need to set up for Phase 3 check.
+		 */
+		partIndexId = index_get_partition(partitionRel, indexOid);
+		if (!OidIsValid(partIndexId))
+			elog(ERROR, "index for %u not found in partition %s",
+				 indexOid, RelationGetRelationName(partitionRel));
+		addFkRecurseReferenced(NULL,
+							   fkconstraint,
+							   fkRel,
+							   partitionRel,
+							   partIndexId,
+							   constrOid,
+							   numfks,
+							   mapped_confkey,
+							   conkey,
+							   conpfeqop,
+							   conppeqop,
+							   conffeqop,
+							   true);
+
+		/* Keep the lock on the referencing table; drop the syscache pin */
+		table_close(fkRel, NoLock);
+		ReleaseSysCache(tuple);
+	}
+}
+
+/*
+ * CloneFkReferencing
+ *		Subroutine for CloneForeignKeyConstraints
+ *
+ * For each FK constraint in which the parent relation is the referencing
+ * side, find an equivalent constraint in its partition relation that can be
+ * reparented; if one cannot be found, create a new constraint in the
+ * partition as its child.
+ *
+ * If wqueue is given, it is used to set up phase-3 verification for each
+ * cloned constraint; if omitted, we assume that such verification is not
+ * needed (example: the partition is being created anew).
+ *
+ * parentRel is the partitioned table owning the constraints to clone.
+ * partRel is the partition receiving them; an error is raised if it is a
+ * foreign table and parentRel has at least one foreign key.
+ */
+static void
+CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel)
+{
+	AttrMap    *attmap;
+	List	   *partFKs;
+	List	   *clone = NIL;
+	ListCell   *cell;
+
+	/* obtain a list of constraints that we need to clone */
+	foreach(cell, RelationGetFKeyList(parentRel))
+	{
+		ForeignKeyCacheInfo *fk = lfirst(cell);
+
+		clone = lappend_oid(clone, fk->conoid);
+	}
+
+	/*
+	 * Silently do nothing if there's nothing to do.  In particular, this
+	 * avoids throwing a spurious error for foreign tables.
+	 */
+	if (clone == NIL)
+		return;
+
+	if (partRel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("foreign key constraints are not supported on foreign tables")));
+
+	/*
+	 * The constraint key may differ, if the columns in the partition are
+	 * different.  This map is used to convert them.
+	 */
+	attmap = build_attrmap_by_name(RelationGetDescr(partRel),
+								   RelationGetDescr(parentRel));
+
+	partFKs = copyObject(RelationGetFKeyList(partRel));
+
+	foreach(cell, clone)
+	{
+		Oid			parentConstrOid = lfirst_oid(cell);
+		Form_pg_constraint constrForm;
+		Relation	pkrel;
+		HeapTuple	tuple;
+		int			numfks;
+		AttrNumber	conkey[INDEX_MAX_KEYS];
+		AttrNumber	mapped_conkey[INDEX_MAX_KEYS];
+		AttrNumber	confkey[INDEX_MAX_KEYS];
+		Oid			conpfeqop[INDEX_MAX_KEYS];
+		Oid			conppeqop[INDEX_MAX_KEYS];
+		Oid			conffeqop[INDEX_MAX_KEYS];
+		Constraint *fkconstraint;
+		bool		attached;
+		Oid			indexOid;
+		Oid			constrOid;
+		ObjectAddress address,
+					referenced;
+		ListCell   *lc;			/* distinct name: don't shadow outer "cell" */
+
+		tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(parentConstrOid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for constraint %u",
+				 parentConstrOid);
+		constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/* Don't clone constraints whose parents are being cloned */
+		if (list_member_oid(clone, constrForm->conparentid))
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/*
+		 * Need to prevent concurrent deletions.  If pkrel is a partitioned
+		 * relation, that means to lock all partitions.
+		 */
+		pkrel = table_open(constrForm->confrelid, ShareRowExclusiveLock);
+		if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			(void) find_all_inheritors(RelationGetRelid(pkrel),
+									   ShareRowExclusiveLock, NULL);
+
+		DeconstructFkConstraintRow(tuple, &numfks, conkey, confkey,
+								   conpfeqop, conppeqop, conffeqop);
+		/* Referencing-side attnums, expressed in the partition's layout */
+		for (int i = 0; i < numfks; i++)
+			mapped_conkey[i] = attmap->attnums[conkey[i] - 1];
+
+		/*
+		 * Before creating a new constraint, see whether any existing FKs are
+		 * fit for the purpose.  If one is, attach the parent constraint to
+		 * it, and don't clone anything.  This way we avoid the expensive
+		 * verification step and don't end up with a duplicate FK, and we
+		 * don't need to recurse to partitions for this constraint.
+		 */
+		attached = false;
+		foreach(lc, partFKs)
+		{
+			ForeignKeyCacheInfo *fk = lfirst_node(ForeignKeyCacheInfo, lc);
+
+			if (tryAttachPartitionForeignKey(fk,
+											 RelationGetRelid(partRel),
+											 parentConstrOid,
+											 numfks,
+											 mapped_conkey,
+											 confkey,
+											 conpfeqop))
+			{
+				attached = true;
+				table_close(pkrel, NoLock);
+				break;
+			}
+		}
+		if (attached)
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/* No dice.  Set up to create our own constraint */
+		fkconstraint = makeNode(Constraint);
+		/* ->conname is determined below, once fk_attrs has been filled in */
+		fkconstraint->fk_upd_action = constrForm->confupdtype;
+		fkconstraint->fk_del_action = constrForm->confdeltype;
+		fkconstraint->deferrable = constrForm->condeferrable;
+		fkconstraint->initdeferred = constrForm->condeferred;
+		fkconstraint->fk_matchtype = constrForm->confmatchtype;
+		for (int i = 0; i < numfks; i++)
+		{
+			Form_pg_attribute att;
+
+			att = TupleDescAttr(RelationGetDescr(partRel),
+								mapped_conkey[i] - 1);
+			fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs,
+											 makeString(NameStr(att->attname)));
+		}
+
+		/*
+		 * Reuse the parent constraint's name unless the partition already
+		 * has a constraint of that name, in which case generate a new one.
+		 * The generated name is derived from fk_attrs, so this must come
+		 * after the loop above that collects the column names.
+		 */
+		if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+								 RelationGetRelid(partRel),
+								 NameStr(constrForm->conname)))
+			fkconstraint->conname =
+				ChooseConstraintName(RelationGetRelationName(partRel),
+									 ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
+									 "fkey",
+									 RelationGetNamespace(partRel), NIL);
+		else
+			fkconstraint->conname = pstrdup(NameStr(constrForm->conname));
+
+		indexOid = constrForm->conindid;
+		constrOid =
+			CreateConstraintEntry(fkconstraint->conname,
+								  constrForm->connamespace,
+								  CONSTRAINT_FOREIGN,
+								  fkconstraint->deferrable,
+								  fkconstraint->initdeferred,
+								  constrForm->convalidated,
+								  parentConstrOid,
+								  RelationGetRelid(partRel),
+								  mapped_conkey,
+								  numfks,
+								  numfks,
+								  InvalidOid,	/* not a domain constraint */
+								  indexOid,
+								  constrForm->confrelid,	/* same foreign rel */
+								  confkey,
+								  conpfeqop,
+								  conppeqop,
+								  conffeqop,
+								  numfks,
+								  fkconstraint->fk_upd_action,
+								  fkconstraint->fk_del_action,
+								  fkconstraint->fk_matchtype,
+								  NULL, /* no exclusion constraint */
+								  NULL, /* no check constraint */
+								  NULL,
+								  false,	/* islocal */
+								  1,	/* inhcount */
+								  false,	/* conNoInherit */
+								  true);	/* is_internal */
+
+		/* Set up partition dependencies for the new constraint */
+		ObjectAddressSet(address, ConstraintRelationId, constrOid);
+		ObjectAddressSet(referenced, ConstraintRelationId, parentConstrOid);
+		recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI);
+		ObjectAddressSet(referenced, RelationRelationId,
+						 RelationGetRelid(partRel));
+		recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC);
+
+		/* Done with the cloned constraint's tuple */
+		ReleaseSysCache(tuple);
+
+		/* Make all this visible before recursing */
+		CommandCounterIncrement();
+
+		/*
+		 * Create the check triggers (or recurse into sub-partitions), with
+		 * the row just created acting as the parent constraint.
+		 */
+		addFkRecurseReferencing(wqueue,
+								fkconstraint,
+								partRel,
+								pkrel,
+								indexOid,
+								constrOid,
+								numfks,
+								confkey,
+								mapped_conkey,
+								conpfeqop,
+								conppeqop,
+								conffeqop,
+								false,	/* no old check exists */
+								AccessExclusiveLock);
+		table_close(pkrel, NoLock);
+	}
+}
+
+/*
+ * tryAttachPartitionForeignKey
+ *
+ * When the parent of a partition receives [the referencing side of] a foreign
+ * key, we must propagate that foreign key to the partition.  However, the
+ * partition might already have an equivalent foreign key; this routine
+ * compares the given ForeignKeyCacheInfo (in the partition) to the FK defined
+ * by the other parameters.  If they are equivalent, create the link between
+ * the two constraints and return true.
+ *
+ * If the given FK does not match the one defined by the rest of the
+ * parameters, return false; every "return false" path is taken before any
+ * catalog change is made.
+ *
+ * fk is the candidate constraint in the partition.  partRelid and
+ * parentConstrOid are handed to ConstraintSetParentConstraint() once the
+ * match succeeds.  numfks is the number of key columns; mapped_conkey,
+ * confkey and conpfeqop are arrays of that length which are compared element
+ * by element against the corresponding arrays in fk.
+ *
+ * Raises an error if either constraint cannot be found in the syscache.
+ */
+static bool
+tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk,
+							 Oid partRelid,
+							 Oid parentConstrOid,
+							 int numfks,
+							 AttrNumber *mapped_conkey,
+							 AttrNumber *confkey,
+							 Oid *conpfeqop)
+{
+	HeapTuple	parentConstrTup;
+	Form_pg_constraint parentConstr;
+	HeapTuple	partcontup;
+	Form_pg_constraint partConstr;
+	Relation	trigrel;
+	ScanKeyData key;
+	SysScanDesc scan;
+	HeapTuple	trigtup;
+
+	parentConstrTup = SearchSysCache1(CONSTROID,
+									  ObjectIdGetDatum(parentConstrOid));
+	if (!HeapTupleIsValid(parentConstrTup))
+		elog(ERROR, "cache lookup failed for constraint %u", parentConstrOid);
+	parentConstr = (Form_pg_constraint) GETSTRUCT(parentConstrTup);
+
+	/*
+	 * Do some quick & easy initial checks.  If any of these fail, we cannot
+	 * use this constraint.  These need only the parent's pg_constraint row
+	 * and the data already present in fk: the referenced relation, the number
+	 * of keys, and then the per-column arrays.
+	 */
+	if (fk->confrelid != parentConstr->confrelid || fk->nkeys != numfks)
+	{
+		ReleaseSysCache(parentConstrTup);
+		return false;
+	}
+	for (int i = 0; i < numfks; i++)
+	{
+		if (fk->conkey[i] != mapped_conkey[i] ||
+			fk->confkey[i] != confkey[i] ||
+			fk->conpfeqop[i] != conpfeqop[i])
+		{
+			ReleaseSysCache(parentConstrTup);
+			return false;
+		}
+	}
+
+	/*
+	 * Looks good so far; do some more extensive checks.  Presumably the check
+	 * for 'convalidated' could be dropped, since we don't really care about
+	 * that, but let's be careful for now.
+	 *
+	 * The candidate is rejected if it already has a parent constraint, is not
+	 * validated, or differs from the parent in deferrability, update/delete
+	 * action or match type.
+	 */
+	partcontup = SearchSysCache1(CONSTROID,
+								 ObjectIdGetDatum(fk->conoid));
+	if (!HeapTupleIsValid(partcontup))
+		elog(ERROR, "cache lookup failed for constraint %u", fk->conoid);
+	partConstr = (Form_pg_constraint) GETSTRUCT(partcontup);
+	if (OidIsValid(partConstr->conparentid) ||
+		!partConstr->convalidated ||
+		partConstr->condeferrable != parentConstr->condeferrable ||
+		partConstr->condeferred != parentConstr->condeferred ||
+		partConstr->confupdtype != parentConstr->confupdtype ||
+		partConstr->confdeltype != parentConstr->confdeltype ||
+		partConstr->confmatchtype != parentConstr->confmatchtype)
+	{
+		ReleaseSysCache(parentConstrTup);
+		ReleaseSysCache(partcontup);
+		return false;
+	}
+
+	ReleaseSysCache(partcontup);
+	ReleaseSysCache(parentConstrTup);
+
+	/*
+	 * Looks good!  Attach this constraint.  The action triggers in the new
+	 * partition become redundant -- the parent table already has equivalent
+	 * ones, and those will be able to reach the partition.  Remove the ones
+	 * in the partition.  We identify them because they have our constraint
+	 * OID, as well as being on the referenced rel.
+	 *
+	 * The scan below fetches every trigger whose tgconstraint is fk->conoid;
+	 * the two tests inside the loop then skip any trigger that is not on the
+	 * referenced relation with the referencing relation as its tgconstrrelid.
+	 */
+	trigrel = table_open(TriggerRelationId, RowExclusiveLock);
+	ScanKeyInit(&key,
+				Anum_pg_trigger_tgconstraint,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(fk->conoid));
+
+	scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true,
+							  NULL, 1, &key);
+	while ((trigtup = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup);
+		ObjectAddress trigger;
+
+		if (trgform->tgconstrrelid != fk->conrelid)
+			continue;
+		if (trgform->tgrelid != fk->confrelid)
+			continue;
+
+		/*
+		 * The constraint is originally set up to contain this trigger as an
+		 * implementation object, so there's a dependency record that links
+		 * the two; however, since the trigger is no longer needed, we remove
+		 * the dependency link in order to be able to drop the trigger while
+		 * keeping the constraint intact.
+		 */
+		deleteDependencyRecordsFor(TriggerRelationId,
+								   trgform->oid,
+								   false);
+		/* make dependency deletion visible to performDeletion */
+		CommandCounterIncrement();
+		ObjectAddressSet(trigger, TriggerRelationId,
+						 trgform->oid);
+		performDeletion(&trigger, DROP_RESTRICT, 0);
+		/* make trigger drop visible, in case the loop iterates */
+		CommandCounterIncrement();
+	}
+
+	systable_endscan(scan);
+	table_close(trigrel, RowExclusiveLock);
+
+	ConstraintSetParentConstraint(fk->conoid, parentConstrOid, partRelid);
+	CommandCounterIncrement();
+	return true;
+}
+
+
+/*
+ * ALTER TABLE ALTER CONSTRAINT
+ *
+ * Update the attributes of a constraint.
+ *
+ * Currently only works for Foreign Key constraints.
+ * Foreign keys do not inherit, so we purposely ignore the
+ * recursion bit here, but we keep the API the same for when
+ * other constraint types are supported.
+ *
+ * rel is the table that owns the constraint.  cmd->def must be a Constraint
+ * node carrying the constraint's name and the requested deferrable and
+ * initdeferred settings.  recurse, recursing and lockmode are not used by
+ * the current implementation.
+ *
+ * An error is raised if rel has no constraint of the given name, or if that
+ * constraint is not a foreign key.
+ *
+ * If the constraint is modified, returns its address; otherwise, return
+ * InvalidObjectAddress.
+ */
+static ObjectAddress
+ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
+					  bool recurse, bool recursing, LOCKMODE lockmode)
+{
+	Constraint *cmdcon;
+	Relation	conrel;
+	SysScanDesc scan;
+	ScanKeyData skey[3];
+	HeapTuple	contuple;
+	Form_pg_constraint currcon;
+	ObjectAddress address;
+
+	cmdcon = castNode(Constraint, cmd->def);
+
+	conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+	/*
+	 * Find and check the target constraint.  The scan keys select table
+	 * constraints (contypid is InvalidOid) of rel with the requested name.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	ScanKeyInit(&skey[1],
+				Anum_pg_constraint_contypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(InvalidOid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_constraint_conname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(cmdcon->conname));
+	scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+							  true, NULL, 3, skey);
+
+	/* There can be at most one matching row */
+	if (!HeapTupleIsValid(contuple = systable_getnext(scan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+						cmdcon->conname, RelationGetRelationName(rel))));
+
+	currcon = (Form_pg_constraint) GETSTRUCT(contuple);
+	if (currcon->contype != CONSTRAINT_FOREIGN)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key constraint",
+						cmdcon->conname, RelationGetRelationName(rel))));
+
+	/* Nothing to do unless at least one of the two settings changes */
+	if (currcon->condeferrable != cmdcon->deferrable ||
+		currcon->condeferred != cmdcon->initdeferred)
+	{
+		HeapTuple	copyTuple;
+		HeapTuple	tgtuple;
+		Form_pg_constraint copy_con;
+		List	   *otherrelids = NIL;
+		ScanKeyData tgkey;
+		SysScanDesc tgscan;
+		Relation	tgrel;
+		ListCell   *lc;
+
+		/*
+		 * Now update the catalog, while we have the door open.
+		 */
+		copyTuple = heap_copytuple(contuple);
+		copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
+		copy_con->condeferrable = cmdcon->deferrable;
+		copy_con->condeferred = cmdcon->initdeferred;
+		CatalogTupleUpdate(conrel, &copyTuple->t_self, copyTuple);
+
+		InvokeObjectPostAlterHook(ConstraintRelationId,
+								  currcon->oid, 0);
+
+		heap_freetuple(copyTuple);
+
+		/*
+		 * Now we need to update the multiple entries in pg_trigger that
+		 * implement the constraint.
+		 */
+		tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+		ScanKeyInit(&tgkey,
+					Anum_pg_trigger_tgconstraint,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(currcon->oid));
+
+		tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true,
+									NULL, 1, &tgkey);
+
+		while (HeapTupleIsValid(tgtuple = systable_getnext(tgscan)))
+		{
+			Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tgtuple);
+			Form_pg_trigger copy_tg;
+
+			/*
+			 * Remember OIDs of other relation(s) involved in FK constraint.
+			 * (Note: it's likely that we could skip forcing a relcache inval
+			 * for other rels that don't have a trigger whose properties
+			 * change, but let's be conservative.)
+			 */
+			if (tgform->tgrelid != RelationGetRelid(rel))
+				otherrelids = list_append_unique_oid(otherrelids,
+													 tgform->tgrelid);
+
+			/*
+			 * Update deferrability of RI_FKey_noaction_del,
+			 * RI_FKey_noaction_upd, RI_FKey_check_ins and RI_FKey_check_upd
+			 * triggers, but not others; see createForeignKeyActionTriggers
+			 * and CreateFKCheckTrigger.
+			 */
+			if (tgform->tgfoid != F_RI_FKEY_NOACTION_DEL &&
+				tgform->tgfoid != F_RI_FKEY_NOACTION_UPD &&
+				tgform->tgfoid != F_RI_FKEY_CHECK_INS &&
+				tgform->tgfoid != F_RI_FKEY_CHECK_UPD)
+				continue;
+
+			copyTuple = heap_copytuple(tgtuple);
+			copy_tg = (Form_pg_trigger) GETSTRUCT(copyTuple);
+
+			copy_tg->tgdeferrable = cmdcon->deferrable;
+			copy_tg->tginitdeferred = cmdcon->initdeferred;
+			CatalogTupleUpdate(tgrel, &copyTuple->t_self, copyTuple);
+
+			/*
+			 * Report the trigger's own OID: the class passed to the hook is
+			 * pg_trigger, so handing it the constraint's OID would identify
+			 * the wrong object.
+			 */
+			InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0);
+
+			heap_freetuple(copyTuple);
+		}
+
+		systable_endscan(tgscan);
+
+		table_close(tgrel, RowExclusiveLock);
+
+		/*
+		 * Invalidate relcache so that others see the new attributes.  We must
+		 * inval both the named rel and any others having relevant triggers.
+		 * (At present there should always be exactly one other rel, but
+		 * there's no need to hard-wire such an assumption here.)
+		 */
+		CacheInvalidateRelcache(rel);
+		foreach(lc, otherrelids)
+		{
+			CacheInvalidateRelcacheByRelid(lfirst_oid(lc));
+		}
+
+		ObjectAddressSet(address, ConstraintRelationId, currcon->oid);
+	}
+	else
+		address = InvalidObjectAddress;
+
+	systable_endscan(scan);
+
+	table_close(conrel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE VALIDATE CONSTRAINT
+ *
+ * XXX The reason we handle recursion here rather than at Phase 1 is because
+ * there's no good way to skip recursing when handling foreign keys: there is
+ * no need to lock children in that case, yet we wouldn't be able to avoid
+ * doing so at that level.
+ *
+ * wqueue is the ALTER TABLE work queue to which the validation request is
+ * appended; rel is the table owning the constraint named constrName.
+ * recurse says whether child tables may be processed (CHECK constraints
+ * only), recursing is true when this call was made for a child table by the
+ * loop below, and lockmode is the lock level passed to find_all_inheritors().
+ *
+ * This function does not scan the table itself; it queues a NewConstraint
+ * entry for phase 3 and marks the pg_constraint row as validated.
+ *
+ * An error is raised if the constraint does not exist on rel, or is neither
+ * a foreign key nor a check constraint.
+ *
+ * Return value is the address of the validated constraint.  If the constraint
+ * was already validated, InvalidObjectAddress is returned.
+ */
+static ObjectAddress
+ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
+						 bool recurse, bool recursing, LOCKMODE lockmode)
+{
+	Relation	conrel;
+	SysScanDesc scan;
+	ScanKeyData skey[3];
+	HeapTuple	tuple;
+	Form_pg_constraint con;
+	ObjectAddress address;
+
+	conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+	/*
+	 * Find and check the target constraint.  The scan keys select table
+	 * constraints (contypid is InvalidOid) of rel with the requested name.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	ScanKeyInit(&skey[1],
+				Anum_pg_constraint_contypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(InvalidOid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_constraint_conname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(constrName));
+	scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+							  true, NULL, 3, skey);
+
+	/* There can be at most one matching row */
+	if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+						constrName, RelationGetRelationName(rel))));
+
+	con = (Form_pg_constraint) GETSTRUCT(tuple);
+	if (con->contype != CONSTRAINT_FOREIGN &&
+		con->contype != CONSTRAINT_CHECK)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key or check constraint",
+						constrName, RelationGetRelationName(rel))));
+
+	if (!con->convalidated)
+	{
+		AlteredTableInfo *tab;
+		HeapTuple	copyTuple;
+		Form_pg_constraint copy_con;
+
+		if (con->contype == CONSTRAINT_FOREIGN)
+		{
+			NewConstraint *newcon;
+			Constraint *fkconstraint;
+
+			/* Queue validation for phase 3 */
+			fkconstraint = makeNode(Constraint);
+			/* for now this is all we need */
+			fkconstraint->conname = constrName;
+
+			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+			newcon->name = constrName;
+			newcon->contype = CONSTR_FOREIGN;
+			newcon->refrelid = con->confrelid;
+			newcon->refindid = con->conindid;
+			newcon->conid = con->oid;
+			newcon->qual = (Node *) fkconstraint;
+
+			/* Find or create work queue entry for this table */
+			tab = ATGetQueueEntry(wqueue, rel);
+			tab->constraints = lappend(tab->constraints, newcon);
+
+			/*
+			 * We disallow creating invalid foreign keys to or from
+			 * partitioned tables, so ignoring the recursion bit is okay.
+			 */
+		}
+		else if (con->contype == CONSTRAINT_CHECK)
+		{
+			List	   *children = NIL;
+			ListCell   *child;
+			NewConstraint *newcon;
+			bool		isnull;
+			Datum		val;
+			char	   *conbin;
+
+			/*
+			 * If we're recursing, the parent has already done this, so skip
+			 * it.  Also, if the constraint is a NO INHERIT constraint, we
+			 * shouldn't try to look for it in the children.
+			 */
+			if (!recursing && !con->connoinherit)
+				children = find_all_inheritors(RelationGetRelid(rel),
+											   lockmode, NULL);
+
+			/*
+			 * For CHECK constraints, we must ensure that we only mark the
+			 * constraint as validated on the parent if it's already validated
+			 * on the children.
+			 *
+			 * We recurse before validating on the parent, to reduce risk of
+			 * deadlocks.
+			 */
+			foreach(child, children)
+			{
+				Oid			childoid = lfirst_oid(child);
+				Relation	childrel;
+
+				if (childoid == RelationGetRelid(rel))
+					continue;
+
+				/*
+				 * If we are told not to recurse, there had better not be any
+				 * child tables, because we can't mark the constraint on the
+				 * parent valid unless it is valid for all child tables.
+				 */
+				if (!recurse)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+							 errmsg("constraint must be validated on child tables too")));
+
+				/* find_all_inheritors already got lock */
+				childrel = table_open(childoid, NoLock);
+
+				ATExecValidateConstraint(wqueue, childrel, constrName, false,
+										 true, lockmode);
+				table_close(childrel, NoLock);
+			}
+
+			/*
+			 * Queue validation for phase 3.  The expression to check is the
+			 * constraint's stored conbin, converted back into a node tree.
+			 */
+			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+			newcon->name = constrName;
+			newcon->contype = CONSTR_CHECK;
+			newcon->refrelid = InvalidOid;
+			newcon->refindid = InvalidOid;
+			newcon->conid = con->oid;
+
+			val = SysCacheGetAttr(CONSTROID, tuple,
+								  Anum_pg_constraint_conbin, &isnull);
+			if (isnull)
+				elog(ERROR, "null conbin for constraint %u", con->oid);
+
+			conbin = TextDatumGetCString(val);
+			newcon->qual = (Node *) stringToNode(conbin);
+
+			/* Find or create work queue entry for this table */
+			tab = ATGetQueueEntry(wqueue, rel);
+			tab->constraints = lappend(tab->constraints, newcon);
+
+			/*
+			 * Invalidate relcache so that others see the new validated
+			 * constraint.
+			 */
+			CacheInvalidateRelcache(rel);
+		}
+
+		/*
+		 * Now update the catalog, while we have the door open.  This is done
+		 * for both constraint types handled above.
+		 */
+		copyTuple = heap_copytuple(tuple);
+		copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
+		copy_con->convalidated = true;
+		CatalogTupleUpdate(conrel, &copyTuple->t_self, copyTuple);
+
+		InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0);
+
+		heap_freetuple(copyTuple);
+
+		ObjectAddressSet(address, ConstraintRelationId, con->oid);
+	}
+	else
+		address = InvalidObjectAddress; /* already validated */
+
+	systable_endscan(scan);
+
+	table_close(conrel, RowExclusiveLock);
+
+	return address;
+}
+
+
+/*
+ * transformColumnNameList - resolve a list of column names
+ *
+ * Walks colList in order; for every name, looks up the like-named column of
+ * relation relId and stores its attribute number in attnums[] and its type
+ * OID in atttypids[] at the matching position.
+ *
+ * Raises an error if a named column does not exist, or if the list holds
+ * more than INDEX_MAX_KEYS names.
+ *
+ * Returns the number of array entries filled in.
+ */
+static int
+transformColumnNameList(Oid relId, List *colList,
+						int16 *attnums, Oid *atttypids)
+{
+	int			nkeys = 0;
+	ListCell   *cell;
+
+	foreach(cell, colList)
+	{
+		char	   *colname = strVal(lfirst(cell));
+		HeapTuple	tup = SearchSysCacheAttName(relId, colname);
+		Form_pg_attribute attform;
+
+		if (!HeapTupleIsValid(tup))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" referenced in foreign key constraint does not exist",
+							colname)));
+
+		/* Check the limit before storing, so the arrays are never overrun */
+		if (nkeys >= INDEX_MAX_KEYS)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("cannot have more than %d keys in a foreign key",
+							INDEX_MAX_KEYS)));
+
+		attform = (Form_pg_attribute) GETSTRUCT(tup);
+		attnums[nkeys] = attform->attnum;
+		atttypids[nkeys] = attform->atttypid;
+		nkeys++;
+
+		ReleaseSysCache(tup);
+	}
+
+	return nkeys;
+}
+
+/*
+ * transformFkeyGetPrimaryKey -
+ *
+ *	Look up the names, attnums, and types of the primary key attributes
+ *	for the pkrel.  Also return the index OID and index opclasses of the
+ *	index supporting the primary key.
+ *
+ *	All parameters except pkrel are output parameters.  Also, the function
+ *	return value is the number of attributes in the primary key.
+ *
+ *	*indexOid receives the OID of the primary key index; *attnamelist
+ *	receives a new list of String nodes holding copies of the key column
+ *	names; attnums[], atttypids[] and opclasses[] are filled with one entry
+ *	per key column, in index column order.
+ *
+ *	An error is raised if pkrel has no valid primary key index, or if that
+ *	index is deferrable.
+ *
+ *	Used when the column list in the REFERENCES specification is omitted.
+ */
+static int
+transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
+						   List **attnamelist,
+						   int16 *attnums, Oid *atttypids,
+						   Oid *opclasses)
+{
+	List	   *indexoidlist;
+	ListCell   *indexoidscan;
+	HeapTuple	indexTuple = NULL;
+	Form_pg_index indexStruct = NULL;
+	Datum		indclassDatum;
+	bool		isnull;
+	oidvector  *indclass;
+	int			i;
+
+	/*
+	 * Get the list of index OIDs for the table from the relcache, and look up
+	 * each one in the pg_index syscache until we find one marked primary key
+	 * (hopefully there isn't more than one such).  Insist it's valid, too.
+	 *
+	 * Note that when the loop exits via "break", the syscache entry for the
+	 * chosen index has not been released: indexTuple and indexStruct remain
+	 * usable below, and the entry is released just before returning.
+	 */
+	*indexOid = InvalidOid;
+
+	indexoidlist = RelationGetIndexList(pkrel);
+
+	foreach(indexoidscan, indexoidlist)
+	{
+		Oid			indexoid = lfirst_oid(indexoidscan);
+
+		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
+		if (!HeapTupleIsValid(indexTuple))
+			elog(ERROR, "cache lookup failed for index %u", indexoid);
+		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
+		if (indexStruct->indisprimary && indexStruct->indisvalid)
+		{
+			/*
+			 * Refuse to use a deferrable primary key.  This is per SQL spec,
+			 * and there would be a lot of interesting semantic problems if we
+			 * tried to allow it.
+			 */
+			if (!indexStruct->indimmediate)
+				ereport(ERROR,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("cannot use a deferrable primary key for referenced table \"%s\"",
+								RelationGetRelationName(pkrel))));
+
+			*indexOid = indexoid;
+			break;
+		}
+		ReleaseSysCache(indexTuple);
+	}
+
+	list_free(indexoidlist);
+
+	/*
+	 * Check that we found it
+	 */
+	if (!OidIsValid(*indexOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("there is no primary key for referenced table \"%s\"",
+						RelationGetRelationName(pkrel))));
+
+	/*
+	 * Must get indclass the hard way: it is fetched with SysCacheGetAttr()
+	 * rather than read through the indexStruct pointer.
+	 */
+	indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+									Anum_pg_index_indclass, &isnull);
+	Assert(!isnull);
+	indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+	/*
+	 * Now build the list of PK attributes from the indkey definition (we
+	 * assume a primary key cannot have expressional elements).  Only the
+	 * first indnkeyatts index columns are considered.
+	 */
+	*attnamelist = NIL;
+	for (i = 0; i < indexStruct->indnkeyatts; i++)
+	{
+		int			pkattno = indexStruct->indkey.values[i];
+
+		attnums[i] = pkattno;
+		atttypids[i] = attnumTypeId(pkrel, pkattno);
+		opclasses[i] = indclass->values[i];
+		*attnamelist = lappend(*attnamelist,
+							   makeString(pstrdup(NameStr(*attnumAttName(pkrel, pkattno)))));
+	}
+
+	ReleaseSysCache(indexTuple);
+
+	return i;
+}
+
+/*
+ * transformFkeyCheckAttrs -
+ *
+ *	Make sure that the attributes of a referenced table belong to a unique
+ *	(or primary key) constraint.  Return the OID of the index supporting
+ *	the constraint, as well as the opclasses associated with the index
+ *	columns.
+ *
+ *	pkrel is the referenced table and attnums[0 .. numattrs-1] are the
+ *	referenced column numbers.  On success, opclasses[i] holds the opclass
+ *	of the index column that matches attnums[i].
+ *
+ *	The indexes of pkrel are examined in the order RelationGetIndexList()
+ *	returns them, and the first acceptable one is used.
+ *
+ *	An error is raised if attnums[] contains duplicates, if the only
+ *	otherwise-matching indexes are deferrable, or if no index matches.
+ */
+static Oid
+transformFkeyCheckAttrs(Relation pkrel,
+						int numattrs, int16 *attnums,
+						Oid *opclasses) /* output parameter */
+{
+	Oid			indexoid = InvalidOid;
+	bool		found = false;
+	bool		found_deferrable = false;
+	List	   *indexoidlist;
+	ListCell   *indexoidscan;
+	int			i,
+				j;
+
+	/*
+	 * Reject duplicate appearances of columns in the referenced-columns list.
+	 * Such a case is forbidden by the SQL standard, and even if we thought it
+	 * useful to allow it, there would be ambiguity about how to match the
+	 * list to unique indexes (in particular, it'd be unclear which index
+	 * opclass goes with which FK column).
+	 */
+	for (i = 0; i < numattrs; i++)
+	{
+		for (j = i + 1; j < numattrs; j++)
+		{
+			if (attnums[i] == attnums[j])
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FOREIGN_KEY),
+						 errmsg("foreign key referenced-columns list must not contain duplicates")));
+		}
+	}
+
+	/*
+	 * Get the list of index OIDs for the table from the relcache, and look up
+	 * each one in the pg_index syscache, and match unique indexes to the list
+	 * of attnums we are given.
+	 */
+	indexoidlist = RelationGetIndexList(pkrel);
+
+	foreach(indexoidscan, indexoidlist)
+	{
+		HeapTuple	indexTuple;
+		Form_pg_index indexStruct;
+
+		indexoid = lfirst_oid(indexoidscan);
+		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
+		if (!HeapTupleIsValid(indexTuple))
+			elog(ERROR, "cache lookup failed for index %u", indexoid);
+		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
+
+		/*
+		 * Must have the right number of columns; must be unique and not a
+		 * partial index; forget it if there are any expressions, too. Invalid
+		 * indexes are out as well.
+		 */
+		if (indexStruct->indnkeyatts == numattrs &&
+			indexStruct->indisunique &&
+			indexStruct->indisvalid &&
+			heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) &&
+			heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL))
+		{
+			Datum		indclassDatum;
+			bool		isnull;
+			oidvector  *indclass;
+
+			/* Must get indclass the hard way */
+			indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+											Anum_pg_index_indclass, &isnull);
+			Assert(!isnull);
+			indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+			/*
+			 * The given attnum list may match the index columns in any order.
+			 * Check for a match, and extract the appropriate opclasses while
+			 * we're at it.
+			 *
+			 * We know that attnums[] is duplicate-free per the test at the
+			 * start of this function, and we checked above that the number of
+			 * index columns agrees, so if we find a match for each attnums[]
+			 * entry then we must have a one-to-one match in some order.
+			 *
+			 * "found" ends up true only if every attnums[] entry was matched;
+			 * the outer loop stops at the first entry that has no match.
+			 */
+			for (i = 0; i < numattrs; i++)
+			{
+				found = false;
+				for (j = 0; j < numattrs; j++)
+				{
+					if (attnums[i] == indexStruct->indkey.values[j])
+					{
+						opclasses[i] = indclass->values[j];
+						found = true;
+						break;
+					}
+				}
+				if (!found)
+					break;
+			}
+
+			/*
+			 * Refuse to use a deferrable unique/primary key.  This is per SQL
+			 * spec, and there would be a lot of interesting semantic problems
+			 * if we tried to allow it.
+			 */
+			if (found && !indexStruct->indimmediate)
+			{
+				/*
+				 * Remember that we found an otherwise matching index, so that
+				 * we can generate a more appropriate error message.
+				 */
+				found_deferrable = true;
+				found = false;
+			}
+		}
+		ReleaseSysCache(indexTuple);
+		if (found)
+			break;
+	}
+
+	if (!found)
+	{
+		if (found_deferrable)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot use a deferrable unique constraint for referenced table \"%s\"",
+							RelationGetRelationName(pkrel))));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FOREIGN_KEY),
+					 errmsg("there is no unique constraint matching given keys for referenced table \"%s\"",
+							RelationGetRelationName(pkrel))));
+	}
+
+	list_free(indexoidlist);
+
+	return indexoid;
+}
+
+/*
+ * findFkeyCast -
+ *
+ *	Determine how a value of sourceTypeId is converted to targetTypeId, on
+ *	behalf of ATAddForeignKeyConstraint().  An exact type match is reported
+ *	as COERCION_PATH_RELABELTYPE with *funcid set to InvalidOid; otherwise
+ *	the answer (and *funcid) come from find_coercion_pathway() using
+ *	implicit-coercion rules.
+ *
+ *	Raises an error if no coercion pathway exists.
+ */
+static CoercionPathType
+findFkeyCast(Oid targetTypeId, Oid sourceTypeId, Oid *funcid)
+{
+	CoercionPathType pathtype;
+
+	/* Identical types: no catalog lookup and no cast function needed */
+	if (targetTypeId == sourceTypeId)
+	{
+		*funcid = InvalidOid;
+		return COERCION_PATH_RELABELTYPE;
+	}
+
+	pathtype = find_coercion_pathway(targetTypeId, sourceTypeId,
+									 COERCION_IMPLICIT, funcid);
+
+	/* A previously-relied-upon cast is now gone. */
+	if (pathtype == COERCION_PATH_NONE)
+		elog(ERROR, "could not find cast from %u to %u",
+			 sourceTypeId, targetTypeId);
+
+	return pathtype;
+}
+
+/*
+ * Permissions checks on the referenced table for ADD FOREIGN KEY
+ *
+ * The current user needs REFERENCES privilege either on the relation as a
+ * whole or on every one of the natts columns listed in attnums[].  A failed
+ * column-level check is reported through aclcheck_error().
+ *
+ * Note: we have already checked that the user owns the referencing table,
+ * else we'd have failed much earlier; no additional checks are needed for it.
+ */
+static void
+checkFkeyPermissions(Relation rel, int16 *attnums, int natts)
+{
+	Oid			userid = GetUserId();
+	Oid			relid = RelationGetRelid(rel);
+
+	/* Relation-level REFERENCES permission settles the matter */
+	if (pg_class_aclcheck(relid, userid, ACL_REFERENCES) == ACLCHECK_OK)
+		return;
+
+	/* Otherwise each referenced column must carry REFERENCES itself */
+	for (int col = 0; col < natts; col++)
+	{
+		AclResult	colresult;
+
+		colresult = pg_attribute_aclcheck(relid, attnums[col],
+										  userid, ACL_REFERENCES);
+		if (colresult == ACLCHECK_OK)
+			continue;
+
+		aclcheck_error(colresult,
+					   get_relkind_objtype(rel->rd_rel->relkind),
+					   RelationGetRelationName(rel));
+	}
+}
+
+/*
+ * Scan the existing rows in a table to verify they meet a proposed FK
+ * constraint.
+ *
+ * conname is used in the debug message and as the name of the Trigger
+ * struct built below.  rel is the referencing table whose rows are checked
+ * and pkrel is the referenced table.  pkindOid and constraintOid are stored
+ * in that Trigger struct as tgconstrindid and tgconstraint.
+ *
+ * RI_Initial_Check() is tried first; only if it returns false is every row
+ * of rel passed individually to RI_FKey_check_ins().
+ *
+ * Caller must have opened and locked both relations appropriately.
+ */
+static void
+validateForeignKeyConstraint(char *conname,
+							 Relation rel,
+							 Relation pkrel,
+							 Oid pkindOid,
+							 Oid constraintOid)
+{
+	TupleTableSlot *slot;
+	TableScanDesc scan;
+	Trigger		trig;
+	Snapshot	snapshot;
+	MemoryContext oldcxt;
+	MemoryContext perTupCxt;
+
+	ereport(DEBUG1,
+			(errmsg_internal("validating foreign key constraint \"%s\"", conname)));
+
+	/*
+	 * Build a trigger call structure; we'll need it either way.
+	 */
+	MemSet(&trig, 0, sizeof(trig));
+	trig.tgoid = InvalidOid;
+	trig.tgname = conname;
+	trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN;
+	trig.tgisinternal = true;
+	trig.tgconstrrelid = RelationGetRelid(pkrel);
+	trig.tgconstrindid = pkindOid;
+	trig.tgconstraint = constraintOid;
+	trig.tgdeferrable = false;
+	trig.tginitdeferred = false;
+	/* we needn't fill in remaining fields */
+
+	/*
+	 * See if we can do it with a single LEFT JOIN query.  A false result
+	 * indicates we must proceed with the fire-the-trigger method.
+	 */
+	if (RI_Initial_Check(&trig, rel, pkrel))
+		return;
+
+	/*
+	 * Scan through each tuple, calling RI_FKey_check_ins (insert trigger) as
+	 * if that tuple had just been inserted.  If any of those fail, it should
+	 * ereport(ERROR) and that's that.
+	 *
+	 * The loop runs in a dedicated memory context that is reset after every
+	 * row and deleted once the scan is finished.
+	 */
+	snapshot = RegisterSnapshot(GetLatestSnapshot());
+	slot = table_slot_create(rel, NULL);
+	scan = table_beginscan(rel, snapshot, 0, NULL);
+
+	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
+									  "validateForeignKeyConstraint",
+									  ALLOCSET_SMALL_SIZES);
+	oldcxt = MemoryContextSwitchTo(perTupCxt);
+
+	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
+	{
+		LOCAL_FCINFO(fcinfo, 0);
+		TriggerData trigdata = {0};
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Make a call to the trigger function
+		 *
+		 * No parameters are passed, but we do set a context
+		 */
+		MemSet(fcinfo, 0, SizeForFunctionCallInfo(0));
+
+		/*
+		 * We assume RI_FKey_check_ins won't look at flinfo...
+		 */
+		trigdata.type = T_TriggerData;
+		trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW;
+		trigdata.tg_relation = rel;
+		trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, false, NULL);
+		trigdata.tg_trigslot = slot;
+		trigdata.tg_trigger = &trig;
+
+		fcinfo->context = (Node *) &trigdata;
+
+		RI_FKey_check_ins(fcinfo);
+
+		MemoryContextReset(perTupCxt);
+	}
+
+	MemoryContextSwitchTo(oldcxt);
+	MemoryContextDelete(perTupCxt);
+	table_endscan(scan);
+	UnregisterSnapshot(snapshot);
+	ExecDropSingleTupleTableSlot(slot);
+}
+
+/*
+ * CreateFKCheckTrigger
+ *		Build a CreateTrigStmt for one foreign-key "check" trigger and hand
+ *		it to CreateTrigger().
+ *
+ * on_insert selects the INSERT variant (RI_FKey_check_ins) or the UPDATE
+ * variant (RI_FKey_check_upd).  The trigger's deferrability is copied from
+ * fkconstraint.  myRelOid, refRelOid, constraintOid and indexOid are passed
+ * through to CreateTrigger() unchanged.
+ */
+static void
+CreateFKCheckTrigger(Oid myRelOid, Oid refRelOid, Constraint *fkconstraint,
+					 Oid constraintOid, Oid indexOid, bool on_insert)
+{
+	CreateTrigStmt *stmt = makeNode(CreateTrigStmt);
+
+	/*
+	 * Note: for a self-referential FK (referencing and referenced tables are
+	 * the same), it is important that the ON UPDATE action fires before the
+	 * CHECK action, since both triggers will fire on the same row during an
+	 * UPDATE event; otherwise the CHECK trigger will be checking a non-final
+	 * state of the row.  Triggers fire in name order, so we ensure this by
+	 * using names like "RI_ConstraintTrigger_a_NNNN" for the action triggers
+	 * and "RI_ConstraintTrigger_c_NNNN" for the check triggers.
+	 */
+	stmt->trigname = "RI_ConstraintTrigger_c";
+	stmt->isconstraint = true;
+	stmt->replace = false;
+	stmt->relation = NULL;
+	stmt->constrrel = NULL;
+
+	/* Row-level AFTER trigger, on either INSERT or UPDATE */
+	stmt->row = true;
+	stmt->timing = TRIGGER_TYPE_AFTER;
+	stmt->funcname = SystemFuncName(on_insert ? "RI_FKey_check_ins"
+									: "RI_FKey_check_upd");
+	stmt->events = on_insert ? TRIGGER_TYPE_INSERT : TRIGGER_TYPE_UPDATE;
+
+	/* No arguments, column list, WHEN clause or transition tables */
+	stmt->args = NIL;
+	stmt->columns = NIL;
+	stmt->whenClause = NULL;
+	stmt->transitionRels = NIL;
+
+	/* Deferrability follows the constraint definition */
+	stmt->deferrable = fkconstraint->deferrable;
+	stmt->initdeferred = fkconstraint->initdeferred;
+
+	(void) CreateTrigger(stmt, NULL, myRelOid, refRelOid, constraintOid,
+						 indexOid, InvalidOid, InvalidOid, NULL, true, false);
+
+	/* Make changes-so-far visible */
+	CommandCounterIncrement();
+}
+
+/*
+ * createForeignKeyActionTriggers
+ *		Build the pair of "action" triggers that enforce a foreign key from
+ *		the referenced table's side: one fired AFTER DELETE and one fired
+ *		AFTER UPDATE.
+ *
+ * Both are row-level constraint triggers named "RI_ConstraintTrigger_a".
+ * The RI_FKey_* function each one runs is selected from
+ * fkconstraint->fk_del_action and fkconstraint->fk_upd_action respectively.
+ * Only the NO ACTION variants take over the constraint's deferrable and
+ * initdeferred settings; every other action yields a non-deferrable trigger.
+ * An unrecognized action code raises an error.
+ */
+static void
+createForeignKeyActionTriggers(Relation rel, Oid refRelOid, Constraint *fkconstraint,
+							   Oid constraintOid, Oid indexOid)
+{
+	Oid			fkRelOid = RelationGetRelid(rel);
+	CreateTrigStmt *trig;
+	char	   *ri_funcname = NULL;
+	bool		timing_from_constraint = false;
+
+	/*
+	 * ON DELETE: work out which RI function implements the requested action
+	 * and whether the trigger follows the constraint's deferrability.
+	 */
+	switch (fkconstraint->fk_del_action)
+	{
+		case FKCONSTR_ACTION_NOACTION:
+			ri_funcname = "RI_FKey_noaction_del";
+			timing_from_constraint = true;
+			break;
+		case FKCONSTR_ACTION_RESTRICT:
+			ri_funcname = "RI_FKey_restrict_del";
+			break;
+		case FKCONSTR_ACTION_CASCADE:
+			ri_funcname = "RI_FKey_cascade_del";
+			break;
+		case FKCONSTR_ACTION_SETNULL:
+			ri_funcname = "RI_FKey_setnull_del";
+			break;
+		case FKCONSTR_ACTION_SETDEFAULT:
+			ri_funcname = "RI_FKey_setdefault_del";
+			break;
+		default:
+			elog(ERROR, "unrecognized FK action type: %d",
+				 (int) fkconstraint->fk_del_action);
+			break;
+	}
+
+	/*
+	 * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
+	 * DELETE action on the referenced table.
+	 */
+	trig = makeNode(CreateTrigStmt);
+	trig->replace = false;
+	trig->isconstraint = true;
+	trig->trigname = "RI_ConstraintTrigger_a";
+	trig->relation = NULL;
+	trig->funcname = SystemFuncName(ri_funcname);
+	trig->args = NIL;
+	trig->row = true;
+	trig->timing = TRIGGER_TYPE_AFTER;
+	trig->events = TRIGGER_TYPE_DELETE;
+	trig->columns = NIL;
+	trig->whenClause = NULL;
+	trig->transitionRels = NIL;
+	trig->deferrable = timing_from_constraint ? fkconstraint->deferrable : false;
+	trig->initdeferred = timing_from_constraint ? fkconstraint->initdeferred : false;
+	trig->constrrel = NULL;
+
+	(void) CreateTrigger(trig, NULL, refRelOid, fkRelOid,
+						 constraintOid,
+						 indexOid, InvalidOid, InvalidOid, NULL, true, false);
+
+	/* Make changes-so-far visible */
+	CommandCounterIncrement();
+
+	/*
+	 * ON UPDATE: same selection as above, but against fk_upd_action and the
+	 * *_upd family of RI functions.
+	 */
+	ri_funcname = NULL;
+	timing_from_constraint = false;
+	switch (fkconstraint->fk_upd_action)
+	{
+		case FKCONSTR_ACTION_NOACTION:
+			ri_funcname = "RI_FKey_noaction_upd";
+			timing_from_constraint = true;
+			break;
+		case FKCONSTR_ACTION_RESTRICT:
+			ri_funcname = "RI_FKey_restrict_upd";
+			break;
+		case FKCONSTR_ACTION_CASCADE:
+			ri_funcname = "RI_FKey_cascade_upd";
+			break;
+		case FKCONSTR_ACTION_SETNULL:
+			ri_funcname = "RI_FKey_setnull_upd";
+			break;
+		case FKCONSTR_ACTION_SETDEFAULT:
+			ri_funcname = "RI_FKey_setdefault_upd";
+			break;
+		default:
+			elog(ERROR, "unrecognized FK action type: %d",
+				 (int) fkconstraint->fk_upd_action);
+			break;
+	}
+
+	/*
+	 * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
+	 * UPDATE action on the referenced table.
+	 */
+	trig = makeNode(CreateTrigStmt);
+	trig->replace = false;
+	trig->isconstraint = true;
+	trig->trigname = "RI_ConstraintTrigger_a";
+	trig->relation = NULL;
+	trig->funcname = SystemFuncName(ri_funcname);
+	trig->args = NIL;
+	trig->row = true;
+	trig->timing = TRIGGER_TYPE_AFTER;
+	trig->events = TRIGGER_TYPE_UPDATE;
+	trig->columns = NIL;
+	trig->whenClause = NULL;
+	trig->transitionRels = NIL;
+	trig->deferrable = timing_from_constraint ? fkconstraint->deferrable : false;
+	trig->initdeferred = timing_from_constraint ? fkconstraint->initdeferred : false;
+	trig->constrrel = NULL;
+
+	(void) CreateTrigger(trig, NULL, refRelOid, fkRelOid,
+						 constraintOid,
+						 indexOid, InvalidOid, InvalidOid, NULL, true, false);
+}
+
+/*
+ * createForeignKeyCheckTriggers
+ *		Set up both referencing-side "check" triggers of a foreign key.
+ *
+ * This simply invokes CreateFKCheckTrigger twice with identical arguments
+ * apart from the final flag: first with it true (the ON INSERT check), then
+ * with it false (the ON UPDATE check).
+ */
+static void
+createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid,
+							  Constraint *fkconstraint, Oid constraintOid,
+							  Oid indexOid)
+{
+	int			pass;
+
+	/* pass 0 creates the INSERT-time check, pass 1 the UPDATE-time check */
+	for (pass = 0; pass < 2; pass++)
+		CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint, constraintOid,
+							 indexOid, pass == 0);
+}
+
+/*
+ * ALTER TABLE DROP CONSTRAINT
+ *
+ * Like DROP COLUMN, we can't use the normal ALTER TABLE recursion mechanism.
+ *
+ * The constraint named constrName is looked up in pg_constraint for rel and
+ * removed through performDeletion().  Afterwards the direct inheritance
+ * children of rel are visited one level at a time so that their copies of
+ * the constraint are either dropped too, or have their inheritance count
+ * adjusted.
+ *
+ * Arguments:
+ *	rel			relation the constraint is dropped from
+ *	constrName	name of the constraint to drop
+ *	behavior	drop behavior handed to performDeletion()
+ *	recurse		if true, a child constraint is dropped when this was its only
+ *				definition source, otherwise its coninhcount is decremented;
+ *				if false, child constraints are kept and marked as local
+ *	recursing	true in the recursive calls made below for child relations;
+ *				enables the permission check here and disables the "cannot
+ *				drop inherited constraint" error
+ *	missing_ok	if true, a missing constraint produces a NOTICE and an early
+ *				return instead of an error
+ *	lockmode	lock mode passed to find_inheritance_children() and to the
+ *				recursive calls
+ */
+static void
+ATExecDropConstraint(Relation rel, const char *constrName,
+					 DropBehavior behavior,
+					 bool recurse, bool recursing,
+					 bool missing_ok, LOCKMODE lockmode)
+{
+	List	   *children;
+	ListCell   *child;
+	Relation	conrel;
+	Form_pg_constraint con;
+	SysScanDesc scan;
+	ScanKeyData skey[3];
+	HeapTuple	tuple;
+	bool		found = false;	/* was the constraint found on rel? */
+	bool		is_no_inherit_constraint = false;
+	char		contype;		/* assigned only when the constraint is found */
+
+	/* At top level, permission check was done in ATPrepCmd, else do it */
+	if (recursing)
+		ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+	/*
+	 * Find and drop the target constraint
+	 *
+	 * The scan keys are (conrelid = rel, contypid = InvalidOid, conname =
+	 * constrName), matching the three columns passed to the index scan.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	ScanKeyInit(&skey[1],
+				Anum_pg_constraint_contypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(InvalidOid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_constraint_conname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(constrName));
+	scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+							  true, NULL, 3, skey);
+
+	/* There can be at most one matching row */
+	if (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		ObjectAddress conobj;
+
+		con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/* Don't drop inherited constraints */
+		if (con->coninhcount > 0 && !recursing)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot drop inherited constraint \"%s\" of relation \"%s\"",
+							constrName, RelationGetRelationName(rel))));
+
+		/*
+		 * Remember these two fields in locals; they are consulted again
+		 * further down, after the scan has been ended.
+		 */
+		is_no_inherit_constraint = con->connoinherit;
+		contype = con->contype;
+
+		/*
+		 * If it's a foreign-key constraint, we'd better lock the referenced
+		 * table and check that that's not in use, just as we've already done
+		 * for the constrained table (else we might, eg, be dropping a trigger
+		 * that has unfired events).  But we can/must skip that in the
+		 * self-referential case.
+		 */
+		if (contype == CONSTRAINT_FOREIGN &&
+			con->confrelid != RelationGetRelid(rel))
+		{
+			Relation	frel;
+
+			/* Must match lock taken by RemoveTriggerById: */
+			frel = table_open(con->confrelid, AccessExclusiveLock);
+			CheckTableNotInUse(frel, "ALTER TABLE");
+			table_close(frel, NoLock);	/* NoLock: lock is not released here */
+		}
+
+		/*
+		 * Perform the actual constraint deletion
+		 */
+		conobj.classId = ConstraintRelationId;
+		conobj.objectId = con->oid;
+		conobj.objectSubId = 0;
+
+		performDeletion(&conobj, behavior, 0);
+
+		found = true;
+	}
+
+	systable_endscan(scan);
+
+	if (!found)
+	{
+		if (!missing_ok)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+							constrName, RelationGetRelationName(rel))));
+		}
+		else
+		{
+			ereport(NOTICE,
+					(errmsg("constraint \"%s\" of relation \"%s\" does not exist, skipping",
+							constrName, RelationGetRelationName(rel))));
+			table_close(conrel, RowExclusiveLock);
+			return;
+		}
+	}
+
+	/*
+	 * For partitioned tables, non-CHECK inherited constraints are dropped via
+	 * the dependency mechanism, so we're done here.
+	 */
+	if (contype != CONSTRAINT_CHECK &&
+		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		table_close(conrel, RowExclusiveLock);
+		return;
+	}
+
+	/*
+	 * Propagate to children as appropriate.  Unlike most other ALTER
+	 * routines, we have to do this one level of recursion at a time; we can't
+	 * use find_all_inheritors to do it in one pass.
+	 *
+	 * A constraint marked connoinherit has nothing to propagate, so the
+	 * child list is left empty in that case.
+	 */
+	if (!is_no_inherit_constraint)
+		children =
+			find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+	else
+		children = NIL;
+
+	/*
+	 * For a partitioned table, if partitions exist and we are told not to
+	 * recurse, it's a user error.  It doesn't make sense to have a constraint
+	 * be defined only on the parent, especially if it's a partitioned table.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+		children != NIL && !recurse)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
+				 errhint("Do not specify the ONLY keyword.")));
+
+	foreach(child, children)
+	{
+		Oid			childrelid = lfirst_oid(child);
+		Relation	childrel;
+		HeapTuple	copy_tuple;
+
+		/* find_inheritance_children already got lock */
+		childrel = table_open(childrelid, NoLock);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+
+		ScanKeyInit(&skey[0],
+					Anum_pg_constraint_conrelid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(childrelid));
+		ScanKeyInit(&skey[1],
+					Anum_pg_constraint_contypid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(InvalidOid));
+		ScanKeyInit(&skey[2],
+					Anum_pg_constraint_conname,
+					BTEqualStrategyNumber, F_NAMEEQ,
+					CStringGetDatum(constrName));
+		scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+								  true, NULL, 3, skey);
+
+		/* There can be at most one matching row */
+		if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+							constrName,
+							RelationGetRelationName(childrel))));
+
+		copy_tuple = heap_copytuple(tuple);	/* private copy; modified and freed below */
+
+		systable_endscan(scan);
+
+		con = (Form_pg_constraint) GETSTRUCT(copy_tuple);
+
+		/* Right now only CHECK constraints can be inherited */
+		if (con->contype != CONSTRAINT_CHECK)
+			elog(ERROR, "inherited constraint is not a CHECK constraint");
+
+		if (con->coninhcount <= 0)	/* shouldn't happen */
+			elog(ERROR, "relation %u has non-inherited constraint \"%s\"",
+				 childrelid, constrName);
+
+		if (recurse)
+		{
+			/*
+			 * If the child constraint has other definition sources, just
+			 * decrement its inheritance count; if not, recurse to delete it.
+			 */
+			if (con->coninhcount == 1 && !con->conislocal)
+			{
+				/*
+				 * Time to delete this child constraint, too.  The recursive
+				 * call passes recurse = true, recursing = true and
+				 * missing_ok = false.
+				 */
+				ATExecDropConstraint(childrel, constrName, behavior,
+									 true, true,
+									 false, lockmode);
+			}
+			else
+			{
+				/* Child constraint must survive my deletion */
+				con->coninhcount--;
+				CatalogTupleUpdate(conrel, &copy_tuple->t_self, copy_tuple);
+
+				/* Make update visible */
+				CommandCounterIncrement();
+			}
+		}
+		else
+		{
+			/*
+			 * If we were told to drop ONLY in this table (no recursion), we
+			 * need to mark the inheritors' constraints as locally defined
+			 * rather than inherited.
+			 */
+			con->coninhcount--;
+			con->conislocal = true;
+
+			CatalogTupleUpdate(conrel, &copy_tuple->t_self, copy_tuple);
+
+			/* Make update visible */
+			CommandCounterIncrement();
+		}
+
+		heap_freetuple(copy_tuple);
+
+		table_close(childrel, NoLock);
+	}
+
+	table_close(conrel, RowExclusiveLock);
+}
+
+/*
+ * ALTER COLUMN TYPE
+ *
+ * Unlike other subcommand types, we do parse transformation for ALTER COLUMN
+ * TYPE during phase 1 --- the AlterTableCmd passed in here is already
+ * transformed (and must be, because we rely on some transformed fields).
+ *
+ * The point of this is that the execution of all ALTER COLUMN TYPEs for a
+ * table will be done "in parallel" during phase 3, so all the USING
+ * expressions should be parsed assuming the original column types.  Also,
+ * this allows a USING expression to refer to a field that will be dropped.
+ *
+ * To make this work safely, AT_PASS_DROP then AT_PASS_ALTER_TYPE must be
+ * the first two execution steps in phase 2; they must not see the effects
+ * of any other subcommand types, since the USING expressions are parsed
+ * against the unmodified table's state.
+ *
+ * Arguments:
+ *	wqueue		work queue, passed through to ATPrepCmd() and
+ *				ATTypedTableRecursion()
+ *	tab			altered-table info for rel; its relkind is consulted, and for
+ *				plain/partitioned tables a NewColumnValue is appended to
+ *				tab->newvals and tab->rewrite may be flagged
+ *	rel			relation whose column is being altered
+ *	recurse		if true, queue the same command for every inheritor of rel;
+ *				if false (and not recursing), it is an error for rel to have
+ *				inheritance children
+ *	recursing	true when called for a child relation; skips the typed-table
+ *				and inherited-column checks
+ *	cmd			the command; cmd->name is the column name and cmd->def a
+ *				ColumnDef carrying the target type and, in cooked_default,
+ *				the USING expression if one was given
+ *	lockmode	lock mode used when locating inheritors, passed down
+ *	context		utility context, passed down
+ */
+static void
+ATPrepAlterColumnType(List **wqueue,
+					  AlteredTableInfo *tab, Relation rel,
+					  bool recurse, bool recursing,
+					  AlterTableCmd *cmd, LOCKMODE lockmode,
+					  AlterTableUtilityContext *context)
+{
+	char	   *colName = cmd->name;
+	ColumnDef  *def = (ColumnDef *) cmd->def;
+	TypeName   *typeName = def->typeName;
+	Node	   *transform = def->cooked_default;
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Oid			targettype;
+	int32		targettypmod;
+	Oid			targetcollid;
+	NewColumnValue *newval;
+	ParseState *pstate = make_parsestate(NULL);
+	AclResult	aclresult;
+	bool		is_expr;
+
+	if (rel->rd_rel->reloftype && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot alter column type of typed table")));
+
+	/* lookup the attribute so we can check inheritance status */
+	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	/* Can't alter a system attribute */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/*
+	 * Don't alter inherited columns.  At outer level, there had better not be
+	 * any inherited definition; when recursing, we assume this was checked at
+	 * the parent level (see below).
+	 */
+	if (attTup->attinhcount > 0 && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot alter inherited column \"%s\"",
+						colName)));
+
+	/* Don't alter columns used in the partition key */
+	if (has_partition_attrs(rel,
+							bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
+							&is_expr))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot alter column \"%s\" because it is part of the partition key of relation \"%s\"",
+						colName, RelationGetRelationName(rel))));
+
+	/* Look up the target type */
+	typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod);
+
+	aclresult = pg_type_aclcheck(targettype, GetUserId(), ACL_USAGE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error_type(aclresult, targettype);
+
+	/* And the collation */
+	targetcollid = GetColumnDefCollation(NULL, def, targettype);
+
+	/* make sure datatype is legal for a column */
+	CheckAttributeType(colName, targettype, targetcollid,
+					   list_make1_oid(rel->rd_rel->reltype),
+					   0);
+
+	if (tab->relkind == RELKIND_RELATION ||
+		tab->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		/*
+		 * Set up an expression to transform the old data value to the new
+		 * type. If a USING option was given, use the expression as
+		 * transformed by transformAlterTableStmt, else just take the old
+		 * value and try to coerce it.  We do this first so that type
+		 * incompatibility can be detected before we waste effort, and because
+		 * we need the expression to be parsed against the original table row
+		 * type.
+		 */
+		if (!transform)
+		{
+			transform = (Node *) makeVar(1, attnum,
+										 attTup->atttypid, attTup->atttypmod,
+										 attTup->attcollation,
+										 0);
+		}
+
+		transform = coerce_to_target_type(pstate,
+										  transform, exprType(transform),
+										  targettype, targettypmod,
+										  COERCION_ASSIGNMENT,
+										  COERCE_IMPLICIT_CAST,
+										  -1);
+		if (transform == NULL)
+		{
+			/* error text depends on whether USING was specified or not */
+			if (def->cooked_default != NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("result of USING clause for column \"%s\""
+								" cannot be cast automatically to type %s",
+								colName, format_type_be(targettype)),
+						 errhint("You might need to add an explicit cast.")));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("column \"%s\" cannot be cast automatically to type %s",
+								colName, format_type_be(targettype)),
+				/* translator: USING is SQL, don't translate it */
+						 errhint("You might need to specify \"USING %s::%s\".",
+								 quote_identifier(colName),
+								 format_type_with_typemod(targettype,
+														  targettypmod))));
+		}
+
+		/* Fix collations after all else */
+		assign_expr_collations(pstate, transform);
+
+		/* Plan the expr now so we can accurately assess the need to rewrite. */
+		transform = (Node *) expression_planner((Expr *) transform);
+
+		/*
+		 * Add a work queue item to make ATRewriteTable update the column
+		 * contents.
+		 */
+		newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
+		newval->attnum = attnum;
+		newval->expr = (Expr *) transform;
+		newval->is_generated = false;
+
+		tab->newvals = lappend(tab->newvals, newval);
+		if (ATColumnChangeRequiresRewrite(transform, attnum))
+			tab->rewrite |= AT_REWRITE_COLUMN_REWRITE;
+	}
+	else if (transform)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table",
+						RelationGetRelationName(rel))));
+
+	if (tab->relkind == RELKIND_COMPOSITE_TYPE ||
+		tab->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		/*
+		 * For composite types and foreign tables, do this check now.  Regular
+		 * tables will check it later when the table is being rewritten.
+		 */
+		find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
+	}
+
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Recurse manually by queueing a new command for each child, if
+	 * necessary. We cannot apply ATSimpleRecursion here because we need to
+	 * remap attribute numbers in the USING expression, if any.
+	 *
+	 * If we are told not to recurse, there had better not be any child
+	 * tables; else the alter would put them out of step.
+	 */
+	if (recurse)
+	{
+		Oid			relid = RelationGetRelid(rel);
+		List	   *child_oids,
+				   *child_numparents;
+		ListCell   *lo,
+				   *li;
+
+		child_oids = find_all_inheritors(relid, lockmode,
+										 &child_numparents);
+
+		/*
+		 * find_all_inheritors does the recursive search of the inheritance
+		 * hierarchy, so all we have to do is process all of the relids in the
+		 * list that it returns.
+		 */
+		forboth(lo, child_oids, li, child_numparents)
+		{
+			Oid			childrelid = lfirst_oid(lo);
+			int			numparents = lfirst_int(li);
+			Relation	childrel;
+			HeapTuple	childtuple;
+			Form_pg_attribute childattTup;
+
+			/* the list includes rel itself; it was handled above */
+			if (childrelid == relid)
+				continue;
+
+			/* find_all_inheritors already got lock */
+			childrel = relation_open(childrelid, NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+
+			/*
+			 * Verify that the child doesn't have any inherited definitions of
+			 * this column that came from outside this inheritance hierarchy.
+			 * (renameatt makes a similar test, though in a different way
+			 * because of its different recursion mechanism.)
+			 */
+			childtuple = SearchSysCacheAttName(RelationGetRelid(childrel),
+											   colName);
+			if (!HeapTupleIsValid(childtuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" of relation \"%s\" does not exist",
+								colName, RelationGetRelationName(childrel))));
+			childattTup = (Form_pg_attribute) GETSTRUCT(childtuple);
+
+			if (childattTup->attinhcount > numparents)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("cannot alter inherited column \"%s\" of relation \"%s\"",
+								colName, RelationGetRelationName(childrel))));
+
+			ReleaseSysCache(childtuple);
+
+			/*
+			 * Remap the attribute numbers.  If no USING expression was
+			 * specified, there is no need for this step.
+			 */
+			if (def->cooked_default)
+			{
+				AttrMap    *attmap;
+				bool		found_whole_row;
+
+				/* create a copy to scribble on */
+				cmd = copyObject(cmd);
+
+				attmap = build_attrmap_by_name(RelationGetDescr(childrel),
+											   RelationGetDescr(rel));
+				((ColumnDef *) cmd->def)->cooked_default =
+					map_variable_attnos(def->cooked_default,
+										1, 0,
+										attmap,
+										InvalidOid, &found_whole_row);
+				if (found_whole_row)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot convert whole-row table reference"),
+							 errdetail("USING expression contains a whole-row table reference.")));
+
+				/*
+				 * Release the map with free_attrmap(), not a bare pfree():
+				 * the attribute-number array is a separate allocation and
+				 * would otherwise be leaked once per child.
+				 */
+				free_attrmap(attmap);
+			}
+			ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
+			relation_close(childrel, NoLock);
+		}
+	}
+	else if (!recursing &&
+			 find_inheritance_children(RelationGetRelid(rel), false,
+									   NoLock) != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("type of inherited column \"%s\" must be changed in child tables too",
+						colName)));
+
+	/* For a composite type, also queue the command via ATTypedTableRecursion */
+	if (tab->relkind == RELKIND_COMPOSITE_TYPE)
+		ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
+}
+
+/*
+ * Decide whether a column type change forces the table to be rewritten.
+ *
+ * expr is the conversion expression for the column and varattno the
+ * column's attribute number.  We peel wrapper nodes off the expression one
+ * at a time.  If we end up at a plain Var for the column itself, the new
+ * type is sufficiently identical to the old one and the USING clause isn't
+ * trying to insert some other value, so no rewrite is needed (result false).
+ * Anything else means a rewrite (result true).  The wrappers that may be
+ * skipped are:
+ *
+ * - a RelabelType (the old type is binary coercible to the new type)
+ * - a CoerceToDomain whose domain has no constraints (the new type is an
+ *	 unconstrained domain over the old type)
+ * - a timestamptz <-> timestamp conversion function, when
+ *	 TimestampTimestampTzRequiresRewrite() reports that no rewrite is needed
+ *	 (the timezone is UTC)
+ *
+ * In the case of a constrained domain, we could get by with scanning the
+ * table and checking the constraint rather than actually rewriting it, but we
+ * don't currently try to do that.
+ */
+static bool
+ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno)
+{
+	Node	   *node = expr;
+
+	Assert(node != NULL);
+
+	for (;;)
+	{
+		switch (nodeTag(node))
+		{
+			case T_Var:
+				/* only one varno, so comparing the attno is sufficient */
+				return ((Var *) node)->varattno != varattno;
+
+			case T_RelabelType:
+				node = (Node *) ((RelabelType *) node)->arg;
+				break;
+
+			case T_CoerceToDomain:
+				{
+					CoerceToDomain *cdom = (CoerceToDomain *) node;
+
+					if (DomainHasConstraints(cdom->resulttype))
+						return true;
+					node = (Node *) cdom->arg;
+					break;
+				}
+
+			case T_FuncExpr:
+				{
+					FuncExpr   *fexpr = (FuncExpr *) node;
+
+					/* only the two timestamp conversions can be skipped */
+					if (fexpr->funcid != F_TIMESTAMPTZ_TIMESTAMP &&
+						fexpr->funcid != F_TIMESTAMP_TIMESTAMPTZ)
+						return true;
+					if (TimestampTimestampTzRequiresRewrite())
+						return true;
+					node = linitial(fexpr->args);
+					break;
+				}
+
+			default:
+				return true;
+		}
+	}
+}
+
+/*
+ * ALTER COLUMN .. SET DATA TYPE
+ *
+ * Return the address of the modified column.
+ */
+static ObjectAddress
+ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
+					  AlterTableCmd *cmd, LOCKMODE lockmode)
+{
+	char	   *colName = cmd->name;
+	ColumnDef  *def = (ColumnDef *) cmd->def;
+	TypeName   *typeName = def->typeName;
+	HeapTuple	heapTup;
+	Form_pg_attribute attTup,
+				attOldTup;
+	AttrNumber	attnum;
+	HeapTuple	typeTuple;
+	Form_pg_type tform;
+	Oid			targettype;
+	int32		targettypmod;
+	Oid			targetcollid;
+	Node	   *defaultexpr;
+	Relation	attrelation;
+	Relation	depRel;
+	ScanKeyData key[3];
+	SysScanDesc scan;
+	HeapTuple	depTup;
+	ObjectAddress address;
+
+	/*
+	 * Clear all the missing values if we're rewriting the table, since this
+	 * renders them pointless.
+	 */
+	if (tab->rewrite)
+	{
+		Relation	newrel;
+
+		newrel = table_open(RelationGetRelid(rel), NoLock);
+		RelationClearMissing(newrel);
+		relation_close(newrel, NoLock);
+		/* make sure we don't conflict with later attribute modifications */
+		CommandCounterIncrement();
+	}
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Look up the target column */
+	heapTup = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(heapTup)) /* shouldn't happen */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
+	attnum = attTup->attnum;
+	attOldTup = TupleDescAttr(tab->oldDesc, attnum - 1);
+
+	/* Check for multiple ALTER TYPE on same column --- can't cope */
+	if (attTup->atttypid != attOldTup->atttypid ||
+		attTup->atttypmod != attOldTup->atttypmod)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter type of column \"%s\" twice",
+						colName)));
+
+	/* Look up the target type (should not fail, since prep found it) */
+	typeTuple = typenameType(NULL, typeName, &targettypmod);
+	tform = (Form_pg_type) GETSTRUCT(typeTuple);
+	targettype = tform->oid;
+	/* And the collation */
+	targetcollid = GetColumnDefCollation(NULL, def, targettype);
+
+	/*
+	 * If there is a default expression for the column, get it and ensure we
+	 * can coerce it to the new datatype.  (We must do this before changing
+	 * the column type, because build_column_default itself will try to
+	 * coerce, and will not issue the error message we want if it fails.)
+	 *
+	 * We remove any implicit coercion steps at the top level of the old
+	 * default expression; this has been agreed to satisfy the principle of
+	 * least surprise.  (The conversion to the new column type should act like
+	 * it started from what the user sees as the stored expression, and the
+	 * implicit coercions aren't going to be shown.)
+	 */
+	if (attTup->atthasdef)
+	{
+		defaultexpr = build_column_default(rel, attnum);
+		Assert(defaultexpr);
+		defaultexpr = strip_implicit_coercions(defaultexpr);
+		defaultexpr = coerce_to_target_type(NULL,	/* no UNKNOWN params */
+											defaultexpr, exprType(defaultexpr),
+											targettype, targettypmod,
+											COERCION_ASSIGNMENT,
+											COERCE_IMPLICIT_CAST,
+											-1);
+		if (defaultexpr == NULL)
+		{
+			if (attTup->attgenerated)
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("generation expression for column \"%s\" cannot be cast automatically to type %s",
+								colName, format_type_be(targettype))));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("default for column \"%s\" cannot be cast automatically to type %s",
+								colName, format_type_be(targettype))));
+		}
+	}
+	else
+		defaultexpr = NULL;
+
+	/*
+	 * Find everything that depends on the column (constraints, indexes, etc),
+	 * and record enough information to let us recreate the objects.
+	 *
+	 * The actual recreation does not happen here, but only after we have
+	 * performed all the individual ALTER TYPE operations.  We have to save
+	 * the info before executing ALTER TYPE, though, else the deparser will
+	 * get confused.
+	 */
+	depRel = table_open(DependRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	ScanKeyInit(&key[2],
+				Anum_pg_depend_refobjsubid,
+				BTEqualStrategyNumber, F_INT4EQ,
+				Int32GetDatum((int32) attnum));
+
+	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+							  NULL, 3, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(scan)))
+	{
+		Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
+		ObjectAddress foundObject;
+
+		/* We don't expect any PIN dependencies on columns */
+		if (foundDep->deptype == DEPENDENCY_PIN)
+			elog(ERROR, "cannot alter type of a pinned column");
+
+		foundObject.classId = foundDep->classid;
+		foundObject.objectId = foundDep->objid;
+		foundObject.objectSubId = foundDep->objsubid;
+
+		switch (getObjectClass(&foundObject))
+		{
+			case OCLASS_CLASS:
+				{
+					char		relKind = get_rel_relkind(foundObject.objectId);
+
+					if (relKind == RELKIND_INDEX ||
+						relKind == RELKIND_PARTITIONED_INDEX)
+					{
+						Assert(foundObject.objectSubId == 0);
+						RememberIndexForRebuilding(foundObject.objectId, tab);
+					}
+					else if (relKind == RELKIND_SEQUENCE)
+					{
+						/*
+						 * This must be a SERIAL column's sequence.  We need
+						 * not do anything to it.
+						 */
+						Assert(foundObject.objectSubId == 0);
+					}
+					else if (relKind == RELKIND_RELATION &&
+							 foundObject.objectSubId != 0 &&
+							 get_attgenerated(foundObject.objectId, foundObject.objectSubId))
+					{
+						/*
+						 * Changing the type of a column that is used by a
+						 * generated column is not allowed by SQL standard. It
+						 * might be doable with some thinking and effort.
+						 */
+						ereport(ERROR,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								 errmsg("cannot alter type of a column used by a generated column"),
+								 errdetail("Column \"%s\" is used by generated column \"%s\".",
+										   colName, get_attname(foundObject.objectId, foundObject.objectSubId, false))));
+					}
+					else
+					{
+						/* Not expecting any other direct dependencies... */
+						elog(ERROR, "unexpected object depending on column: %s",
+							 getObjectDescription(&foundObject, false));
+					}
+					break;
+				}
+
+			case OCLASS_CONSTRAINT:
+				Assert(foundObject.objectSubId == 0);
+				RememberConstraintForRebuilding(foundObject.objectId, tab);
+				break;
+
+			case OCLASS_REWRITE:
+				/* XXX someday see if we can cope with revising views */
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter type of a column used by a view or rule"),
+						 errdetail("%s depends on column \"%s\"",
+								   getObjectDescription(&foundObject, false),
+								   colName)));
+				break;
+
+			case OCLASS_TRIGGER:
+
+				/*
+				 * A trigger can depend on a column because the column is
+				 * specified as an update target, or because the column is
+				 * used in the trigger's WHEN condition.  The first case would
+				 * not require any extra work, but the second case would
+				 * require updating the WHEN expression, which will take a
+				 * significant amount of new code.  Since we can't easily tell
+				 * which case applies, we punt for both.  FIXME someday.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter type of a column used in a trigger definition"),
+						 errdetail("%s depends on column \"%s\"",
+								   getObjectDescription(&foundObject, false),
+								   colName)));
+				break;
+
+			case OCLASS_POLICY:
+
+				/*
+				 * A policy can depend on a column because the column is
+				 * specified in the policy's USING or WITH CHECK qual
+				 * expressions.  It might be possible to rewrite and recheck
+				 * the policy expression, but punt for now.  It's certainly
+				 * easy enough to remove and recreate the policy; still, FIXME
+				 * someday.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter type of a column used in a policy definition"),
+						 errdetail("%s depends on column \"%s\"",
+								   getObjectDescription(&foundObject, false),
+								   colName)));
+				break;
+
+			case OCLASS_DEFAULT:
+
+				/*
+				 * Ignore the column's default expression, since we will fix
+				 * it below.
+				 */
+				Assert(defaultexpr);
+				break;
+
+			case OCLASS_STATISTIC_EXT:
+
+				/*
+				 * Give the extended-stats machinery a chance to fix anything
+				 * that this column type change would break.
+				 */
+				RememberStatisticsForRebuilding(foundObject.objectId, tab);
+				break;
+
+			case OCLASS_PROC:
+			case OCLASS_TYPE:
+			case OCLASS_CAST:
+			case OCLASS_COLLATION:
+			case OCLASS_CONVERSION:
+			case OCLASS_LANGUAGE:
+			case OCLASS_LARGEOBJECT:
+			case OCLASS_OPERATOR:
+			case OCLASS_OPCLASS:
+			case OCLASS_OPFAMILY:
+			case OCLASS_AM:
+			case OCLASS_AMOP:
+			case OCLASS_AMPROC:
+			case OCLASS_SCHEMA:
+			case OCLASS_TSPARSER:
+			case OCLASS_TSDICT:
+			case OCLASS_TSTEMPLATE:
+			case OCLASS_TSCONFIG:
+			case OCLASS_ROLE:
+			case OCLASS_DATABASE:
+			case OCLASS_TBLSPACE:
+			case OCLASS_FDW:
+			case OCLASS_FOREIGN_SERVER:
+			case OCLASS_USER_MAPPING:
+			case OCLASS_DEFACL:
+			case OCLASS_EXTENSION:
+			case OCLASS_EVENT_TRIGGER:
+			case OCLASS_PUBLICATION:
+			case OCLASS_PUBLICATION_REL:
+			case OCLASS_SUBSCRIPTION:
+			case OCLASS_TRANSFORM:
+
+				/*
+				 * We don't expect any of these sorts of objects to depend on
+				 * a column.
+				 */
+				elog(ERROR, "unexpected object depending on column: %s",
+					 getObjectDescription(&foundObject, false));
+				break;
+
+				/*
+				 * There's intentionally no default: case here; we want the
+				 * compiler to warn if a new OCLASS hasn't been handled above.
+				 */
+		}
+	}
+
+	systable_endscan(scan);
+
+	/*
+	 * Now scan for dependencies of this column on other things.  The only
+	 * thing we should find is the dependency on the column datatype, which we
+	 * want to remove, possibly a collation dependency, and dependencies on
+	 * other columns if it is a generated column.
+	 */
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_classid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_objid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	ScanKeyInit(&key[2],
+				Anum_pg_depend_objsubid,
+				BTEqualStrategyNumber, F_INT4EQ,
+				Int32GetDatum((int32) attnum));
+
+	scan = systable_beginscan(depRel, DependDependerIndexId, true,
+							  NULL, 3, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(scan)))
+	{
+		Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
+		ObjectAddress foundObject;
+
+		foundObject.classId = foundDep->refclassid;
+		foundObject.objectId = foundDep->refobjid;
+		foundObject.objectSubId = foundDep->refobjsubid;
+
+		if (foundDep->deptype != DEPENDENCY_NORMAL &&
+			foundDep->deptype != DEPENDENCY_AUTO)
+			elog(ERROR, "found unexpected dependency type '%c'",
+				 foundDep->deptype);
+		if (!(foundDep->refclassid == TypeRelationId &&
+			  foundDep->refobjid == attTup->atttypid) &&
+			!(foundDep->refclassid == CollationRelationId &&
+			  foundDep->refobjid == attTup->attcollation) &&
+			!(foundDep->refclassid == RelationRelationId &&
+			  foundDep->refobjid == RelationGetRelid(rel) &&
+			  foundDep->refobjsubid != 0)
+			)
+			elog(ERROR, "found unexpected dependency for column: %s",
+				 getObjectDescription(&foundObject, false));
+
+		CatalogTupleDelete(depRel, &depTup->t_self);
+	}
+
+	systable_endscan(scan);
+
+	table_close(depRel, RowExclusiveLock);
+
+	/*
+	 * Here we go --- change the recorded column type and collation.  (Note
+	 * heapTup is a copy of the syscache entry, so okay to scribble on.) First
+	 * fix up the missing value if any.
+	 */
+	if (attTup->atthasmissing)
+	{
+		Datum		missingval;
+		bool		missingNull;
+
+		/* if rewrite is true the missing value should already be cleared */
+		Assert(tab->rewrite == 0);
+
+		/* Get the missing value datum */
+		missingval = heap_getattr(heapTup,
+								  Anum_pg_attribute_attmissingval,
+								  attrelation->rd_att,
+								  &missingNull);
+
+		/* if it's a null array there is nothing to do */
+
+		if (!missingNull)
+		{
+			/*
+			 * Get the datum out of the array and repack it in a new array
+			 * built with the new type data. We assume that since the table
+			 * doesn't need rewriting, the actual Datum doesn't need to be
+			 * changed, only the array metadata.
+			 */
+
+			int			one = 1;
+			bool		isNull;
+			Datum		valuesAtt[Natts_pg_attribute];
+			bool		nullsAtt[Natts_pg_attribute];
+			bool		replacesAtt[Natts_pg_attribute];
+			HeapTuple	newTup;
+
+			MemSet(valuesAtt, 0, sizeof(valuesAtt));
+			MemSet(nullsAtt, false, sizeof(nullsAtt));
+			MemSet(replacesAtt, false, sizeof(replacesAtt));
+
+			missingval = array_get_element(missingval,
+										   1,
+										   &one,
+										   0,
+										   attTup->attlen,
+										   attTup->attbyval,
+										   attTup->attalign,
+										   &isNull);
+			missingval = PointerGetDatum(construct_array(&missingval,
+														 1,
+														 targettype,
+														 tform->typlen,
+														 tform->typbyval,
+														 tform->typalign));
+
+			valuesAtt[Anum_pg_attribute_attmissingval - 1] = missingval;
+			replacesAtt[Anum_pg_attribute_attmissingval - 1] = true;
+			nullsAtt[Anum_pg_attribute_attmissingval - 1] = false;
+
+			newTup = heap_modify_tuple(heapTup, RelationGetDescr(attrelation),
+									   valuesAtt, nullsAtt, replacesAtt);
+			heap_freetuple(heapTup);
+			heapTup = newTup;
+			attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
+		}
+	}
+
+	attTup->atttypid = targettype;
+	attTup->atttypmod = targettypmod;
+	attTup->attcollation = targetcollid;
+	attTup->attndims = list_length(typeName->arrayBounds);
+	attTup->attlen = tform->typlen;
+	attTup->attbyval = tform->typbyval;
+	attTup->attalign = tform->typalign;
+	attTup->attstorage = tform->typstorage;
+
+	/* Setup attribute compression */
+	if (rel->rd_rel->relkind == RELKIND_RELATION ||
+		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		/*
+		 * No compression for plain/external storage, otherwise, default
+		 * compression method if it is not already set, refer comments atop
+		 * attcompression parameter in pg_attribute.h.
+		 */
+		if (!IsStorageCompressible(tform->typstorage))
+			attTup->attcompression = InvalidCompressionMethod;
+		else if (!CompressionMethodIsValid(attTup->attcompression))
+			attTup->attcompression = GetDefaultToastCompression();
+	}
+	else
+		attTup->attcompression = InvalidCompressionMethod;
+
+	ReleaseSysCache(typeTuple);
+
+	CatalogTupleUpdate(attrelation, &heapTup->t_self, heapTup);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	/* Install dependencies on new datatype and collation */
+	add_column_datatype_dependency(RelationGetRelid(rel), attnum, targettype);
+	add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);
+
+	/*
+	 * Drop any pg_statistic entry for the column, since it's now wrong type
+	 */
+	RemoveStatistics(RelationGetRelid(rel), attnum);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel), attnum);
+
+	/*
+	 * Update the default, if present, by brute force --- remove and re-add
+	 * the default.  Probably unsafe to take shortcuts, since the new version
+	 * may well have additional dependencies.  (It's okay to do this now,
+	 * rather than after other ALTER TYPE commands, since the default won't
+	 * depend on other column types.)
+	 */
+	if (defaultexpr)
+	{
+		/* Must make new row visible since it will be updated again */
+		CommandCounterIncrement();
+
+		/*
+		 * We use RESTRICT here for safety, but at present we do not expect
+		 * anything to depend on the default.
+		 */
+		RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, true,
+						  true);
+
+		StoreAttrDefault(rel, attnum, defaultexpr, true, false);
+	}
+
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	/* Cleanup */
+	heap_freetuple(heapTup);
+
+	return address;
+}
+
+/*
+ * Helper for ATExecAlterColumnType: if the given index is marked as a
+ * replica identity, record its name in "tab" so the marking can be reset
+ * later.
+ *
+ * It is an error if a replica identity index has already been recorded.
+ */
+static void
+RememberReplicaIdentityForRebuilding(Oid indoid, AlteredTableInfo *tab)
+{
+	if (get_index_isreplident(indoid))
+	{
+		/* a second replica identity index is not expected */
+		if (tab->replicaIdentityIndex != NULL)
+			elog(ERROR, "relation %u has multiple indexes marked as replica identity", tab->relid);
+
+		tab->replicaIdentityIndex = get_rel_name(indoid);
+	}
+}
+
+/*
+ * Helper for ATExecAlterColumnType: if the given index is a clustered
+ * index, record its name in "tab".
+ *
+ * It is an error if a clustered index has already been recorded.
+ */
+static void
+RememberClusterOnForRebuilding(Oid indoid, AlteredTableInfo *tab)
+{
+	if (get_index_isclustered(indoid))
+	{
+		/* a second clustered index is not expected */
+		if (tab->clusterOnIndex != NULL)
+			elog(ERROR, "relation %u has multiple clustered indexes", tab->relid);
+
+		tab->clusterOnIndex = get_rel_name(indoid);
+	}
+}
+
+/*
+ * Helper for ATExecAlterColumnType: note that a constraint must be rebuilt,
+ * unless it has been noted already.
+ */
+static void
+RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab)
+{
+	char	   *condef;
+	Oid			conindex;
+
+	/*
+	 * Skipping already-seen constraints matters for two separate reasons:
+	 * the same constraint must not be recreated twice, and when a constraint
+	 * depends on several columns whose types are being altered, its
+	 * definition string has to be captured before any of those type changes
+	 * is applied.  ruleutils.c would get confused if asked again later.
+	 */
+	if (list_member_oid(tab->changedConstraintOids, conoid))
+		return;
+
+	/* Not seen before: save the constraint's current definition string */
+	condef = pg_get_constraintdef_command(conoid);
+
+	tab->changedConstraintOids = lappend_oid(tab->changedConstraintOids,
+											 conoid);
+	tab->changedConstraintDefs = lappend(tab->changedConstraintDefs,
+										 condef);
+
+	/*
+	 * If the constraint has an index, note whether that index serves as the
+	 * table's replica identity or is a clustered index, so that
+	 * ATPostAlterTypeCleanup() can queue the commands that restore those
+	 * properties.
+	 */
+	conindex = get_constraint_index(conoid);
+	if (!OidIsValid(conindex))
+		return;
+
+	RememberReplicaIdentityForRebuilding(conindex, tab);
+	RememberClusterOnForRebuilding(conindex, tab);
+}
+
+/*
+ * Helper for ATExecAlterColumnType: note that an index must be rebuilt,
+ * unless it has been noted already.  An index that belongs to a constraint
+ * is handed over to RememberConstraintForRebuilding() instead.
+ */
+static void
+RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab)
+{
+	Oid			owningcon;
+	char	   *inddef;
+
+	/*
+	 * Skipping already-seen indexes matters for two separate reasons: the
+	 * same index must not be recreated twice, and when an index depends on
+	 * several columns whose types are being altered, its definition string
+	 * has to be captured before any of those type changes is applied.
+	 * ruleutils.c would get confused if asked again later.
+	 */
+	if (list_member_oid(tab->changedIndexOids, indoid))
+		return;
+
+	/*
+	 * Check first whether the index belongs to a constraint; if so, the
+	 * constraint is what gets rebuilt.  Usually there is none to be found
+	 * here, because constraint indexes normally depend only on their
+	 * constraint.  Still, such an index can also have direct dependencies on
+	 * table columns, for example with a partial exclusion constraint.
+	 */
+	owningcon = get_index_constraint(indoid);
+	if (OidIsValid(owningcon))
+	{
+		RememberConstraintForRebuilding(owningcon, tab);
+		return;
+	}
+
+	/* A stand-alone index: save its current definition string */
+	inddef = pg_get_indexdef_string(indoid);
+
+	tab->changedIndexOids = lappend_oid(tab->changedIndexOids,
+										indoid);
+	tab->changedIndexDefs = lappend(tab->changedIndexDefs,
+									inddef);
+
+	/*
+	 * Note whether this index serves as the table's replica identity or is
+	 * a clustered index, so that ATPostAlterTypeCleanup() can queue the
+	 * commands that restore those properties.
+	 */
+	RememberReplicaIdentityForRebuilding(indoid, tab);
+	RememberClusterOnForRebuilding(indoid, tab);
+}
+
+/*
+ * Helper for ATExecAlterColumnType: note that a statistics object must be
+ * rebuilt, unless it has been noted already.
+ */
+static void
+RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab)
+{
+	char	   *stxdef;
+
+	/*
+	 * Skipping already-seen statistics objects matters for two separate
+	 * reasons: the same object must not be recreated twice, and when it
+	 * depends on several columns whose types are being altered, its
+	 * definition string has to be captured before any of those type changes
+	 * is applied.  ruleutils.c would get confused if asked again later.
+	 */
+	if (list_member_oid(tab->changedStatisticsOids, stxoid))
+		return;
+
+	/* Not seen before: save the statistics object's current definition */
+	stxdef = pg_get_statisticsobjdef_string(stxoid);
+
+	tab->changedStatisticsOids = lappend_oid(tab->changedStatisticsOids,
+											 stxoid);
+	tab->changedStatisticsDefs = lappend(tab->changedStatisticsDefs,
+										 stxdef);
+}
+
+/*
+ * Cleanup after we've finished all the ALTER TYPE operations for a
+ * particular relation.  We have to drop and recreate all the indexes,
+ * constraints and statistics objects that depend on the altered columns.
+ * We do the actual dropping here, but re-creation is managed by adding
+ * work queue entries to do those steps later.
+ *
+ * The objects to process are those recorded in tab's changedConstraintOids,
+ * changedIndexOids and changedStatisticsOids lists, each paired with a
+ * saved definition string.  If tab records a replica identity index or a
+ * clustered index, commands to restore those markings are queued as well.
+ */
+static void
+ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
+{
+	ObjectAddress obj;
+	ObjectAddresses *objects;
+	ListCell   *def_item;
+	ListCell   *oid_item;
+
+	/*
+	 * Collect all the constraints, indexes and statistics objects to drop so
+	 * we can process them in a single call.  That way we don't have to worry
+	 * about dependencies among them.
+	 */
+	objects = new_object_addresses();
+
+	/*
+	 * Re-parse the index and constraint definitions, and attach them to the
+	 * appropriate work queue entries.  We do this before dropping because in
+	 * the case of a FOREIGN KEY constraint, we might not yet have exclusive
+	 * lock on the table the constraint is attached to, and we need to get
+	 * that before reparsing/dropping.
+	 *
+	 * We can't rely on the output of deparsing to tell us which relation to
+	 * operate on, because concurrent activity might have made the name
+	 * resolve differently.  Instead, we've got to use the OID of the
+	 * constraint or index we're processing to figure out which relation to
+	 * operate on.
+	 */
+	forboth(oid_item, tab->changedConstraintOids,
+			def_item, tab->changedConstraintDefs)
+	{
+		Oid			oldId = lfirst_oid(oid_item);
+		HeapTuple	tup;
+		Form_pg_constraint con;
+		Oid			relid;
+		Oid			confrelid;
+		char		contype;
+		bool		conislocal;
+
+		tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
+		if (!HeapTupleIsValid(tup)) /* should not happen */
+			elog(ERROR, "cache lookup failed for constraint %u", oldId);
+		con = (Form_pg_constraint) GETSTRUCT(tup);
+		if (OidIsValid(con->conrelid))
+			relid = con->conrelid;
+		else
+		{
+			/* must be a domain constraint */
+			relid = get_typ_typrelid(getBaseType(con->contypid));
+			if (!OidIsValid(relid))
+				elog(ERROR, "could not identify relation associated with constraint %u", oldId);
+		}
+		confrelid = con->confrelid;
+		contype = con->contype;
+		conislocal = con->conislocal;
+		ReleaseSysCache(tup);
+
+		ObjectAddressSet(obj, ConstraintRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+
+		/*
+		 * If the constraint is inherited (only), we don't want to inject a
+		 * new definition here; it'll get recreated when ATAddCheckConstraint
+		 * recurses from adding the parent table's constraint.  But we had to
+		 * carry the info this far so that we can drop the constraint below.
+		 */
+		if (!conislocal)
+			continue;
+
+		/*
+		 * When rebuilding an FK constraint that references the table we're
+		 * modifying, we might not yet have any lock on the FK's table, so get
+		 * one now.  We'll need AccessExclusiveLock for the DROP CONSTRAINT
+		 * step, so there's no value in asking for anything weaker.
+		 */
+		if (relid != tab->relid && contype == CONSTRAINT_FOREIGN)
+			LockRelationOid(relid, AccessExclusiveLock);
+
+		ATPostAlterTypeParse(oldId, relid, confrelid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+	}
+	forboth(oid_item, tab->changedIndexOids,
+			def_item, tab->changedIndexDefs)
+	{
+		Oid			oldId = lfirst_oid(oid_item);
+		Oid			relid;
+
+		relid = IndexGetRelation(oldId, false);
+		ATPostAlterTypeParse(oldId, relid, InvalidOid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+
+		ObjectAddressSet(obj, RelationRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+	}
+
+	/*
+	 * Likewise re-parse the saved definitions of the statistics objects, and
+	 * add the old objects to the list of things to drop.
+	 */
+	forboth(oid_item, tab->changedStatisticsOids,
+			def_item, tab->changedStatisticsDefs)
+	{
+		Oid			oldId = lfirst_oid(oid_item);
+		Oid			relid;
+
+		relid = StatisticsGetRelation(oldId, false);
+		ATPostAlterTypeParse(oldId, relid, InvalidOid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+
+		ObjectAddressSet(obj, StatisticExtRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+	}
+
+	/*
+	 * Queue up command to restore replica identity index marking
+	 */
+	if (tab->replicaIdentityIndex)
+	{
+		AlterTableCmd *cmd = makeNode(AlterTableCmd);
+		ReplicaIdentityStmt *subcmd = makeNode(ReplicaIdentityStmt);
+
+		subcmd->identity_type = REPLICA_IDENTITY_INDEX;
+		subcmd->name = tab->replicaIdentityIndex;
+		cmd->subtype = AT_ReplicaIdentity;
+		cmd->def = (Node *) subcmd;
+
+		/* do it after indexes and constraints */
+		tab->subcmds[AT_PASS_OLD_CONSTR] =
+			lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+	}
+
+	/*
+	 * Queue up command to restore marking of index used for cluster.
+	 */
+	if (tab->clusterOnIndex)
+	{
+		AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+		cmd->subtype = AT_ClusterOn;
+		cmd->name = tab->clusterOnIndex;
+
+		/* do it after indexes and constraints */
+		tab->subcmds[AT_PASS_OLD_CONSTR] =
+			lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+	}
+
+	/*
+	 * It should be okay to use DROP_RESTRICT here, since nothing else should
+	 * be depending on these objects.
+	 */
+	performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+	free_object_addresses(objects);
+
+	/*
+	 * The objects will get recreated during subsequent passes over the work
+	 * queue.
+	 */
+}
+
+/*
+ * Parse the previously-saved definition string for a constraint, index or
+ * statistics object against the newly-established column data type(s), and
+ * queue up the resulting command parsetrees for execution.
+ *
+ * This might fail if, for example, you have a WHERE clause that uses an
+ * operator that's not available for the new column type.
+ *
+ * oldId is the OID of the object being rebuilt, and cmd is its saved
+ * definition string.  oldRelId is the relation the statements are
+ * transformed against and whose work queue entry receives the commands.
+ * refRelId is stored as old_pktable_oid in re-added constraints.  If
+ * rewrite is false, TryReuseIndex() is applied to rebuilt indexes and
+ * TryReuseForeignKey() to rebuilt foreign keys (the latter only if the
+ * queue entry's table isn't being rewritten either).  lockmode is not
+ * referenced in this function.
+ */
+static void
+ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
+					 List **wqueue, LOCKMODE lockmode, bool rewrite)
+{
+	List	   *raw_parsetree_list;
+	List	   *querytree_list;
+	ListCell   *list_item;
+	Relation	rel;
+
+	/*
+	 * We expect that we will get only ALTER TABLE, CREATE INDEX and CREATE
+	 * STATISTICS statements. Hence, there is no need to pass them through
+	 * parse_analyze() or the rewriter, but instead we need to pass them
+	 * through parse_utilcmd.c to make them ready for execution.  Any other
+	 * statement (ALTER DOMAIN is the one accepted further down) is passed
+	 * along untransformed.
+	 */
+	raw_parsetree_list = raw_parser(cmd, RAW_PARSE_DEFAULT);
+	querytree_list = NIL;
+	foreach(list_item, raw_parsetree_list)
+	{
+		RawStmt    *rs = lfirst_node(RawStmt, list_item);
+		Node	   *stmt = rs->stmt;
+
+		if (IsA(stmt, IndexStmt))
+			querytree_list = lappend(querytree_list,
+									 transformIndexStmt(oldRelId,
+														(IndexStmt *) stmt,
+														cmd));
+		else if (IsA(stmt, AlterTableStmt))
+		{
+			List	   *beforeStmts;
+			List	   *afterStmts;
+
+			stmt = (Node *) transformAlterTableStmt(oldRelId,
+													(AlterTableStmt *) stmt,
+													cmd,
+													&beforeStmts,
+													&afterStmts);
+			querytree_list = list_concat(querytree_list, beforeStmts);
+			querytree_list = lappend(querytree_list, stmt);
+			querytree_list = list_concat(querytree_list, afterStmts);
+		}
+		else if (IsA(stmt, CreateStatsStmt))
+			querytree_list = lappend(querytree_list,
+									 transformStatsStmt(oldRelId,
+														(CreateStatsStmt *) stmt,
+														cmd));
+		else
+			querytree_list = lappend(querytree_list, stmt);
+	}
+
+	/* Caller should already have acquired whatever lock we need. */
+	rel = relation_open(oldRelId, NoLock);
+
+	/*
+	 * Attach each generated command to the proper place in the work queue.
+	 * Note this could result in creation of entirely new work-queue entries.
+	 *
+	 * Also note that we have to tweak the command subtypes, because it turns
+	 * out that re-creation of indexes and constraints has to act a bit
+	 * differently from initial creation.
+	 */
+	foreach(list_item, querytree_list)
+	{
+		Node	   *stm = (Node *) lfirst(list_item);
+		AlteredTableInfo *tab;
+
+		tab = ATGetQueueEntry(wqueue, rel);
+
+		if (IsA(stm, IndexStmt))
+		{
+			IndexStmt  *stmt = (IndexStmt *) stm;
+			AlterTableCmd *newcmd;
+
+			if (!rewrite)
+				TryReuseIndex(oldId, stmt);
+			stmt->reset_default_tblspc = true;
+			/* keep the index's comment */
+			stmt->idxcomment = GetComment(oldId, RelationRelationId, 0);
+
+			newcmd = makeNode(AlterTableCmd);
+			newcmd->subtype = AT_ReAddIndex;
+			newcmd->def = (Node *) stmt;
+			tab->subcmds[AT_PASS_OLD_INDEX] =
+				lappend(tab->subcmds[AT_PASS_OLD_INDEX], newcmd);
+		}
+		else if (IsA(stm, AlterTableStmt))
+		{
+			AlterTableStmt *stmt = (AlterTableStmt *) stm;
+			ListCell   *lcmd;
+
+			foreach(lcmd, stmt->cmds)
+			{
+				AlterTableCmd *cmd = castNode(AlterTableCmd, lfirst(lcmd));
+
+				if (cmd->subtype == AT_AddIndex)
+				{
+					IndexStmt  *indstmt;
+					Oid			indoid;
+
+					indstmt = castNode(IndexStmt, cmd->def);
+					indoid = get_constraint_index(oldId);
+
+					if (!rewrite)
+						TryReuseIndex(indoid, indstmt);
+					/* keep any comment on the index */
+					indstmt->idxcomment = GetComment(indoid,
+													 RelationRelationId, 0);
+					indstmt->reset_default_tblspc = true;
+
+					cmd->subtype = AT_ReAddIndex;
+					tab->subcmds[AT_PASS_OLD_INDEX] =
+						lappend(tab->subcmds[AT_PASS_OLD_INDEX], cmd);
+
+					/* recreate any comment on the constraint */
+					RebuildConstraintComment(tab,
+											 AT_PASS_OLD_INDEX,
+											 oldId,
+											 rel,
+											 NIL,
+											 indstmt->idxname);
+				}
+				else if (cmd->subtype == AT_AddConstraint)
+				{
+					Constraint *con = castNode(Constraint, cmd->def);
+
+					con->old_pktable_oid = refRelId;
+					/* rewriting neither side of a FK */
+					if (con->contype == CONSTR_FOREIGN &&
+						!rewrite && tab->rewrite == 0)
+						TryReuseForeignKey(oldId, con);
+					con->reset_default_tblspc = true;
+					cmd->subtype = AT_ReAddConstraint;
+					tab->subcmds[AT_PASS_OLD_CONSTR] =
+						lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+
+					/* recreate any comment on the constraint */
+					RebuildConstraintComment(tab,
+											 AT_PASS_OLD_CONSTR,
+											 oldId,
+											 rel,
+											 NIL,
+											 con->conname);
+				}
+				else if (cmd->subtype == AT_SetNotNull)
+				{
+					/*
+					 * The parser will create AT_SetNotNull subcommands for
+					 * columns of PRIMARY KEY indexes/constraints, but we need
+					 * not do anything with them here, because the columns'
+					 * NOT NULL marks will already have been propagated into
+					 * the new table definition.
+					 */
+				}
+				else
+					elog(ERROR, "unexpected statement subtype: %d",
+						 (int) cmd->subtype);
+			}
+		}
+		else if (IsA(stm, AlterDomainStmt))
+		{
+			AlterDomainStmt *stmt = (AlterDomainStmt *) stm;
+
+			if (stmt->subtype == 'C')	/* ADD CONSTRAINT */
+			{
+				Constraint *con = castNode(Constraint, stmt->def);
+				AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+				cmd->subtype = AT_ReAddDomainConstraint;
+				cmd->def = (Node *) stmt;
+				tab->subcmds[AT_PASS_OLD_CONSTR] =
+					lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+
+				/* recreate any comment on the constraint */
+				RebuildConstraintComment(tab,
+										 AT_PASS_OLD_CONSTR,
+										 oldId,
+										 NULL,
+										 stmt->typeName,
+										 con->conname);
+			}
+			else
+				elog(ERROR, "unexpected statement subtype: %d",
+					 (int) stmt->subtype);
+		}
+		else if (IsA(stm, CreateStatsStmt))
+		{
+			CreateStatsStmt  *stmt = (CreateStatsStmt *) stm;
+			AlterTableCmd *newcmd;
+
+			/* keep the statistics object's comment */
+			stmt->stxcomment = GetComment(oldId, StatisticExtRelationId, 0);
+
+			newcmd = makeNode(AlterTableCmd);
+			newcmd->subtype = AT_ReAddStatistics;
+			newcmd->def = (Node *) stmt;
+			tab->subcmds[AT_PASS_MISC] =
+				lappend(tab->subcmds[AT_PASS_MISC], newcmd);
+		}
+		else
+			elog(ERROR, "unexpected statement type: %d",
+				 (int) nodeTag(stm));
+	}
+
+	relation_close(rel, NoLock);
+}
+
+/*
+ * Helper for ATPostAlterTypeParse(): carry an existing comment over to a
+ * table or domain constraint that is being rebuilt.
+ *
+ * objid is the constraint's OID.  For a table constraint pass "rel"; for a
+ * domain constraint pass NULL for "rel" and the domain's qualified name, as
+ * a string list, in "domname".  The resulting command is appended to
+ * tab->subcmds[pass].
+ * (Both that information and the conname could be looked up in the
+ * pg_constraint entry, but the callers have them at hand already.)
+ */
+static void
+RebuildConstraintComment(AlteredTableInfo *tab, int pass, Oid objid,
+						 Relation rel, List *domname,
+						 const char *conname)
+{
+	char	   *oldcomment;
+	CommentStmt *cstmt;
+	AlterTableCmd *atcmd;
+
+	/* Nothing to do unless the constraint actually has a comment */
+	oldcomment = GetComment(objid, ConstraintRelationId, 0);
+	if (oldcomment == NULL)
+		return;
+
+	/* Build the CommentStmt node, copying all input data for safety */
+	cstmt = makeNode(CommentStmt);
+	if (rel == NULL)
+	{
+		/* domain constraint: type name, then constraint name */
+		cstmt->objtype = OBJECT_DOMCONSTRAINT;
+		cstmt->object = (Node *)
+			list_make2(makeTypeNameFromNameList(copyObject(domname)),
+					   makeString(pstrdup(conname)));
+	}
+	else
+	{
+		/* table constraint: schema, table, then constraint name */
+		cstmt->objtype = OBJECT_TABCONSTRAINT;
+		cstmt->object = (Node *)
+			list_make3(makeString(get_namespace_name(RelationGetNamespace(rel))),
+					   makeString(pstrdup(RelationGetRelationName(rel))),
+					   makeString(pstrdup(conname)));
+	}
+	cstmt->comment = oldcomment;
+
+	/* Wrap it in an AlterTableCmd and queue it for the requested pass */
+	atcmd = makeNode(AlterTableCmd);
+	atcmd->subtype = AT_ReAddComment;
+	atcmd->def = (Node *) cstmt;
+	tab->subcmds[pass] = lappend(tab->subcmds[pass], atcmd);
+}
+
+/*
+ * Helper for ATPostAlterTypeParse().  CheckIndexCompatible() does the real
+ * analysis; if it approves, the old index's relfilenode details are copied
+ * into the IndexStmt.
+ */
+static void
+TryReuseIndex(Oid oldId, IndexStmt *stmt)
+{
+	Relation	oldindex;
+
+	/* Leave the IndexStmt untouched unless the old index is compatible */
+	if (!CheckIndexCompatible(oldId,
+							  stmt->accessMethod,
+							  stmt->indexParams,
+							  stmt->excludeOpNames))
+		return;
+
+	oldindex = index_open(oldId, NoLock);
+
+	/* A partitioned index has no storage that could be shared. */
+	if (oldindex->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+	{
+		stmt->oldNode = oldindex->rd_node.relNode;
+		stmt->oldCreateSubid = oldindex->rd_createSubid;
+		stmt->oldFirstRelfilenodeSubid = oldindex->rd_firstRelfilenodeSubid;
+	}
+
+	index_close(oldindex, NoLock);
+}
+
+/*
+ * Subroutine for ATPostAlterTypeParse().
+ *
+ * Stash the old P-F equality operator into the Constraint node, for possible
+ * use by ATAddForeignKeyConstraint() in determining whether revalidation of
+ * this constraint can be skipped.
+ *
+ * oldId is the OID of the existing pg_constraint entry.  con is the FOREIGN
+ * KEY Constraint node being re-added; its old_conpfeqop list must still be
+ * empty on entry and receives one operator OID per key column.
+ */
+static void
+TryReuseForeignKey(Oid oldId, Constraint *con)
+{
+	HeapTuple	tup;
+	Datum		adatum;
+	bool		isNull;
+	ArrayType  *arr;
+	Oid		   *rawarr;
+	int			numkeys;
+	int			i;
+
+	Assert(con->contype == CONSTR_FOREIGN);
+	Assert(con->old_conpfeqop == NIL);	/* node must not be prepared twice */
+
+	tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for constraint %u", oldId);
+
+	adatum = SysCacheGetAttr(CONSTROID, tup,
+							 Anum_pg_constraint_conpfeqop, &isNull);
+	if (isNull)
+		elog(ERROR, "null conpfeqop for constraint %u", oldId);
+	arr = DatumGetArrayTypeP(adatum);	/* ensure not toasted */
+
+	/* test follows the one in ri_FetchConstraintInfo() */
+	if (ARR_NDIM(arr) != 1 ||
+		ARR_HASNULL(arr) ||
+		ARR_ELEMTYPE(arr) != OIDOID)
+		elog(ERROR, "conpfeqop is not a 1-D Oid array");
+
+	/*
+	 * Only consult the dimension vector once we know the array really is
+	 * one-dimensional; a zero-dimensional array has no dims entry to read.
+	 */
+	numkeys = ARR_DIMS(arr)[0];
+	rawarr = (Oid *) ARR_DATA_PTR(arr);
+
+	/* stash a List of the operator Oids in our Constraint node */
+	for (i = 0; i < numkeys; i++)
+		con->old_conpfeqop = lappend_oid(con->old_conpfeqop, rawarr[i]);
+
+	ReleaseSysCache(tup);
+}
+
+/*
+ * ALTER COLUMN .. OPTIONS ( ... )
+ *
+ * Apply the given generic option changes to the attfdwoptions of column
+ * "colName" of foreign table "rel".  The new option set is produced by
+ * transformGenericOptions(), which is handed the validator of the table's
+ * foreign-data wrapper.
+ *
+ * Returns the address of the modified column, or InvalidObjectAddress if
+ * "options" is empty.  Errors out if rel has no pg_foreign_table entry, if
+ * the column does not exist, or if it is a system column.  lockmode is not
+ * referenced in this function.
+ */
+static ObjectAddress
+ATExecAlterColumnGenericOptions(Relation rel,
+								const char *colName,
+								List *options,
+								LOCKMODE lockmode)
+{
+	Relation	ftrel;
+	Relation	attrel;
+	ForeignServer *server;
+	ForeignDataWrapper *fdw;
+	HeapTuple	tuple;
+	HeapTuple	newtuple;
+	bool		isnull;
+	Datum		repl_val[Natts_pg_attribute];
+	bool		repl_null[Natts_pg_attribute];
+	bool		repl_repl[Natts_pg_attribute];
+	Datum		datum;
+	Form_pg_foreign_table fttableform;
+	Form_pg_attribute atttableform;
+	AttrNumber	attnum;
+	ObjectAddress address;
+
+	/* Nothing to change */
+	if (options == NIL)
+		return InvalidObjectAddress;
+
+	/* First, determine FDW validator associated to the foreign table. */
+	ftrel = table_open(ForeignTableRelationId, AccessShareLock);
+	/* syscache keys are Datums, so convert the OID explicitly */
+	tuple = SearchSysCache1(FOREIGNTABLEREL,
+							ObjectIdGetDatum(RelationGetRelid(rel)));
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("foreign table \"%s\" does not exist",
+						RelationGetRelationName(rel))));
+	fttableform = (Form_pg_foreign_table) GETSTRUCT(tuple);
+	server = GetForeignServer(fttableform->ftserver);
+	fdw = GetForeignDataWrapper(server->fdwid);
+
+	table_close(ftrel, AccessShareLock);
+	ReleaseSysCache(tuple);
+
+	/* Now look up the column's pg_attribute row */
+	attrel = table_open(AttributeRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	/* Prevent them from altering a system attribute */
+	atttableform = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = atttableform->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"", colName)));
+
+	/* Initialize buffers for new tuple values */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	/* Extract the current options */
+	datum = SysCacheGetAttr(ATTNAME,
+							tuple,
+							Anum_pg_attribute_attfdwoptions,
+							&isnull);
+	if (isnull)
+		datum = PointerGetDatum(NULL);
+
+	/* Transform the options */
+	datum = transformGenericOptions(AttributeRelationId,
+									datum,
+									options,
+									fdw->fdwvalidator);
+
+	/* A null result pointer means the column is stored as NULL */
+	if (PointerIsValid(DatumGetPointer(datum)))
+		repl_val[Anum_pg_attribute_attfdwoptions - 1] = datum;
+	else
+		repl_null[Anum_pg_attribute_attfdwoptions - 1] = true;
+
+	repl_repl[Anum_pg_attribute_attfdwoptions - 1] = true;
+
+	/* Everything looks good - update the tuple */
+
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrel),
+								 repl_val, repl_null, repl_repl);
+
+	CatalogTupleUpdate(attrel, &newtuple->t_self, newtuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	ReleaseSysCache(tuple);
+
+	table_close(attrel, RowExclusiveLock);
+
+	heap_freetuple(newtuple);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE OWNER
+ *
+ * Change the owner of the relation identified by relationOid to newOwnerId.
+ * The relation is opened here with lockmode, and the same lockmode is passed
+ * down to every recursive call.
+ *
+ * recursing is true if we are recursing from a table to its indexes,
+ * sequences, or toast table.  We don't allow the ownership of those things to
+ * be changed separately from the parent table.  Also, we can skip permission
+ * checks (this is necessary not just an optimization, else we'd fail to
+ * handle toast tables properly).
+ *
+ * recursing is also true if ALTER TYPE OWNER is calling us to fix up a
+ * free-standing composite type.
+ *
+ * If the relation is already owned by newOwnerId, no catalog changes are
+ * made, but the post-alter object hook is still invoked.
+ */
+void
+ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lockmode)
+{
+	Relation	target_rel;
+	Relation	class_rel;
+	HeapTuple	tuple;
+	Form_pg_class tuple_class;
+
+	/*
+	 * Lock the target table with the caller-supplied lockmode; the lock is
+	 * kept when the relation is closed at the end of this function. Use
+	 * relation_open so that we can work on indexes and sequences.
+	 */
+	target_rel = relation_open(relationOid, lockmode);
+
+	/* Get its pg_class tuple, too */
+	class_rel = table_open(RelationRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relationOid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", relationOid);
+	tuple_class = (Form_pg_class) GETSTRUCT(tuple);
+
+	/* Can we change the ownership of this tuple? */
+	switch (tuple_class->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_VIEW:
+		case RELKIND_MATVIEW:
+		case RELKIND_FOREIGN_TABLE:
+		case RELKIND_PARTITIONED_TABLE:
+			/* ok to change owner */
+			break;
+		case RELKIND_INDEX:
+			if (!recursing)
+			{
+				/*
+				 * Because ALTER INDEX OWNER used to be allowed, and in fact
+				 * is generated by old versions of pg_dump, we give a warning
+				 * and do nothing rather than erroring out.  Also, to avoid
+				 * unnecessary chatter while restoring those old dumps, say
+				 * nothing at all if the command would be a no-op anyway.
+				 */
+				if (tuple_class->relowner != newOwnerId)
+					ereport(WARNING,
+							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+							 errmsg("cannot change owner of index \"%s\"",
+									NameStr(tuple_class->relname)),
+							 errhint("Change the ownership of the index's table, instead.")));
+				/* quick hack to exit via the no-op path */
+				newOwnerId = tuple_class->relowner;
+			}
+			break;
+		case RELKIND_PARTITIONED_INDEX:
+			/* unlike plain indexes, a direct request is a hard error */
+			if (recursing)
+				break;
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot change owner of index \"%s\"",
+							NameStr(tuple_class->relname)),
+					 errhint("Change the ownership of the index's table, instead.")));
+			break;
+		case RELKIND_SEQUENCE:
+			if (!recursing &&
+				tuple_class->relowner != newOwnerId)
+			{
+				/* if it's an owned sequence, disallow changing it by itself */
+				Oid			tableId;
+				int32		colId;
+
+				if (sequenceIsOwned(relationOid, DEPENDENCY_AUTO, &tableId, &colId) ||
+					sequenceIsOwned(relationOid, DEPENDENCY_INTERNAL, &tableId, &colId))
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot change owner of sequence \"%s\"",
+									NameStr(tuple_class->relname)),
+							 errdetail("Sequence \"%s\" is linked to table \"%s\".",
+									   NameStr(tuple_class->relname),
+									   get_rel_name(tableId))));
+			}
+			break;
+		case RELKIND_COMPOSITE_TYPE:
+			/* only reachable legitimately when recursing (see header) */
+			if (recursing)
+				break;
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is a composite type",
+							NameStr(tuple_class->relname)),
+					 errhint("Use ALTER TYPE instead.")));
+			break;
+		case RELKIND_TOASTVALUE:
+			if (recursing)
+				break;
+			/* FALL THRU */
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is not a table, view, sequence, or foreign table",
+							NameStr(tuple_class->relname))));
+	}
+
+	/*
+	 * If the new owner is the same as the existing owner, consider the
+	 * command to have succeeded.  This is for dump restoration purposes.
+	 */
+	if (tuple_class->relowner != newOwnerId)
+	{
+		Datum		repl_val[Natts_pg_class];
+		bool		repl_null[Natts_pg_class];
+		bool		repl_repl[Natts_pg_class];
+		Acl		   *newAcl;
+		Datum		aclDatum;
+		bool		isNull;
+		HeapTuple	newtuple;
+
+		/* skip permission checks when recursing (e.g. to an index or toast table) */
+		if (!recursing)
+		{
+			/* Superusers can always do it */
+			if (!superuser())
+			{
+				Oid			namespaceOid = tuple_class->relnamespace;
+				AclResult	aclresult;
+
+				/* Otherwise, must be owner of the existing object */
+				if (!pg_class_ownercheck(relationOid, GetUserId()))
+					aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relationOid)),
+								   RelationGetRelationName(target_rel));
+
+				/* Must be able to become new owner */
+				check_is_member_of_role(GetUserId(), newOwnerId);
+
+				/* New owner must have CREATE privilege on namespace */
+				aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId,
+												  ACL_CREATE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, OBJECT_SCHEMA,
+								   get_namespace_name(namespaceOid));
+			}
+		}
+
+		/* Only relowner, and relacl if it is non-null, are marked for replacement */
+		memset(repl_null, false, sizeof(repl_null));
+		memset(repl_repl, false, sizeof(repl_repl));
+
+		repl_repl[Anum_pg_class_relowner - 1] = true;
+		repl_val[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(newOwnerId);
+
+		/*
+		 * Determine the modified ACL for the new owner.  This is only
+		 * necessary when the ACL is non-null.
+		 */
+		aclDatum = SysCacheGetAttr(RELOID, tuple,
+								   Anum_pg_class_relacl,
+								   &isNull);
+		if (!isNull)
+		{
+			newAcl = aclnewowner(DatumGetAclP(aclDatum),
+								 tuple_class->relowner, newOwnerId);
+			repl_repl[Anum_pg_class_relacl - 1] = true;
+			repl_val[Anum_pg_class_relacl - 1] = PointerGetDatum(newAcl);
+		}
+
+		newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl);
+
+		CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple);
+
+		heap_freetuple(newtuple);
+
+		/*
+		 * We must similarly update any per-column ACLs to reflect the new
+		 * owner; for neatness reasons that's split out as a subroutine.
+		 * Note that tuple_class still points at the old syscache tuple, so
+		 * tuple_class->relowner is the previous owner here and below.
+		 */
+		change_owner_fix_column_acls(relationOid,
+									 tuple_class->relowner,
+									 newOwnerId);
+
+		/*
+		 * Update owner dependency reference, if any.  A composite type has
+		 * none, because it's tracked for the pg_type entry instead of here;
+		 * indexes and TOAST tables don't have their own entries either.
+		 */
+		if (tuple_class->relkind != RELKIND_COMPOSITE_TYPE &&
+			tuple_class->relkind != RELKIND_INDEX &&
+			tuple_class->relkind != RELKIND_PARTITIONED_INDEX &&
+			tuple_class->relkind != RELKIND_TOASTVALUE)
+			changeDependencyOnOwner(RelationRelationId, relationOid,
+									newOwnerId);
+
+		/*
+		 * Also change the ownership of the table's row type, if it has one
+		 */
+		if (OidIsValid(tuple_class->reltype))
+			AlterTypeOwnerInternal(tuple_class->reltype, newOwnerId);
+
+		/*
+		 * If we are operating on a plain table, partitioned table,
+		 * materialized view, or TOAST table, also change the ownership of
+		 * any indexes that belong to the relation.  (Its toast table and
+		 * dependent sequences are handled just below.)
+		 */
+		if (tuple_class->relkind == RELKIND_RELATION ||
+			tuple_class->relkind == RELKIND_PARTITIONED_TABLE ||
+			tuple_class->relkind == RELKIND_MATVIEW ||
+			tuple_class->relkind == RELKIND_TOASTVALUE)
+		{
+			List	   *index_oid_list;
+			ListCell   *i;
+
+			/* Find all the indexes belonging to this relation */
+			index_oid_list = RelationGetIndexList(target_rel);
+
+			/* For each index, recursively change its ownership */
+			foreach(i, index_oid_list)
+				ATExecChangeOwner(lfirst_oid(i), newOwnerId, true, lockmode);
+
+			list_free(index_oid_list);
+		}
+
+		/* If it has a toast table, recurse to change its ownership */
+		if (tuple_class->reltoastrelid != InvalidOid)
+			ATExecChangeOwner(tuple_class->reltoastrelid, newOwnerId,
+							  true, lockmode);
+
+		/* If it has dependent sequences, recurse to change them too */
+		change_owner_recurse_to_sequences(relationOid, newOwnerId, lockmode);
+	}
+
+	/* The hook is invoked on the no-op path as well */
+	InvokeObjectPostAlterHook(RelationRelationId, relationOid, 0);
+
+	ReleaseSysCache(tuple);
+	table_close(class_rel, RowExclusiveLock);
+	/* NoLock: the lock taken on the target at the top is not released here */
+	relation_close(target_rel, NoLock);
+}
+
+/*
+ * change_owner_fix_column_acls
+ *
+ * Subroutine of ATExecChangeOwner.  Walks the pg_attribute rows belonging
+ * to relationOid and, for every non-dropped column that carries a non-null
+ * ACL, stores a copy of that ACL rewritten by aclnewowner() from oldOwnerId
+ * to newOwnerId.  Columns without an ACL are left untouched.
+ */
+static void
+change_owner_fix_column_acls(Oid relationOid, Oid oldOwnerId, Oid newOwnerId)
+{
+	Relation	pg_attr;
+	SysScanDesc attscan;
+	ScanKeyData skey;
+	HeapTuple	oldtup;
+
+	pg_attr = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Restrict the index scan to the rows of the target relation */
+	ScanKeyInit(&skey,
+				Anum_pg_attribute_attrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relationOid));
+	attscan = systable_beginscan(pg_attr, AttributeRelidNumIndexId,
+								 true, NULL, 1, &skey);
+
+	for (oldtup = systable_getnext(attscan);
+		 HeapTupleIsValid(oldtup);
+		 oldtup = systable_getnext(attscan))
+	{
+		Form_pg_attribute attform = (Form_pg_attribute) GETSTRUCT(oldtup);
+		Datum		values[Natts_pg_attribute];
+		bool		nulls[Natts_pg_attribute];
+		bool		replaces[Natts_pg_attribute];
+		Datum		oldAclDatum;
+		bool		aclIsNull;
+		Acl		   *fixedAcl;
+		HeapTuple	updtup;
+
+		/* Dropped columns are left alone */
+		if (attform->attisdropped)
+			continue;
+
+		/* So are columns that have no ACL at all */
+		oldAclDatum = heap_getattr(oldtup,
+								   Anum_pg_attribute_attacl,
+								   RelationGetDescr(pg_attr),
+								   &aclIsNull);
+		if (aclIsNull)
+			continue;
+
+		/* Build the replacement ACL, then a tuple that swaps in only attacl */
+		fixedAcl = aclnewowner(DatumGetAclP(oldAclDatum),
+							   oldOwnerId, newOwnerId);
+
+		memset(nulls, false, sizeof(nulls));
+		memset(replaces, false, sizeof(replaces));
+		replaces[Anum_pg_attribute_attacl - 1] = true;
+		values[Anum_pg_attribute_attacl - 1] = PointerGetDatum(fixedAcl);
+
+		updtup = heap_modify_tuple(oldtup,
+								   RelationGetDescr(pg_attr),
+								   values, nulls, replaces);
+		CatalogTupleUpdate(pg_attr, &updtup->t_self, updtup);
+		heap_freetuple(updtup);
+	}
+
+	systable_endscan(attscan);
+	table_close(pg_attr, RowExclusiveLock);
+}
+
+/*
+ * change_owner_recurse_to_sequences
+ *
+ * Subroutine of ATExecChangeOwner.  Scans pg_depend for entries that
+ * reference relationOid and, for every sequence hanging off one of the
+ * relation's columns through an auto or internal dependency, recurses into
+ * ATExecChangeOwner to give it the new owner.
+ */
+static void
+change_owner_recurse_to_sequences(Oid relationOid, Oid newOwnerId, LOCKMODE lockmode)
+{
+	Relation	pg_dep;
+	SysScanDesc depscan;
+	ScanKeyData skeys[2];
+	HeapTuple	deptup;
+
+	pg_dep = table_open(DependRelationId, AccessShareLock);
+
+	/*
+	 * Look for everything that depends on the relation or any of its
+	 * columns; refobjsubid is deliberately not part of the scan key, since
+	 * we don't care which column a sequence belongs to.
+	 */
+	ScanKeyInit(&skeys[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&skeys[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relationOid));
+
+	depscan = systable_beginscan(pg_dep, DependReferenceIndexId, true,
+								 NULL, 2, skeys);
+
+	for (deptup = systable_getnext(depscan);
+		 HeapTupleIsValid(deptup);
+		 deptup = systable_getnext(depscan))
+	{
+		Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(deptup);
+		bool		refs_column = (dep->refobjsubid != 0);
+		bool		is_whole_rel = (dep->classid == RelationRelationId &&
+									dep->objsubid == 0);
+		bool		is_owned_dep = (dep->deptype == DEPENDENCY_AUTO ||
+									dep->deptype == DEPENDENCY_INTERNAL);
+		Relation	candidate;
+
+		/*
+		 * Only whole relations that depend on a column via an auto or
+		 * internal dependency are of interest.
+		 */
+		if (!refs_column || !is_whole_rel || !is_owned_dep)
+			continue;
+
+		/* relation_open, since the dependent object might be an index */
+		candidate = relation_open(dep->objid, lockmode);
+
+		if (RelationGetForm(candidate)->relkind == RELKIND_SEQUENCE)
+		{
+			/* The sequence can stay open while its owner is changed */
+			ATExecChangeOwner(dep->objid, newOwnerId, true, lockmode);
+
+			/* Close it, but hold the lock till end of transaction */
+			relation_close(candidate, NoLock);
+		}
+		else
+		{
+			/* Not a sequence: nothing to do, so give the lock back too */
+			relation_close(candidate, lockmode);
+		}
+	}
+
+	systable_endscan(depscan);
+
+	relation_close(pg_dep, AccessShareLock);
+}
+
+/*
+ * ALTER TABLE CLUSTER ON
+ *
+ * Look up indexName in the table's own namespace, verify that the index may
+ * be clustered on, and flag it as the table's clustering index.  Only the
+ * indisclustered bits change.
+ *
+ * Returns the object address of the index that is now the clustering index.
+ */
+static ObjectAddress
+ATExecClusterOn(Relation rel, const char *indexName, LOCKMODE lockmode)
+{
+	ObjectAddress result;
+	Oid			clusterIdx = get_relname_relid(indexName,
+											   rel->rd_rel->relnamespace);
+
+	/* The index has to exist in the same schema as the table */
+	if (clusterIdx == InvalidOid)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("index \"%s\" for table \"%s\" does not exist",
+						indexName, RelationGetRelationName(rel))));
+
+	/* Validate the index first, then update the catalog flags */
+	check_index_is_clusterable(rel, clusterIdx, false, lockmode);
+	mark_index_clustered(rel, clusterIdx, false);
+
+	ObjectAddressSet(result, RelationRelationId, clusterIdx);
+	return result;
+}
+
+/*
+ * ALTER TABLE SET WITHOUT CLUSTER
+ *
+ * We have to find any indexes on the table that have indisclustered bit
+ * set and turn it off.
+ *
+ * The work is delegated to mark_index_clustered(), called with InvalidOid
+ * in place of an index OID.  lockmode is accepted but not used here.
+ */
+static void
+ATExecDropCluster(Relation rel, LOCKMODE lockmode)
+{
+	mark_index_clustered(rel, InvalidOid, false);
+}
+
+/*
+ * ALTER TABLE SET TABLESPACE
+ *
+ * Preparation step: resolve the tablespace name, check that the current
+ * user may create objects in it, and remember its OID in "tab" so that a
+ * later phase can do the actual move.  A second SET TABLESPACE subcommand
+ * for the same table is rejected.
+ */
+static void
+ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, const char *tablespacename, LOCKMODE lockmode)
+{
+	/* Resolve the name, checking that the tablespace exists */
+	Oid			targetSpc = get_tablespace_oid(tablespacename, false);
+	bool		need_acl_check;
+
+	/* No permission check is made for the database's default tablespace */
+	need_acl_check = (OidIsValid(targetSpc) &&
+					  targetSpc != MyDatabaseTableSpace);
+	if (need_acl_check)
+	{
+		AclResult	perm = pg_tablespace_aclcheck(targetSpc, GetUserId(),
+												  ACL_CREATE);
+
+		if (perm != ACLCHECK_OK)
+			aclcheck_error(perm, OBJECT_TABLESPACE, tablespacename);
+	}
+
+	/* Only one target tablespace may be recorded per table */
+	if (tab->newTableSpace != InvalidOid)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot have multiple SET TABLESPACE subcommands")));
+
+	/* Stash the OID; Phase 3 does the real work */
+	tab->newTableSpace = targetSpc;
+}
+
+/*
+ * Set, reset, or replace reloptions.
+ *
+ * rel is the relation whose pg_class.reloptions is to be changed, and
+ * defList is the list of option definitions given in the command.
+ * operation selects the behavior: AT_ReplaceRelOptions starts from an empty
+ * option list, AT_ResetRelOptions is passed on to transformRelOptions() as
+ * its final (reset) flag, and any other value merges defList into the
+ * existing options.  lockmode is used only to open the toast table, if any.
+ *
+ * The new option set is validated according to the relation's relkind
+ * before the pg_class row is updated.  If the relation has a toast table,
+ * the same steps are then repeated for the toast table's pg_class row.
+ */
+static void
+ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
+					LOCKMODE lockmode)
+{
+	Oid			relid;
+	Relation	pgclass;
+	HeapTuple	tuple;
+	HeapTuple	newtuple;
+	Datum		datum;
+	bool		isnull;
+	Datum		newOptions;
+	Datum		repl_val[Natts_pg_class];
+	bool		repl_null[Natts_pg_class];
+	bool		repl_repl[Natts_pg_class];
+	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
+
+	/* An empty list only matters for "replace", which then clears the options */
+	if (defList == NIL && operation != AT_ReplaceRelOptions)
+		return;					/* nothing to do */
+
+	pgclass = table_open(RelationRelationId, RowExclusiveLock);
+
+	/* Fetch heap tuple */
+	relid = RelationGetRelid(rel);
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+
+	if (operation == AT_ReplaceRelOptions)
+	{
+		/*
+		 * If we're supposed to replace the reloptions list, we just pretend
+		 * there were none before.
+		 */
+		datum = (Datum) 0;
+		isnull = true;
+	}
+	else
+	{
+		/* Get the old reloptions */
+		datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+								&isnull);
+	}
+
+	/* Generate new proposed reloptions (text array) */
+	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
+									 defList, NULL, validnsps, false,
+									 operation == AT_ResetRelOptions);
+
+	/* Validate, using the parser appropriate for the relation's kind */
+	switch (rel->rd_rel->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_TOASTVALUE:
+		case RELKIND_MATVIEW:
+			(void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
+			break;
+		case RELKIND_PARTITIONED_TABLE:
+			(void) partitioned_table_reloptions(newOptions, true);
+			break;
+		case RELKIND_VIEW:
+			(void) view_reloptions(newOptions, true);
+			break;
+		case RELKIND_INDEX:
+		case RELKIND_PARTITIONED_INDEX:
+			(void) index_reloptions(rel->rd_indam->amoptions, newOptions, true);
+			break;
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is not a table, view, materialized view, index, or TOAST table",
+							RelationGetRelationName(rel))));
+			break;
+	}
+
+	/* Special-case validation of view options */
+	if (rel->rd_rel->relkind == RELKIND_VIEW)
+	{
+		Query	   *view_query = get_view_query(rel);
+		List	   *view_options = untransformRelOptions(newOptions);
+		ListCell   *cell;
+		bool		check_option = false;
+
+		/* See whether the resulting option set includes check_option */
+		foreach(cell, view_options)
+		{
+			DefElem    *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "check_option") == 0)
+				check_option = true;
+		}
+
+		/*
+		 * If the check option is specified, look to see if the view is
+		 * actually auto-updatable or not.
+		 */
+		if (check_option)
+		{
+			const char *view_updatable_error =
+			view_query_is_auto_updatable(view_query, true);
+
+			if (view_updatable_error)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
+						 errhint("%s", _(view_updatable_error))));
+		}
+	}
+
+	/*
+	 * All we need do here is update the pg_class row; the new options will be
+	 * propagated into relcaches during post-commit cache inval.
+	 */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	/* A zero Datum means there are no options left: store NULL instead */
+	if (newOptions != (Datum) 0)
+		repl_val[Anum_pg_class_reloptions - 1] = newOptions;
+	else
+		repl_null[Anum_pg_class_reloptions - 1] = true;
+
+	repl_repl[Anum_pg_class_reloptions - 1] = true;
+
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
+								 repl_val, repl_null, repl_repl);
+
+	CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);
+
+	heap_freetuple(newtuple);
+
+	ReleaseSysCache(tuple);
+
+	/* repeat the whole exercise for the toast table, if there's one */
+	if (OidIsValid(rel->rd_rel->reltoastrelid))
+	{
+		Relation	toastrel;
+		Oid			toastid = rel->rd_rel->reltoastrelid;
+
+		toastrel = table_open(toastid, lockmode);
+
+		/* Fetch heap tuple */
+		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for relation %u", toastid);
+
+		if (operation == AT_ReplaceRelOptions)
+		{
+			/*
+			 * If we're supposed to replace the reloptions list, we just
+			 * pretend there were none before.
+			 */
+			datum = (Datum) 0;
+			isnull = true;
+		}
+		else
+		{
+			/* Get the old reloptions */
+			datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+									&isnull);
+		}
+
+		/*
+		 * Same transformation as above, except that "toast" rather than NULL
+		 * is passed as the namespace argument (presumably selecting the
+		 * options qualified with "toast." -- confirm against
+		 * transformRelOptions).
+		 */
+		newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
+										 defList, "toast", validnsps, false,
+										 operation == AT_ResetRelOptions);
+
+		(void) heap_reloptions(RELKIND_TOASTVALUE, newOptions, true);
+
+		memset(repl_val, 0, sizeof(repl_val));
+		memset(repl_null, false, sizeof(repl_null));
+		memset(repl_repl, false, sizeof(repl_repl));
+
+		if (newOptions != (Datum) 0)
+			repl_val[Anum_pg_class_reloptions - 1] = newOptions;
+		else
+			repl_null[Anum_pg_class_reloptions - 1] = true;
+
+		repl_repl[Anum_pg_class_reloptions - 1] = true;
+
+		newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
+									 repl_val, repl_null, repl_repl);
+
+		CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);
+
+		/* Unlike the call for the main relation, this uses the Arg variant */
+		InvokeObjectPostAlterHookArg(RelationRelationId,
+									 RelationGetRelid(toastrel), 0,
+									 InvalidOid, true);
+
+		heap_freetuple(newtuple);
+
+		ReleaseSysCache(tuple);
+
+		/* NoLock: the lock taken when opening the toast table is kept */
+		table_close(toastrel, NoLock);
+	}
+
+	table_close(pgclass, RowExclusiveLock);
+}
+
+/*
+ * Execute ALTER TABLE SET TABLESPACE for cases where there is no tuple
+ * rewriting to be done, so we just want to copy the data as fast as possible.
+ *
+ * tableOid is the relation to move; it is opened here with lockmode.
+ * newTableSpace is the OID of the destination tablespace.
+ *
+ * After the relation itself has been moved, the function recurses to its
+ * toast relation (if any) and to the indexes of that toast relation.
+ */
+static void
+ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
+{
+	Relation	rel;
+	Oid			reltoastrelid;
+	Oid			newrelfilenode;
+	RelFileNode newrnode;
+	List	   *reltoastidxids = NIL;
+	ListCell   *lc;
+
+	/*
+	 * Need lock here in case we are recursing to toast table or index
+	 */
+	rel = relation_open(tableOid, lockmode);
+
+	/*
+	 * Check first if relation can be moved to new tablespace.  If the check
+	 * says no, there is nothing to copy, but the post-alter hook is still
+	 * invoked before returning.
+	 */
+	if (!CheckRelationTableSpaceMove(rel, newTableSpace))
+	{
+		InvokeObjectPostAlterHook(RelationRelationId,
+								  RelationGetRelid(rel), 0);
+		relation_close(rel, NoLock);
+		return;
+	}
+
+	reltoastrelid = rel->rd_rel->reltoastrelid;
+	/* Fetch the list of indexes on toast relation if necessary */
+	if (OidIsValid(reltoastrelid))
+	{
+		Relation	toastRel = relation_open(reltoastrelid, lockmode);
+
+		reltoastidxids = RelationGetIndexList(toastRel);
+		relation_close(toastRel, lockmode);
+	}
+
+	/*
+	 * Relfilenodes are not unique in databases across tablespaces, so we need
+	 * to allocate a new one in the new tablespace.
+	 */
+	newrelfilenode = GetNewRelFileNode(newTableSpace, NULL,
+									   rel->rd_rel->relpersistence);
+
+	/*
+	 * Build the RelFileNode of the new storage: a copy of the current one
+	 * with the relfilenode and tablespace replaced.
+	 */
+	newrnode = rel->rd_node;
+	newrnode.relNode = newrelfilenode;
+	newrnode.spcNode = newTableSpace;
+
+	/* hand off to AM to actually create the new filenode and copy the data */
+	if (rel->rd_rel->relkind == RELKIND_INDEX)
+	{
+		index_copy_data(rel, newrnode);
+	}
+	else
+	{
+		Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
+			   rel->rd_rel->relkind == RELKIND_MATVIEW ||
+			   rel->rd_rel->relkind == RELKIND_TOASTVALUE);
+		table_relation_copy_data(rel, &newrnode);
+	}
+
+	/*
+	 * Update the pg_class row.
+	 *
+	 * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be
+	 * executed on pg_class or its indexes (the above copy wouldn't contain
+	 * the updated pg_class entry), but that's forbidden with
+	 * CheckRelationTableSpaceMove().
+	 */
+	SetRelationTableSpace(rel, newTableSpace, newrelfilenode);
+
+	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);
+
+	RelationAssumeNewRelfilenode(rel);
+
+	/* NoLock: the lock taken at the top is not released here */
+	relation_close(rel, NoLock);
+
+	/* Make sure the reltablespace change is visible */
+	CommandCounterIncrement();
+
+	/*
+	 * Move associated toast relation and/or indexes, too.  The index list
+	 * used here was collected above, before the main relation was moved.
+	 */
+	if (OidIsValid(reltoastrelid))
+		ATExecSetTableSpace(reltoastrelid, newTableSpace, lockmode);
+	foreach(lc, reltoastidxids)
+		ATExecSetTableSpace(lfirst_oid(lc), newTableSpace, lockmode);
+
+	/* Clean up */
+	list_free(reltoastidxids);
+}
+
+/*
+ * ALTER TABLE SET TABLESPACE for relations that have no storage of their
+ * own but still record a tablespace.
+ *
+ * With nothing on disk to copy, the move is a metadata-only change of the
+ * relation's reltablespace.  The post-alter hook fires whether or not the
+ * tablespace actually changes.
+ */
+static void
+ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace)
+{
+	/* Relations with storage are handled in phase 3, never here */
+	Assert(!RELKIND_HAS_STORAGE(rel->rd_rel->relkind));
+
+	if (CheckRelationTableSpaceMove(rel, newTableSpace))
+	{
+		/* The move is allowed: switch reltablespace, no new relfilenode */
+		SetRelationTableSpace(rel, newTableSpace, InvalidOid);
+
+		InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);
+
+		/* Let later commands see the new reltablespace */
+		CommandCounterIncrement();
+	}
+	else
+	{
+		/* Nothing to change; just report the (no-op) alteration */
+		InvokeObjectPostAlterHook(RelationRelationId,
+								  RelationGetRelid(rel),
+								  0);
+	}
+}
+
+/*
+ * Alter Table ALL ... SET TABLESPACE
+ *
+ * Allows a user to move all objects of some type in a given tablespace in the
+ * current database to another tablespace.  Objects can be chosen based on the
+ * owner of the object also, to allow users to move only their objects.
+ * The user must have CREATE rights on the new tablespace, as usual.   The main
+ * permissions handling is done by the lower-level table move function.
+ *
+ * All to-be-moved objects are locked first. If NOWAIT is specified and the
+ * lock can't be acquired then we ereport(ERROR).
+ *
+ * The fields of stmt used here are objtype (tables, indexes or materialized
+ * views), orig_tablespacename, new_tablespacename, roles (optional owner
+ * filter) and nowait.
+ *
+ * Returns the OID of the new tablespace, or InvalidOid if the new tablespace
+ * is the database's default tablespace.
+ */
+Oid
+AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
+{
+	List	   *relations = NIL;
+	ListCell   *l;
+	ScanKeyData key[1];
+	Relation	rel;
+	TableScanDesc scan;
+	HeapTuple	tuple;
+	Oid			orig_tablespaceoid;
+	Oid			new_tablespaceoid;
+	List	   *role_oids = roleSpecsToIds(stmt->roles);
+
+	/* Ensure we were not asked to move something we can't */
+	if (stmt->objtype != OBJECT_TABLE && stmt->objtype != OBJECT_INDEX &&
+		stmt->objtype != OBJECT_MATVIEW)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("only tables, indexes, and materialized views exist in tablespaces")));
+
+	/* Get the orig and new tablespace OIDs */
+	orig_tablespaceoid = get_tablespace_oid(stmt->orig_tablespacename, false);
+	new_tablespaceoid = get_tablespace_oid(stmt->new_tablespacename, false);
+
+	/* Can't move shared relations in to or out of pg_global */
+	/* This is also checked by ATExecSetTableSpace, but nice to stop earlier */
+	if (orig_tablespaceoid == GLOBALTABLESPACE_OID ||
+		new_tablespaceoid == GLOBALTABLESPACE_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot move relations in to or out of pg_global tablespace")));
+
+	/*
+	 * Must have CREATE rights on the new tablespace, unless it is the
+	 * database default tablespace (which all users implicitly have CREATE
+	 * rights on).
+	 */
+	if (OidIsValid(new_tablespaceoid) && new_tablespaceoid != MyDatabaseTableSpace)
+	{
+		AclResult	aclresult;
+
+		aclresult = pg_tablespace_aclcheck(new_tablespaceoid, GetUserId(),
+										   ACL_CREATE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, OBJECT_TABLESPACE,
+						   get_tablespace_name(new_tablespaceoid));
+	}
+
+	/*
+	 * Now that the checks are done, check if we should set either to
+	 * InvalidOid because it is our database's default tablespace.
+	 */
+	if (orig_tablespaceoid == MyDatabaseTableSpace)
+		orig_tablespaceoid = InvalidOid;
+
+	if (new_tablespaceoid == MyDatabaseTableSpace)
+		new_tablespaceoid = InvalidOid;
+
+	/* no-op */
+	if (orig_tablespaceoid == new_tablespaceoid)
+		return new_tablespaceoid;
+
+	/*
+	 * Walk the list of objects in the tablespace and move them. This will
+	 * only find objects in our database, of course.
+	 */
+	ScanKeyInit(&key[0],
+				Anum_pg_class_reltablespace,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(orig_tablespaceoid));
+
+	rel = table_open(RelationRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(rel, 1, key);
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
+		Oid			relOid = relForm->oid;
+
+		/*
+		 * Do not move objects in pg_catalog as part of this, if an admin
+		 * really wishes to do so, they can issue the individual ALTER
+		 * commands directly.
+		 *
+		 * Also, explicitly avoid any shared tables, temp tables, or TOAST
+		 * (TOAST will be moved with the main table).
+		 */
+		if (IsCatalogNamespace(relForm->relnamespace) ||
+			relForm->relisshared ||
+			isAnyTempNamespace(relForm->relnamespace) ||
+			IsToastNamespace(relForm->relnamespace))
+			continue;
+
+		/* Only move the object type requested */
+		if ((stmt->objtype == OBJECT_TABLE &&
+			 relForm->relkind != RELKIND_RELATION &&
+			 relForm->relkind != RELKIND_PARTITIONED_TABLE) ||
+			(stmt->objtype == OBJECT_INDEX &&
+			 relForm->relkind != RELKIND_INDEX &&
+			 relForm->relkind != RELKIND_PARTITIONED_INDEX) ||
+			(stmt->objtype == OBJECT_MATVIEW &&
+			 relForm->relkind != RELKIND_MATVIEW))
+			continue;
+
+		/* Check if we are only moving objects owned by certain roles */
+		if (role_oids != NIL && !list_member_oid(role_oids, relForm->relowner))
+			continue;
+
+		/*
+		 * Handle permissions-checking here since we are locking the tables
+		 * and also to avoid doing a bunch of work only to fail part-way. Note
+		 * that permissions will also be checked by AlterTableInternal().
+		 *
+		 * Caller must be considered an owner on the table to move it.
+		 */
+		if (!pg_class_ownercheck(relOid, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
+						   NameStr(relForm->relname));
+
+		/*
+		 * Lock the relation.  With NOWAIT, make a single non-blocking
+		 * attempt and error out if the lock is unavailable; a successful
+		 * attempt already holds the lock, so it must not be followed by a
+		 * second, blocking acquisition.  Without NOWAIT, wait for the lock.
+		 */
+		if (stmt->nowait)
+		{
+			if (!ConditionalLockRelationOid(relOid, AccessExclusiveLock))
+				ereport(ERROR,
+						(errcode(ERRCODE_OBJECT_IN_USE),
+						 errmsg("aborting because lock on relation \"%s.%s\" is not available",
+								get_namespace_name(relForm->relnamespace),
+								NameStr(relForm->relname))));
+		}
+		else
+			LockRelationOid(relOid, AccessExclusiveLock);
+
+		/* Add to our list of objects to move */
+		relations = lappend_oid(relations, relOid);
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	/* Finding nothing is reported as a NOTICE, not an error */
+	if (relations == NIL)
+		ereport(NOTICE,
+				(errcode(ERRCODE_NO_DATA_FOUND),
+				 errmsg("no matching relations in tablespace \"%s\" found",
+						orig_tablespaceoid == InvalidOid ? "(database default)" :
+						get_tablespace_name(orig_tablespaceoid))));
+
+	/*
+	 * Everything is locked, loop through and move all of the relations.
+	 * Each relation gets its own single-command ALTER TABLE ... SET
+	 * TABLESPACE, bracketed by the event-trigger start/end calls.
+	 */
+	foreach(l, relations)
+	{
+		List	   *cmds = NIL;
+		AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+		cmd->subtype = AT_SetTableSpace;
+		cmd->name = stmt->new_tablespacename;
+
+		cmds = lappend(cmds, cmd);
+
+		EventTriggerAlterTableStart((Node *) stmt);
+		/* OID is set by AlterTableInternal */
+		AlterTableInternal(lfirst_oid(l), cmds, false);
+		EventTriggerAlterTableEnd();
+	}
+
+	return new_tablespaceoid;
+}
+
+/*
+ * index_copy_data
+ *
+ * Copy the storage of "rel" into the new relfilenode described by newrnode:
+ * flush the source's shared buffers, create the new storage, copy the main
+ * fork plus every other fork that exists, and finally drop the old storage.
+ */
+static void
+index_copy_data(Relation rel, RelFileNode newrnode)
+{
+	SMgrRelation newsmgr = smgropen(newrnode, rel->rd_backend);
+	ForkNumber	forknum;
+
+	RelationOpenSmgr(rel);
+
+	/*
+	 * The copy below reads the files directly and bypasses shared buffers,
+	 * so any pages of the source relation still sitting there must be
+	 * written out first.  No new changes are expected while we hold
+	 * exclusive lock on the rel.
+	 */
+	FlushRelationBuffers(rel);
+
+	/*
+	 * Create the new storage.  Any conflict in relfilenode value will be
+	 * caught in RelationCreateStorage().
+	 */
+	RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
+
+	/* The main fork is always copied */
+	RelationCopyStorage(rel->rd_smgr, newsmgr, MAIN_FORKNUM,
+						rel->rd_rel->relpersistence);
+
+	/* The remaining forks are copied only if the source has them */
+	for (forknum = MAIN_FORKNUM + 1; forknum <= MAX_FORKNUM; forknum++)
+	{
+		bool		wal_log_create;
+
+		if (!smgrexists(rel->rd_smgr, forknum))
+			continue;
+
+		smgrcreate(newsmgr, forknum, false);
+
+		/*
+		 * Fork creation is WAL-logged for persistent relations, and for the
+		 * init fork of an unlogged relation.
+		 */
+		wal_log_create = RelationIsPermanent(rel) ||
+			(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
+			 forknum == INIT_FORKNUM);
+		if (wal_log_create)
+			log_smgrcreate(&newrnode, forknum);
+
+		RelationCopyStorage(rel->rd_smgr, newsmgr, forknum,
+							rel->rd_rel->relpersistence);
+	}
+
+	/* Schedule unlinking of the old physical files, and close the new one */
+	RelationDropStorage(rel);
+	smgrclose(newsmgr);
+}
+
+/*
+ * ALTER TABLE ENABLE/DISABLE TRIGGER
+ *
+ * We just pass this off to trigger.c.
+ *
+ * All arguments are forwarded unchanged to EnableDisableTrigger().
+ */
+static void
+ATExecEnableDisableTrigger(Relation rel, const char *trigname,
+						   char fires_when, bool skip_system, LOCKMODE lockmode)
+{
+	EnableDisableTrigger(rel, trigname, fires_when, skip_system, lockmode);
+}
+
+/*
+ * ALTER TABLE ENABLE/DISABLE RULE
+ *
+ * We just pass this off to rewriteDefine.c.
+ *
+ * rel, rulename and fires_when are forwarded to EnableDisableRule();
+ * lockmode is accepted but not passed on.
+ */
+static void
+ATExecEnableDisableRule(Relation rel, const char *rulename,
+						char fires_when, LOCKMODE lockmode)
+{
+	EnableDisableRule(rel, rulename, fires_when);
+}
+
+/*
+ * ALTER TABLE INHERIT
+ *
+ * Add a parent to the child's parents. This verifies that all the columns and
+ * check constraints of the parent appear in the child and that they have the
+ * same data types and expressions.
+ *
+ * This preparation step only rejects children whose inheritance cannot be
+ * changed at all: typed tables, partitions, and partitioned tables.
+ */
+static void
+ATPrepAddInherit(Relation child_rel)
+{
+	Form_pg_class childform = child_rel->rd_rel;
+
+	/* A typed table has a nonzero reloftype */
+	if (OidIsValid(childform->reloftype))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot change inheritance of typed table")));
+
+	/* A partition is rejected as well */
+	if (childform->relispartition)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot change inheritance of a partition")));
+
+	/* And so is a partitioned table */
+	if (childform->relkind == RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot change inheritance of partitioned table")));
+}
+
+/*
+ * ATExecAddInherit
+ *		Execution step of ALTER TABLE INHERIT: open and validate the proposed
+ *		parent, then link child_rel beneath it.
+ *
+ * Returns the address of the new parent relation.
+ */
+static ObjectAddress
+ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
+{
+	Oid			child_oid = RelationGetRelid(child_rel);
+	Oid			parent_oid;
+	Relation	parent_rel;
+	List	   *descendants;
+	const char *bad_trigger;
+	ObjectAddress address;
+
+	/*
+	 * A self-exclusive lock is needed here.  See the similar case in
+	 * MergeAttributes() for a full explanation.
+	 */
+	parent_rel = table_openrv(parent, ShareUpdateExclusiveLock);
+	parent_oid = RelationGetRelid(parent_rel);
+
+	/*
+	 * The user must own both relations.  Only the parent is checked here;
+	 * the child was checked by the ATSimplePermissions call in ATPrepCmd.
+	 */
+	ATSimplePermissions(parent_rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+	{
+		/* Permanent rels cannot inherit from temporary ones */
+		if (child_rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot inherit from temporary relation \"%s\"",
+							RelationGetRelationName(parent_rel))));
+
+		/* A temporary parent must belong to this session */
+		if (!parent_rel->rd_islocaltemp)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot inherit from temporary relation of another session")));
+	}
+
+	/* Same session rule applies to a temporary child */
+	if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		!child_rel->rd_islocaltemp)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot inherit to temporary relation of another session")));
+
+	/* Partitioned tables may not serve as inheritance parents */
+	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot inherit from partitioned table \"%s\"",
+						parent->relname)));
+
+	/* ... and neither may partitions */
+	if (parent_rel->rd_rel->relispartition)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot inherit from a partition")));
+
+	/*
+	 * Guard against cycles: the proposed parent must not already be a
+	 * descendant of the child.  (This also catches a rel inheriting from
+	 * itself.)
+	 *
+	 * The check is not airtight.  In a multi-level tree, a concurrent
+	 * session could add a link that closes the loop without touching either
+	 * rel we hold locked.  Closing that hole would seem to need exclusive
+	 * locks on the whole inheritance tree, a cure worse than the disease;
+	 * find_all_inheritors() copes with circularity anyway.
+	 *
+	 * The child's children are locked with the weakest lock available,
+	 * AccessShareLock.
+	 */
+	descendants = find_all_inheritors(child_oid, AccessShareLock, NULL);
+	if (list_member_oid(descendants, parent_oid))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_TABLE),
+				 errmsg("circular inheritance not allowed"),
+				 errdetail("\"%s\" is already a child of \"%s\".",
+						   parent->relname,
+						   RelationGetRelationName(child_rel))));
+
+	/*
+	 * A child carrying row-level triggers with transition tables is
+	 * currently not allowed to become an inheritance child.  See also the
+	 * prohibitions in ATExecAttachPartition() and CreateTrigger().
+	 */
+	bad_trigger = FindTriggerIncompatibleWithInheritance(child_rel->trigdesc);
+	if (bad_trigger != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming an inheritance child",
+						bad_trigger, RelationGetRelationName(child_rel)),
+				 errdetail("ROW triggers with transition tables are not supported in inheritance hierarchies.")));
+
+	/* All checks passed: do the catalog work */
+	CreateInheritance(child_rel, parent_rel);
+
+	ObjectAddressSet(address, RelationRelationId, parent_oid);
+
+	/* Close the parent but retain its lock until commit */
+	table_close(parent_rel, NoLock);
+
+	return address;
+}
+
+/*
+ * CreateInheritance
+ *		Do the catalog updates that make child_rel inherit from parent_rel.
+ *
+ * Shared by ATExecAddInherit() and ATExecAttachPartition().
+ */
+static void
+CreateInheritance(Relation child_rel, Relation parent_rel)
+{
+	Oid			child_oid = RelationGetRelid(child_rel);
+	Oid			parent_oid = RelationGetRelid(parent_rel);
+	Relation	inhrel;
+	SysScanDesc inhscan;
+	ScanKeyData skey;
+	HeapTuple	inhtup;
+	int32		max_seqno;
+
+	/* RowExclusiveLock, since pg_inherits is written further down. */
+	inhrel = table_open(InheritsRelationId, RowExclusiveLock);
+
+	/*
+	 * Walk the child's existing parents.  This rejects a duplicate of the
+	 * proposed parent and finds the largest inhseqno in use, so that the new
+	 * link can take the next one.
+	 *
+	 * A child that already inherits from the parent only indirectly is not
+	 * rejected; CREATE TABLE doesn't reject comparable cases either.
+	 */
+	ScanKeyInit(&skey,
+				Anum_pg_inherits_inhrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(child_oid));
+	inhscan = systable_beginscan(inhrel, InheritsRelidSeqnoIndexId,
+								 true, NULL, 1, &skey);
+
+	/* inhseqno numbering starts at 1, so 0 means "none seen yet" */
+	max_seqno = 0;
+	for (inhtup = systable_getnext(inhscan);
+		 HeapTupleIsValid(inhtup);
+		 inhtup = systable_getnext(inhscan))
+	{
+		Form_pg_inherits inhform = (Form_pg_inherits) GETSTRUCT(inhtup);
+
+		if (inhform->inhparent == parent_oid)
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_TABLE),
+					 errmsg("relation \"%s\" would be inherited from more than once",
+							RelationGetRelationName(parent_rel))));
+
+		if (inhform->inhseqno > max_seqno)
+			max_seqno = inhform->inhseqno;
+	}
+	systable_endscan(inhscan);
+
+	/* Pair up columns, incrementing attinhcount where needed */
+	MergeAttributesIntoExisting(child_rel, parent_rel);
+
+	/* Pair up constraints, incrementing coninhcount where needed */
+	MergeConstraintsIntoExisting(child_rel, parent_rel);
+
+	/*
+	 * Everything matched; record the inheritance link in the catalogs.
+	 */
+	StoreCatalogInheritance1(child_oid,
+							 parent_oid,
+							 max_seqno + 1,
+							 inhrel,
+							 parent_rel->rd_rel->relkind ==
+							 RELKIND_PARTITIONED_TABLE);
+
+	/* Finished with pg_inherits */
+	table_close(inhrel, RowExclusiveLock);
+}
+
+/*
+ * decompile_conbin
+ *		Return the source-text form of a check constraint's expression.
+ *
+ * contup is the constraint's pg_constraint tuple and tupdesc the descriptor
+ * used to fetch its conbin column.  A null conbin raises an error.
+ */
+static char *
+decompile_conbin(HeapTuple contup, TupleDesc tupdesc)
+{
+	Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(contup);
+	Datum		conbin;
+	Datum		exprtext;
+	bool		conbin_isnull;
+
+	conbin = heap_getattr(contup, Anum_pg_constraint_conbin, tupdesc,
+						  &conbin_isnull);
+	if (conbin_isnull)
+		elog(ERROR, "null conbin for constraint %u", conform->oid);
+
+	/* Reverse-compile the stored expression against the owning relation */
+	exprtext = DirectFunctionCall2(pg_get_expr, conbin,
+								   ObjectIdGetDatum(conform->conrelid));
+
+	return TextDatumGetCString(exprtext);
+}
+
+/*
+ * constraints_equivalent
+ *		Decide whether two check constraints are functionally equivalent.
+ *
+ * Both constraints must agree on condeferrable and condeferred, and their
+ * expressions must reverse-compile to the same source string.  Comparing
+ * source text insulates us from issues such as attributes having different
+ * physical column numbers in parent and child relations.
+ */
+static bool
+constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc)
+{
+	Form_pg_constraint acon = (Form_pg_constraint) GETSTRUCT(a);
+	Form_pg_constraint bcon = (Form_pg_constraint) GETSTRUCT(b);
+	char	   *asrc;
+	char	   *bsrc;
+
+	/* Cheap flag comparisons first */
+	if (acon->condeferrable != bcon->condeferrable)
+		return false;
+	if (acon->condeferred != bcon->condeferred)
+		return false;
+
+	/* Flags agree; fall back to comparing the decompiled expressions */
+	asrc = decompile_conbin(a, tupleDesc);
+	bsrc = decompile_conbin(b, tupleDesc);
+
+	return strcmp(asrc, bsrc) == 0;
+}
+
+/*
+ * MergeAttributesIntoExisting
+ *		Verify that each live parent column has a counterpart in the child,
+ *		and bump that child column's attinhcount.
+ *
+ * Called by CreateInheritance.
+ *
+ * Columns are paired by name.  A parent column with no match in the child
+ * is an error.  One day we might create the missing columns as CREATE TABLE
+ * does, but that is widely unpopular: in the common use case of partitioned
+ * tables it's a foot-gun.
+ *
+ * Type, typmod and collation must match exactly, and a NOT NULL parent
+ * column requires a NOT NULL child column.  Defaults are not compared.
+ */
+static void
+MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
+{
+	Relation	attr_catalog;
+	TupleDesc	parent_desc;
+	int			ncols;
+	int			i;
+	bool		child_is_partition;
+
+	attr_catalog = table_open(AttributeRelationId, RowExclusiveLock);
+
+	parent_desc = RelationGetDescr(parent_rel);
+	ncols = parent_desc->natts;
+
+	/* A partitioned parent implies that the child is a partition */
+	child_is_partition =
+		(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	for (i = 0; i < ncols; i++)
+	{
+		Form_pg_attribute parent_att = TupleDescAttr(parent_desc, i);
+		char	   *colname = NameStr(parent_att->attname);
+		HeapTuple	child_tup;
+		Form_pg_attribute child_att;
+
+		/* Dropped parent columns impose no requirement */
+		if (parent_att->attisdropped)
+			continue;
+
+		/* Look up the child's column of the same name */
+		child_tup = SearchSysCacheCopyAttName(RelationGetRelid(child_rel),
+											  colname);
+		if (!HeapTupleIsValid(child_tup))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("child table is missing column \"%s\"",
+							colname)));
+
+		child_att = (Form_pg_attribute) GETSTRUCT(child_tup);
+
+		/* Type and typmod must agree ... */
+		if (parent_att->atttypid != child_att->atttypid ||
+			parent_att->atttypmod != child_att->atttypmod)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("child table \"%s\" has different type for column \"%s\"",
+							RelationGetRelationName(child_rel),
+							colname)));
+
+		/* ... and so must the collation */
+		if (parent_att->attcollation != child_att->attcollation)
+			ereport(ERROR,
+					(errcode(ERRCODE_COLLATION_MISMATCH),
+					 errmsg("child table \"%s\" has different collation for column \"%s\"",
+							RelationGetRelationName(child_rel),
+							colname)));
+
+		/*
+		 * The child may not drop a NOT NULL property the parent has.  (Other
+		 * constraints are checked elsewhere.)
+		 */
+		if (parent_att->attnotnull && !child_att->attnotnull)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("column \"%s\" in child table must be marked NOT NULL",
+							colname)));
+
+		/*
+		 * Column accepted: count one more parent for it.  (Should a later
+		 * step fail, this change simply rolls back.)
+		 */
+		child_att->attinhcount++;
+
+		/*
+		 * For partitions, attislocal must have the same value in all
+		 * partitions.  (Partitions contain only inherited attributes.)
+		 */
+		if (child_is_partition)
+		{
+			Assert(child_att->attinhcount == 1);
+			child_att->attislocal = false;
+		}
+
+		CatalogTupleUpdate(attr_catalog, &child_tup->t_self, child_tup);
+		heap_freetuple(child_tup);
+	}
+
+	table_close(attr_catalog, RowExclusiveLock);
+}
+
+/*
+ * Check constraints in child table match up with constraints in parent,
+ * and increment their coninhcount.
+ *
+ * Only CHECK constraints are considered.  Parent constraints that are
+ * marked NO INHERIT (connoinherit) are ignored.
+ *
+ * Called by CreateInheritance
+ *
+ * Currently all constraints in parent must be present in the child. One day we
+ * may consider adding new constraints like CREATE TABLE does.
+ *
+ * A child constraint matches a parent constraint when it has the same name;
+ * the matched pair must then pass constraints_equivalent(), the child
+ * constraint must not be NO INHERIT, and it must not be NOT VALID if the
+ * parent's constraint is valid.  Any violation raises an error.
+ *
+ * XXX This is O(N^2) which may be an issue with tables with hundreds of
+ * constraints. As long as tables have more like 10 constraints it shouldn't be
+ * a problem though. Even 100 constraints ought not be the end of the world.
+ *
+ * XXX See MergeWithExistingConstraint too if you change this code.
+ */
+static void
+MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
+{
+	Relation	catalog_relation;
+	TupleDesc	tuple_desc;
+	SysScanDesc parent_scan;
+	ScanKeyData parent_key;
+	HeapTuple	parent_tuple;
+	bool		child_is_partition = false;
+
+	/* pg_constraint is both scanned and updated below */
+	catalog_relation = table_open(ConstraintRelationId, RowExclusiveLock);
+	tuple_desc = RelationGetDescr(catalog_relation);
+
+	/* If parent_rel is a partitioned table, child_rel must be a partition */
+	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		child_is_partition = true;
+
+	/* Outer loop scans through the parent's constraint definitions */
+	ScanKeyInit(&parent_key,
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
+	parent_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
+									 true, NULL, 1, &parent_key);
+
+	while (HeapTupleIsValid(parent_tuple = systable_getnext(parent_scan)))
+	{
+		Form_pg_constraint parent_con = (Form_pg_constraint) GETSTRUCT(parent_tuple);
+		SysScanDesc child_scan;
+		ScanKeyData child_key;
+		HeapTuple	child_tuple;
+		bool		found = false;
+
+		/* Only CHECK constraints take part in the merge */
+		if (parent_con->contype != CONSTRAINT_CHECK)
+			continue;
+
+		/* if the parent's constraint is marked NO INHERIT, it's not inherited */
+		if (parent_con->connoinherit)
+			continue;
+
+		/* Search for a child constraint matching this one */
+		ScanKeyInit(&child_key,
+					Anum_pg_constraint_conrelid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(RelationGetRelid(child_rel)));
+		child_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
+										true, NULL, 1, &child_key);
+
+		/* Inner loop: a fresh scan of the child's constraints per parent row */
+		while (HeapTupleIsValid(child_tuple = systable_getnext(child_scan)))
+		{
+			Form_pg_constraint child_con = (Form_pg_constraint) GETSTRUCT(child_tuple);
+			HeapTuple	child_copy;
+
+			if (child_con->contype != CONSTRAINT_CHECK)
+				continue;
+
+			/* Candidates are paired by constraint name */
+			if (strcmp(NameStr(parent_con->conname),
+					   NameStr(child_con->conname)) != 0)
+				continue;
+
+			/* Same name but a different definition is an error, not a miss */
+			if (!constraints_equivalent(parent_tuple, child_tuple, tuple_desc))
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("child table \"%s\" has different definition for check constraint \"%s\"",
+								RelationGetRelationName(child_rel),
+								NameStr(parent_con->conname))));
+
+			/* If the child constraint is "no inherit" then cannot merge */
+			if (child_con->connoinherit)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("constraint \"%s\" conflicts with non-inherited constraint on child table \"%s\"",
+								NameStr(child_con->conname),
+								RelationGetRelationName(child_rel))));
+
+			/*
+			 * If the child constraint is "not valid" then cannot merge with a
+			 * valid parent constraint
+			 */
+			if (parent_con->convalidated && !child_con->convalidated)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("constraint \"%s\" conflicts with NOT VALID constraint on child table \"%s\"",
+								NameStr(child_con->conname),
+								RelationGetRelationName(child_rel))));
+
+			/*
+			 * OK, bump the child constraint's inheritance count.  (If we fail
+			 * later on, this change will just roll back.)
+			 *
+			 * The scan's tuple is not modified in place: we work on a copy,
+			 * and child_con is re-pointed at that copy.
+			 */
+			child_copy = heap_copytuple(child_tuple);
+			child_con = (Form_pg_constraint) GETSTRUCT(child_copy);
+			child_con->coninhcount++;
+
+			/*
+			 * In case of partitions, an inherited constraint must be
+			 * inherited only once since it cannot have multiple parents and
+			 * it is never considered local.
+			 */
+			if (child_is_partition)
+			{
+				Assert(child_con->coninhcount == 1);
+				child_con->conislocal = false;
+			}
+
+			CatalogTupleUpdate(catalog_relation, &child_copy->t_self, child_copy);
+			heap_freetuple(child_copy);
+
+			/* First name match settles this parent constraint */
+			found = true;
+			break;
+		}
+
+		systable_endscan(child_scan);
+
+		/* No same-named CHECK constraint exists in the child */
+		if (!found)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("child table is missing constraint \"%s\"",
+							NameStr(parent_con->conname))));
+	}
+
+	systable_endscan(parent_scan);
+	table_close(catalog_relation, RowExclusiveLock);
+}
+
+/*
+ * ALTER TABLE NO INHERIT
+ *
+ * Detach rel from the named parent.  Partitions are rejected up front.
+ *
+ * Returns the address of the relation that is no longer a parent.
+ */
+static ObjectAddress
+ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
+{
+	Relation	parent_rel;
+	ObjectAddress parent_addr;
+
+	if (rel->rd_rel->relispartition)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot change inheritance of a partition")));
+
+	/*
+	 * Some lock on the parent is needed because its schema is inspected.
+	 * AccessShareLock is probably enough, seeing that DROP TABLE doesn't
+	 * lock parent tables at all.
+	 */
+	parent_rel = table_openrv(parent, AccessShareLock);
+
+	/*
+	 * Ownership of the parent is deliberately not checked; owning the child
+	 * is presumed to be enough rights.
+	 */
+
+	/* RemoveInheritance() does most of the work */
+	RemoveInheritance(rel, parent_rel, false);
+
+	ObjectAddressSet(parent_addr, RelationRelationId,
+					 RelationGetRelid(parent_rel));
+
+	/* Close the parent but retain its lock until commit */
+	table_close(parent_rel, NoLock);
+
+	return parent_addr;
+}
+
+/*
+ * MarkInheritDetached
+ *
+ * Set inhdetachpending for a partition, for ATExecDetachPartition
+ * in concurrent mode.
+ *
+ * Every pg_inherits row whose inhrelid is child_rel gets the flag set.  A
+ * row naming a parent other than parent_rel is reported as an internal
+ * error, and finding no row at all is reported as "is not a partition".
+ */
+static void
+MarkInheritDetached(Relation child_rel, Relation parent_rel)
+{
+	Relation	catalogRelation;
+	SysScanDesc	scan;
+	ScanKeyData key;
+	HeapTuple	inheritsTuple;
+	bool		found = false;
+
+	Assert(child_rel->rd_rel->relkind == RELKIND_RELATION ||
+		   child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+	Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/*
+	 * Find pg_inherits entries by inhrelid.
+	 */
+	catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+	ScanKeyInit(&key,
+				Anum_pg_inherits_inhrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(child_rel)));
+	scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
+							  true, NULL, 1, &key);
+
+	while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+	{
+		HeapTuple	newtup;
+
+		/* The only parent a partition may have is parent_rel */
+		if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent !=
+			RelationGetRelid(parent_rel))
+			elog(ERROR, "bad parent tuple found for partition %u",
+				 RelationGetRelid(child_rel));
+
+		/* Modify a private copy, never the tuple owned by the scan */
+		newtup = heap_copytuple(inheritsTuple);
+		((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;
+
+		CatalogTupleUpdate(catalogRelation,
+						   &inheritsTuple->t_self,
+						   newtup);
+
+		/*
+		 * Release our copy once the update is done, as the other catalog
+		 * updaters in this file do, rather than leaving it allocated.
+		 */
+		heap_freetuple(newtup);
+
+		found = true;
+	}
+
+	/* Done */
+	systable_endscan(scan);
+	table_close(catalogRelation, RowExclusiveLock);
+
+	if (!found)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+						RelationGetRelationName(child_rel),
+						RelationGetRelationName(parent_rel))));
+}
+
+/*
+ * RemoveInheritance
+ *
+ * Drop a parent from the child's parents. This just adjusts the attinhcount
+ * and attislocal of the columns and removes the pg_inherits and pg_depend
+ * entries.  expect_detached is passed down to DeleteInheritsTuple, q.v.
+ *
+ * If attinhcount goes to 0 then attislocal gets set to true. If it goes back
+ * up attislocal stays true, which means if a child is ever removed from a
+ * parent then its columns will never be automatically dropped which may
+ * surprise. But at least we'll never surprise by dropping columns someone
+ * isn't expecting to be dropped which would actually mean data loss.
+ *
+ * coninhcount and conislocal for inherited constraints are adjusted in
+ * exactly the same way.
+ *
+ * An error is raised if child_rel is not currently a child (or partition)
+ * of parent_rel.
+ *
+ * Common to ATExecDropInherit() and ATExecDetachPartition().
+ */
+static void
+RemoveInheritance(Relation child_rel, Relation parent_rel, bool expect_detached)
+{
+	Relation	catalogRelation;
+	SysScanDesc scan;
+	ScanKeyData key[3];		/* only key[0] is used; every scan has one key */
+	HeapTuple	attributeTuple,
+				constraintTuple;
+	List	   *connames;
+	bool		found;
+	bool		child_is_partition = false;
+
+	/* If parent_rel is a partitioned table, child_rel must be a partition */
+	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		child_is_partition = true;
+
+	/* Remove the pg_inherits link first; its absence is a user-facing error */
+	found = DeleteInheritsTuple(RelationGetRelid(child_rel),
+								RelationGetRelid(parent_rel),
+								expect_detached,
+								RelationGetRelationName(child_rel));
+	if (!found)
+	{
+		/* Word the complaint according to the kind of relationship */
+		if (child_is_partition)
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_TABLE),
+					 errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+							RelationGetRelationName(child_rel),
+							RelationGetRelationName(parent_rel))));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_TABLE),
+					 errmsg("relation \"%s\" is not a parent of relation \"%s\"",
+							RelationGetRelationName(parent_rel),
+							RelationGetRelationName(child_rel))));
+	}
+
+	/*
+	 * Search through child columns looking for ones matching parent rel
+	 */
+	catalogRelation = table_open(AttributeRelationId, RowExclusiveLock);
+	ScanKeyInit(&key[0],
+				Anum_pg_attribute_attrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(child_rel)));
+	scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId,
+							  true, NULL, 1, key);
+	while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
+	{
+		Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple);
+
+		/* Ignore if dropped or not inherited */
+		if (att->attisdropped)
+			continue;
+		if (att->attinhcount <= 0)
+			continue;
+
+		/* A column counts as inherited from this parent if the name exists there */
+		if (SearchSysCacheExistsAttName(RelationGetRelid(parent_rel),
+										NameStr(att->attname)))
+		{
+			/* Decrement inhcount and possibly set islocal to true */
+			HeapTuple	copyTuple = heap_copytuple(attributeTuple);
+			Form_pg_attribute copy_att = (Form_pg_attribute) GETSTRUCT(copyTuple);
+
+			copy_att->attinhcount--;
+			if (copy_att->attinhcount == 0)
+				copy_att->attislocal = true;
+
+			CatalogTupleUpdate(catalogRelation, &copyTuple->t_self, copyTuple);
+			heap_freetuple(copyTuple);
+		}
+	}
+	systable_endscan(scan);
+	table_close(catalogRelation, RowExclusiveLock);
+
+	/*
+	 * Likewise, find inherited check constraints and disinherit them. To do
+	 * this, we first need a list of the names of the parent's check
+	 * constraints.  (We cheat a bit by only checking for name matches,
+	 * assuming that the expressions will match.)
+	 */
+	catalogRelation = table_open(ConstraintRelationId, RowExclusiveLock);
+	ScanKeyInit(&key[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
+	scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId,
+							  true, NULL, 1, key);
+
+	connames = NIL;
+
+	/* First pass: collect copies of the parent's CHECK constraint names */
+	while (HeapTupleIsValid(constraintTuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple);
+
+		if (con->contype == CONSTRAINT_CHECK)
+			connames = lappend(connames, pstrdup(NameStr(con->conname)));
+	}
+
+	systable_endscan(scan);
+
+	/* Now scan the child's constraints */
+	ScanKeyInit(&key[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(child_rel)));
+	scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId,
+							  true, NULL, 1, key);
+
+	while (HeapTupleIsValid(constraintTuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple);
+		bool		match;
+		ListCell   *lc;
+
+		if (con->contype != CONSTRAINT_CHECK)
+			continue;
+
+		/* Does the parent have a CHECK constraint of the same name? */
+		match = false;
+		foreach(lc, connames)
+		{
+			if (strcmp(NameStr(con->conname), (char *) lfirst(lc)) == 0)
+			{
+				match = true;
+				break;
+			}
+		}
+
+		if (match)
+		{
+			/* Decrement inhcount and possibly set islocal to true */
+			HeapTuple	copyTuple = heap_copytuple(constraintTuple);
+			Form_pg_constraint copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
+
+			if (copy_con->coninhcount <= 0) /* shouldn't happen */
+				elog(ERROR, "relation %u has non-inherited constraint \"%s\"",
+					 RelationGetRelid(child_rel), NameStr(copy_con->conname));
+
+			copy_con->coninhcount--;
+			if (copy_con->coninhcount == 0)
+				copy_con->conislocal = true;
+
+			CatalogTupleUpdate(catalogRelation, &copyTuple->t_self, copyTuple);
+			heap_freetuple(copyTuple);
+		}
+	}
+
+	systable_endscan(scan);
+	table_close(catalogRelation, RowExclusiveLock);
+
+	/* Finally remove the child's pg_depend entry that references the parent */
+	drop_parent_dependency(RelationGetRelid(child_rel),
+						   RelationRelationId,
+						   RelationGetRelid(parent_rel),
+						   child_dependency_type(child_is_partition));
+
+	/*
+	 * Post alter hook of this inherits. Since object_access_hook doesn't take
+	 * multiple object identifiers, we relay oid of parent relation using
+	 * auxiliary_id argument.
+	 */
+	InvokeObjectPostAlterHookArg(InheritsRelationId,
+								 RelationGetRelid(child_rel), 0,
+								 RelationGetRelid(parent_rel), false);
+}
+
+/*
+ * drop_parent_dependency
+ *		Remove the pg_depend row(s) by which relation relid depends on the
+ *		object (refclassid, refobjid) with dependency type deptype.
+ *
+ * Such a dependency is created either by StoreCatalogInheritance1 (CREATE
+ * TABLE INHERITS/ALTER TABLE INHERIT, in which case refclassid is
+ * RelationRelationId) or by heap_create_with_catalog (CREATE TABLE OF/ALTER
+ * TABLE OF, in which case refclassid is TypeRelationId).  There's no
+ * convenient way to find it, so we go trawling through pg_depend.
+ */
+static void
+drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid,
+					   DependencyType deptype)
+{
+	Relation	depRel;
+	SysScanDesc depScan;
+	ScanKeyData skey[3];
+	HeapTuple	tup;
+
+	depRel = table_open(DependRelationId, RowExclusiveLock);
+
+	/* Scan all whole-object dependencies recorded for relation relid */
+	ScanKeyInit(&skey[0],
+				Anum_pg_depend_classid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&skey[1],
+				Anum_pg_depend_objid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_depend_objsubid,
+				BTEqualStrategyNumber, F_INT4EQ,
+				Int32GetDatum(0));
+
+	depScan = systable_beginscan(depRel, DependDependerIndexId, true,
+								 NULL, 3, skey);
+
+	for (tup = systable_getnext(depScan);
+		 HeapTupleIsValid(tup);
+		 tup = systable_getnext(depScan))
+	{
+		Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
+
+		/* Skip rows that reference anything other than the target object */
+		if (depform->refclassid != refclassid ||
+			depform->refobjid != refobjid ||
+			depform->refobjsubid != 0 ||
+			depform->deptype != deptype)
+			continue;
+
+		CatalogTupleDelete(depRel, &tup->t_self);
+	}
+
+	systable_endscan(depScan);
+	table_close(depRel, RowExclusiveLock);
+}
+
+/*
+ * ALTER TABLE OF
+ *
+ * Bind a table to a composite type, as though it had been created with
+ * CREATE TABLE OF.  Every attname, atttypid, atttypmod and attcollation must
+ * match, in the same column order, and the table must not have inheritance
+ * parents.  These restrictions ensure that you cannot create a
+ * configuration impossible with CREATE TABLE OF alone.
+ *
+ * Returns the address of the type.
+ */
+static ObjectAddress
+ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode)
+{
+	Oid			relid = RelationGetRelid(rel);
+	Type		typetuple;
+	Oid			typeid;
+	Relation	inhRel;
+	Relation	classRel;
+	SysScanDesc inhScan;
+	ScanKeyData inhKey;
+	TupleDesc	typeDesc;
+	TupleDesc	tableDesc;
+	int			type_idx;
+	int			table_idx;
+	ObjectAddress tableobj;
+	ObjectAddress typeobj;
+	HeapTuple	classtuple;
+
+	/* Resolve and validate the type. */
+	typetuple = typenameType(NULL, ofTypename, NULL);
+	check_of_type(typetuple);
+	typeid = ((Form_pg_type) GETSTRUCT(typetuple))->oid;
+
+	/* A table with any inheritance parent cannot become typed. */
+	inhRel = table_open(InheritsRelationId, AccessShareLock);
+	ScanKeyInit(&inhKey,
+				Anum_pg_inherits_inhrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+	inhScan = systable_beginscan(inhRel, InheritsRelidSeqnoIndexId,
+								 true, NULL, 1, &inhKey);
+	if (HeapTupleIsValid(systable_getnext(inhScan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("typed tables cannot inherit")));
+	systable_endscan(inhScan);
+	table_close(inhRel, AccessShareLock);
+
+	/*
+	 * Compare the two tuple descriptors.  Unlike inheritance, column order
+	 * must match as well.  attnotnull, however, need not match.
+	 *
+	 * Both descriptors are walked in parallel with 0-based cursors; dropped
+	 * columns on either side are stepped over.
+	 */
+	typeDesc = lookup_rowtype_tupdesc(typeid, -1);
+	tableDesc = RelationGetDescr(rel);
+	table_idx = 0;
+	for (type_idx = 0; type_idx < typeDesc->natts; type_idx++)
+	{
+		Form_pg_attribute type_attr = TupleDescAttr(typeDesc, type_idx);
+		Form_pg_attribute table_attr;
+		const char *type_attname;
+		const char *table_attname;
+
+		/* Only live type attributes need a counterpart. */
+		if (type_attr->attisdropped)
+			continue;
+		type_attname = NameStr(type_attr->attname);
+
+		/* Advance to the table's next live attribute. */
+		for (;;)
+		{
+			if (table_idx >= tableDesc->natts)
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("table is missing column \"%s\"",
+								type_attname)));
+			table_attr = TupleDescAttr(tableDesc, table_idx);
+			table_idx++;
+			if (!table_attr->attisdropped)
+				break;
+		}
+		table_attname = NameStr(table_attr->attname);
+
+		/* Names must agree. */
+		if (strncmp(table_attname, type_attname, NAMEDATALEN) != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("table has column \"%s\" where type requires \"%s\"",
+							table_attname, type_attname)));
+
+		/* So must type, typmod and collation. */
+		if (table_attr->atttypid != type_attr->atttypid ||
+			table_attr->atttypmod != type_attr->atttypmod ||
+			table_attr->attcollation != type_attr->attcollation)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("table \"%s\" has different type for column \"%s\"",
+							RelationGetRelationName(rel), type_attname)));
+	}
+	DecrTupleDescRefCount(typeDesc);
+
+	/* Whatever table columns are left over had better all be dropped. */
+	while (table_idx < tableDesc->natts)
+	{
+		Form_pg_attribute extra_attr = TupleDescAttr(tableDesc, table_idx);
+
+		if (!extra_attr->attisdropped)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("table has extra column \"%s\"",
+							NameStr(extra_attr->attname))));
+		table_idx++;
+	}
+
+	/* A table that was already typed loses its old type dependency. */
+	if (rel->rd_rel->reloftype)
+		drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
+							   DEPENDENCY_NORMAL);
+
+	/* Make the table depend on the new type. */
+	ObjectAddressSet(tableobj, RelationRelationId, relid);
+	ObjectAddressSet(typeobj, TypeRelationId, typeid);
+	recordDependencyOn(&tableobj, &typeobj, DEPENDENCY_NORMAL);
+
+	/* Store the type in pg_class.reloftype */
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+	classtuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(classtuple))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+	((Form_pg_class) GETSTRUCT(classtuple))->reloftype = typeid;
+	CatalogTupleUpdate(classRel, &classtuple->t_self, classtuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);
+
+	heap_freetuple(classtuple);
+	table_close(classRel, RowExclusiveLock);
+
+	ReleaseSysCache(typetuple);
+
+	return typeobj;
+}
+
+/*
+ * ALTER TABLE NOT OF
+ *
+ * Unbind a typed table from its originating type: remove the dependency on
+ * the type and clear pg_class.reloftype.  Fails if the table is not typed.
+ */
+static void
+ATExecDropOf(Relation rel, LOCKMODE lockmode)
+{
+	Oid			relid = RelationGetRelid(rel);
+	Oid			oldtype = rel->rd_rel->reloftype;
+	Relation	classRel;
+	HeapTuple	classtuple;
+
+	if (!OidIsValid(oldtype))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a typed table",
+						RelationGetRelationName(rel))));
+
+	/*
+	 * Ownership of the type is deliberately not checked; owning the table is
+	 * presumed to be enough rights.  No lock is required on the type,
+	 * either.
+	 */
+
+	drop_parent_dependency(relid, TypeRelationId, oldtype,
+						   DEPENDENCY_NORMAL);
+
+	/* Reset pg_class.reloftype */
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+	classtuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(classtuple))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+	((Form_pg_class) GETSTRUCT(classtuple))->reloftype = InvalidOid;
+	CatalogTupleUpdate(classRel, &classtuple->t_self, classtuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);
+
+	heap_freetuple(classtuple);
+	table_close(classRel, RowExclusiveLock);
+}
+
+/*
+ * relation_mark_replica_identity: Update a table's replica identity
+ *
+ * Iff ri_type = REPLICA_IDENTITY_INDEX, indexOid must be the Oid of a suitable
+ * index. Otherwise, it should be InvalidOid.
+ *
+ * pg_class.relreplident is rewritten only when it differs from ri_type.
+ * pg_index.indisreplident is then adjusted on the table's indexes: it is
+ * cleared wherever it was set and set on indexOid.  is_internal is passed
+ * through to the object post-alter hook fired for every index row that gets
+ * modified.
+ */
+static void
+relation_mark_replica_identity(Relation rel, char ri_type, Oid indexOid,
+							   bool is_internal)
+{
+	Relation	pg_index;
+	Relation	pg_class;
+	HeapTuple	pg_class_tuple;
+	HeapTuple	pg_index_tuple;
+	Form_pg_class pg_class_form;
+	Form_pg_index pg_index_form;
+
+	ListCell   *index;
+
+	/*
+	 * Check whether relreplident has changed, and update it if so.
+	 */
+	pg_class = table_open(RelationRelationId, RowExclusiveLock);
+	pg_class_tuple = SearchSysCacheCopy1(RELOID,
+										 ObjectIdGetDatum(RelationGetRelid(rel)));
+	if (!HeapTupleIsValid(pg_class_tuple))
+		elog(ERROR, "cache lookup failed for relation \"%s\"",
+			 RelationGetRelationName(rel));
+	pg_class_form = (Form_pg_class) GETSTRUCT(pg_class_tuple);
+	if (pg_class_form->relreplident != ri_type)
+	{
+		pg_class_form->relreplident = ri_type;
+		CatalogTupleUpdate(pg_class, &pg_class_tuple->t_self, pg_class_tuple);
+	}
+	table_close(pg_class, RowExclusiveLock);
+	heap_freetuple(pg_class_tuple);
+
+	/*
+	 * Check whether the correct index is marked indisreplident; if so, we're
+	 * done.
+	 */
+	if (OidIsValid(indexOid))
+	{
+		Assert(ri_type == REPLICA_IDENTITY_INDEX);
+
+		pg_index_tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
+		if (!HeapTupleIsValid(pg_index_tuple))
+			elog(ERROR, "cache lookup failed for index %u", indexOid);
+		pg_index_form = (Form_pg_index) GETSTRUCT(pg_index_tuple);
+
+		if (pg_index_form->indisreplident)
+		{
+			ReleaseSysCache(pg_index_tuple);
+			return;
+		}
+		ReleaseSysCache(pg_index_tuple);
+	}
+
+	/*
+	 * Clear the indisreplident flag from any index that had it previously,
+	 * and set it for any index that should have it now.
+	 */
+	pg_index = table_open(IndexRelationId, RowExclusiveLock);
+	foreach(index, RelationGetIndexList(rel))
+	{
+		Oid			thisIndexOid = lfirst_oid(index);
+		bool		dirty = false;
+
+		pg_index_tuple = SearchSysCacheCopy1(INDEXRELID,
+											 ObjectIdGetDatum(thisIndexOid));
+		if (!HeapTupleIsValid(pg_index_tuple))
+			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
+		pg_index_form = (Form_pg_index) GETSTRUCT(pg_index_tuple);
+
+		/*
+		 * Unset the bit if set.  We know it's wrong because we checked this
+		 * earlier.
+		 */
+		if (pg_index_form->indisreplident)
+		{
+			dirty = true;
+			pg_index_form->indisreplident = false;
+		}
+		else if (thisIndexOid == indexOid)
+		{
+			dirty = true;
+			pg_index_form->indisreplident = true;
+		}
+
+		if (dirty)
+		{
+			CatalogTupleUpdate(pg_index, &pg_index_tuple->t_self, pg_index_tuple);
+			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+										 InvalidOid, is_internal);
+
+			/*
+			 * Queue a relcache invalidation for the table itself, so that
+			 * other sessions re-read which index is the replica identity.
+			 * The pg_class row is rewritten above only when relreplident
+			 * changes, so switching between two indexes while staying on
+			 * REPLICA_IDENTITY_INDEX would otherwise update pg_index alone.
+			 */
+			CacheInvalidateRelcache(rel);
+		}
+		heap_freetuple(pg_index_tuple);
+	}
+
+	table_close(pg_index, RowExclusiveLock);
+}
+
+/*
+ * ALTER TABLE <name> REPLICA IDENTITY ...
+ *
+ * DEFAULT, FULL and NOTHING only need the new setting recorded.  For USING
+ * INDEX the named index is looked up in the table's namespace and must pass
+ * every suitability check below (belongs to this table, unique, immediate,
+ * no expressions, no predicate, valid, all key columns NOT NULL user
+ * columns) before it is marked as the replica identity.
+ */
+static void
+ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode)
+{
+	Oid			idxoid;
+	Relation	idxrel;
+	int			nkeys;
+	int			i;
+
+	switch (stmt->identity_type)
+	{
+		case REPLICA_IDENTITY_DEFAULT:
+		case REPLICA_IDENTITY_FULL:
+		case REPLICA_IDENTITY_NOTHING:
+			/* No index is involved: just record the setting and leave. */
+			relation_mark_replica_identity(rel, stmt->identity_type,
+										   InvalidOid, true);
+			return;
+
+		case REPLICA_IDENTITY_INDEX:
+			/* validated and applied below */
+			break;
+
+		default:
+			elog(ERROR, "unexpected identity type %u", stmt->identity_type);
+			break;
+	}
+
+	/* The index name is resolved within the table's own schema. */
+	idxoid = get_relname_relid(stmt->name, rel->rd_rel->relnamespace);
+	if (!OidIsValid(idxoid))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("index \"%s\" for table \"%s\" does not exist",
+						stmt->name, RelationGetRelationName(rel))));
+
+	idxrel = index_open(idxoid, ShareLock);
+
+	/* It has to be an index of the table being altered. */
+	if (idxrel->rd_index == NULL ||
+		idxrel->rd_index->indrelid != RelationGetRelid(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index for table \"%s\"",
+						RelationGetRelationName(idxrel),
+						RelationGetRelationName(rel))));
+
+	/* Both the access method and the index itself must enforce uniqueness. */
+	if (!idxrel->rd_indam->amcanunique ||
+		!idxrel->rd_index->indisunique)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot use non-unique index \"%s\" as replica identity",
+						RelationGetRelationName(idxrel))));
+
+	/* A deferrable index may be transiently non-unique. */
+	if (!idxrel->rd_index->indimmediate)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot use non-immediate index \"%s\" as replica identity",
+						RelationGetRelationName(idxrel))));
+
+	/* No expression indexes. */
+	if (RelationGetIndexExpressions(idxrel) != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot use expression index \"%s\" as replica identity",
+						RelationGetRelationName(idxrel))));
+
+	/* No partial indexes. */
+	if (RelationGetIndexPredicate(idxrel) != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot use partial index \"%s\" as replica identity",
+						RelationGetRelationName(idxrel))));
+
+	/* No invalid indexes. */
+	if (!idxrel->rd_index->indisvalid)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot use invalid index \"%s\" as replica identity",
+						RelationGetRelationName(idxrel))));
+
+	/* Every key column must be a user column declared NOT NULL. */
+	nkeys = IndexRelationGetNumberOfKeyAttributes(idxrel);
+	for (i = 0; i < nkeys; i++)
+	{
+		int16		colno = idxrel->rd_index->indkey.values[i];
+		Form_pg_attribute col;
+
+		/*
+		 * System columns (attribute numbers <= 0) are refused.  New indexes
+		 * are not supposed to contain them, but indexes carried over from
+		 * older branches might.
+		 */
+		if (colno <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+					 errmsg("index \"%s\" cannot be used as replica identity because column %d is a system column",
+							RelationGetRelationName(idxrel), colno)));
+
+		col = TupleDescAttr(rel->rd_att, colno - 1);
+		if (!col->attnotnull)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("index \"%s\" cannot be used as replica identity because column \"%s\" is nullable",
+							RelationGetRelationName(idxrel),
+							NameStr(col->attname))));
+	}
+
+	/* All checks passed: record this index as the replica identity. */
+	relation_mark_replica_identity(rel, stmt->identity_type, idxoid, true);
+
+	/* Close the index but keep the lock taken by index_open. */
+	index_close(idxrel, NoLock);
+}
+
+/*
+ * ALTER TABLE ENABLE/DISABLE ROW LEVEL SECURITY
+ *
+ * Store rls into pg_class.relrowsecurity for rel, then fire the object
+ * post-alter hook so that object-access hook consumers see this change in
+ * the same way as the other ALTER TABLE subcommands that rewrite the
+ * pg_class row.
+ */
+static void
+ATExecSetRowSecurity(Relation rel, bool rls)
+{
+	Relation	pg_class;
+	Oid			relid;
+	HeapTuple	tuple;
+
+	relid = RelationGetRelid(rel);
+
+	/* Pull the record for this relation and update it */
+	pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+	/* work on a copy so the flag can be modified in place */
+	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+
+	((Form_pg_class) GETSTRUCT(tuple))->relrowsecurity = rls;
+	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);
+
+	table_close(pg_class, RowExclusiveLock);
+	heap_freetuple(tuple);
+}
+
+/*
+ * ALTER TABLE FORCE/NO FORCE ROW LEVEL SECURITY
+ *
+ * Store force_rls into pg_class.relforcerowsecurity for rel, then fire the
+ * object post-alter hook so that object-access hook consumers see this
+ * change in the same way as the other ALTER TABLE subcommands that rewrite
+ * the pg_class row.
+ */
+static void
+ATExecForceNoForceRowSecurity(Relation rel, bool force_rls)
+{
+	Relation	pg_class;
+	Oid			relid;
+	HeapTuple	tuple;
+
+	relid = RelationGetRelid(rel);
+
+	pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+	/* work on a copy so the flag can be modified in place */
+	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+
+	((Form_pg_class) GETSTRUCT(tuple))->relforcerowsecurity = force_rls;
+	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);
+
+	table_close(pg_class, RowExclusiveLock);
+	heap_freetuple(tuple);
+}
+
+/*
+ * ALTER FOREIGN TABLE <name> OPTIONS (...)
+ *
+ * Apply the requested option changes to pg_foreign_table.ftoptions for rel.
+ * The merged option set is produced by transformGenericOptions(), which is
+ * handed the validator of the table's foreign-data wrapper.  An empty
+ * option list is a no-op.
+ */
+static void
+ATExecGenericOptions(Relation rel, List *options)
+{
+	const int	optcol = Anum_pg_foreign_table_ftoptions - 1;
+	Relation	ftrel;
+	HeapTuple	oldtup;
+	HeapTuple	newtup;
+	Form_pg_foreign_table ftform;
+	ForeignServer *server;
+	ForeignDataWrapper *fdw;
+	Datum		optdatum;
+	bool		optisnull;
+	Datum		values[Natts_pg_foreign_table] = {0};
+	bool		nulls[Natts_pg_foreign_table] = {false};
+	bool		replaces[Natts_pg_foreign_table] = {false};
+
+	if (options == NIL)
+		return;
+
+	ftrel = table_open(ForeignTableRelationId, RowExclusiveLock);
+
+	oldtup = SearchSysCacheCopy1(FOREIGNTABLEREL,
+								 ObjectIdGetDatum(RelationGetRelid(rel)));
+	if (!HeapTupleIsValid(oldtup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("foreign table \"%s\" does not exist",
+						RelationGetRelationName(rel))));
+
+	/* Walk table -> server -> wrapper to reach the option validator. */
+	ftform = (Form_pg_foreign_table) GETSTRUCT(oldtup);
+	server = GetForeignServer(ftform->ftserver);
+	fdw = GetForeignDataWrapper(server->fdwid);
+
+	/* Fetch the options currently stored; a NULL column means "none". */
+	optdatum = SysCacheGetAttr(FOREIGNTABLEREL,
+							   oldtup,
+							   Anum_pg_foreign_table_ftoptions,
+							   &optisnull);
+	if (optisnull)
+		optdatum = PointerGetDatum(NULL);
+
+	/* Merge in the requested changes. */
+	optdatum = transformGenericOptions(ForeignTableRelationId,
+									   optdatum,
+									   options,
+									   fdw->fdwvalidator);
+
+	/* Only ftoptions is replaced; an empty result is stored as NULL. */
+	replaces[optcol] = true;
+	if (PointerIsValid(DatumGetPointer(optdatum)))
+		values[optcol] = optdatum;
+	else
+		nulls[optcol] = true;
+
+	/* Build the new row version and write it out. */
+	newtup = heap_modify_tuple(oldtup, RelationGetDescr(ftrel),
+							   values, nulls, replaces);
+
+	CatalogTupleUpdate(ftrel, &newtup->t_self, newtup);
+
+	/*
+	 * Force a relcache flush for the table so that every session rebuilds
+	 * cached plans that might have depended on the previous options.
+	 */
+	CacheInvalidateRelcache(rel);
+
+	InvokeObjectPostAlterHook(ForeignTableRelationId,
+							  RelationGetRelid(rel), 0);
+
+	table_close(ftrel, RowExclusiveLock);
+
+	heap_freetuple(newtup);
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET COMPRESSION
+ *
+ * Change the compression method recorded in pg_attribute for the named
+ * column of rel, and pass the new method on to SetIndexStorageProperties()
+ * for the table's indexes.  newValue must be a String node naming the
+ * compression method.
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetCompression(AlteredTableInfo *tab,
+					 Relation rel,
+					 const char *column,
+					 Node *newValue,
+					 LOCKMODE lockmode)
+{
+	Oid			tableOid = RelationGetRelid(rel);
+	Relation	attrRel;
+	HeapTuple	attTup;
+	Form_pg_attribute attForm;
+	AttrNumber	colno;
+	char	   *methodName;
+	char		storage;
+	char		method;
+	ObjectAddress result;
+
+	Assert(IsA(newValue, String));
+	methodName = strVal(newValue);
+
+	attrRel = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* fetch a modifiable copy of the column's pg_attribute row */
+	attTup = SearchSysCacheCopyAttName(tableOid, column);
+	if (!HeapTupleIsValid(attTup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						column, RelationGetRelationName(rel))));
+
+	attForm = (Form_pg_attribute) GETSTRUCT(attTup);
+	colno = attForm->attnum;
+
+	/* system columns cannot be altered */
+	if (colno <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"", column)));
+
+	/* the column's data type must have a compressible storage strategy */
+	storage = get_typstorage(attForm->atttypid);
+	if (!IsStorageCompressible(storage))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("column data type %s does not support compression",
+						format_type_be(attForm->atttypid))));
+
+	/* resolve the method name into its catalog representation */
+	method = GetAttributeCompression(attForm, methodName);
+
+	/* store the new method in pg_attribute */
+	attForm->attcompression = method;
+	CatalogTupleUpdate(attrRel, &attTup->t_self, attTup);
+
+	InvokeObjectPostAlterHook(RelationRelationId, tableOid, colno);
+
+	/*
+	 * Propagate to indexes too (simple index columns only, in line with
+	 * ConstructTupleDescriptor() in index.c).
+	 */
+	SetIndexStorageProperties(rel, attrRel, colno, method, '\0', lockmode);
+
+	heap_freetuple(attTup);
+
+	table_close(attrRel, RowExclusiveLock);
+
+	/* make the catalog changes visible */
+	CommandCounterIncrement();
+
+	ObjectAddressSubSet(result, RelationRelationId, tableOid, colno);
+	return result;
+}
+
+
+/*
+ * Preparation phase for SET LOGGED/UNLOGGED
+ *
+ * This verifies that we're not trying to change a temp table.  Also,
+ * existing foreign key constraints are checked to avoid ending up with
+ * permanent tables referencing unlogged tables.
+ *
+ * rel is the table being altered; toLogged is true for SET LOGGED and false
+ * for SET UNLOGGED.  Changing to UNLOGGED is also refused while the table
+ * belongs to a publication.
+ *
+ * Return value is false if the operation is a no-op (in which case the
+ * checks are skipped), otherwise true.
+ */
+static bool
+ATPrepChangePersistence(Relation rel, bool toLogged)
+{
+	Relation	pg_constraint;
+	HeapTuple	tuple;
+	SysScanDesc scan;
+	ScanKeyData skey[1];
+
+	/*
+	 * Disallow changing status for a temp table.  Also verify whether we can
+	 * get away with doing nothing; in such cases we don't need to run the
+	 * checks below, either.
+	 */
+	switch (rel->rd_rel->relpersistence)
+	{
+		case RELPERSISTENCE_TEMP:
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot change logged status of table \"%s\" because it is temporary",
+							RelationGetRelationName(rel)),
+					 errtable(rel)));
+			break;
+		case RELPERSISTENCE_PERMANENT:
+			if (toLogged)
+				/* nothing to do: already logged */
+				return false;
+			break;
+		case RELPERSISTENCE_UNLOGGED:
+			if (!toLogged)
+				/* nothing to do: already unlogged */
+				return false;
+			break;
+	}
+
+	/*
+	 * Check that the table is not part of any publication when changing to
+	 * UNLOGGED, as UNLOGGED tables can't be published.
+	 */
+	if (!toLogged &&
+		list_length(GetRelationPublications(RelationGetRelid(rel))) > 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("cannot change table \"%s\" to unlogged because it is part of a publication",
+						RelationGetRelationName(rel)),
+				 errdetail("Unlogged relations cannot be replicated.")));
+
+	/*
+	 * Check existing foreign key constraints to preserve the invariant that
+	 * permanent tables cannot reference unlogged ones.  Self-referencing
+	 * foreign keys can safely be ignored.
+	 */
+	pg_constraint = table_open(ConstraintRelationId, AccessShareLock);
+
+	/*
+	 * Scan conrelid if changing to permanent, else confrelid.  This also
+	 * determines whether a useful index exists: the conrelid scan is given
+	 * ConstraintRelidTypidNameIndexId, while the confrelid scan is started
+	 * with InvalidOid as its index.
+	 */
+	ScanKeyInit(&skey[0],
+				toLogged ? Anum_pg_constraint_conrelid :
+				Anum_pg_constraint_confrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	scan = systable_beginscan(pg_constraint,
+							  toLogged ? ConstraintRelidTypidNameIndexId : InvalidOid,
+							  true, NULL, 1, skey);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		if (con->contype == CONSTRAINT_FOREIGN)
+		{
+			Oid			foreignrelid;
+			Relation	foreignrel;
+
+			/*
+			 * the opposite end of what we used as scankey: for SET LOGGED
+			 * this is the referenced table (confrelid), for SET UNLOGGED it
+			 * is the referencing table (conrelid).
+			 *
+			 * NOTE(review): in the SET UNLOGGED case the error text below
+			 * still says rel "references" the other table, although here
+			 * the other table is the one holding the foreign key -- confirm
+			 * whether the wording is intended.
+			 */
+			foreignrelid = toLogged ? con->confrelid : con->conrelid;
+
+			/* ignore if self-referencing */
+			if (RelationGetRelid(rel) == foreignrelid)
+				continue;
+
+			foreignrel = relation_open(foreignrelid, AccessShareLock);
+
+			if (toLogged)
+			{
+				if (!RelationIsPermanent(foreignrel))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+							 errmsg("could not change table \"%s\" to logged because it references unlogged table \"%s\"",
+									RelationGetRelationName(rel),
+									RelationGetRelationName(foreignrel)),
+							 errtableconstraint(rel, NameStr(con->conname))));
+			}
+			else
+			{
+				if (RelationIsPermanent(foreignrel))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+							 errmsg("could not change table \"%s\" to unlogged because it references logged table \"%s\"",
+									RelationGetRelationName(rel),
+									RelationGetRelationName(foreignrel)),
+							 errtableconstraint(rel, NameStr(con->conname))));
+			}
+
+			relation_close(foreignrel, AccessShareLock);
+		}
+	}
+
+	systable_endscan(scan);
+
+	table_close(pg_constraint, AccessShareLock);
+
+	return true;
+}
+
+/*
+ * Execute ALTER TABLE SET SCHEMA
+ *
+ * Looks up and locks the target relation, moves it (and its dependent
+ * objects) into stmt->newschema, and returns the relation's address.  If
+ * the relation lookup yields no OID, a NOTICE is emitted and
+ * InvalidObjectAddress is returned.  When oldschema is not NULL, the OID of
+ * the namespace the relation was in is stored there.
+ */
+ObjectAddress
+AlterTableNamespace(AlterObjectSchemaStmt *stmt, Oid *oldschema)
+{
+	Oid			relOid;
+	Relation	targetRel;
+	Oid			fromNsp;
+	Oid			toNsp;
+	RangeVar   *destRv;
+	ObjectAddresses *moved;
+	ObjectAddress result;
+
+	relOid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+									  stmt->missing_ok ? RVR_MISSING_OK : 0,
+									  RangeVarCallbackForAlterRelation,
+									  (void *) stmt);
+
+	if (!OidIsValid(relOid))
+	{
+		ereport(NOTICE,
+				(errmsg("relation \"%s\" does not exist, skipping",
+						stmt->relation->relname)));
+		return InvalidObjectAddress;
+	}
+
+	/* The lock was already taken by the lookup above. */
+	targetRel = relation_open(relOid, NoLock);
+
+	fromNsp = RelationGetNamespace(targetRel);
+
+	/* A sequence owned by a table column may not be moved on its own. */
+	if (targetRel->rd_rel->relkind == RELKIND_SEQUENCE)
+	{
+		Oid			ownerTable;
+		int32		ownerCol;
+
+		if (sequenceIsOwned(relOid, DEPENDENCY_AUTO, &ownerTable, &ownerCol) ||
+			sequenceIsOwned(relOid, DEPENDENCY_INTERNAL, &ownerTable, &ownerCol))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot move an owned sequence into another schema"),
+					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
+							   RelationGetRelationName(targetRel),
+							   get_rel_name(ownerTable))));
+	}
+
+	/* Resolve the destination schema, checking creation permission on it. */
+	destRv = makeRangeVar(stmt->newschema,
+						  RelationGetRelationName(targetRel), -1);
+	toNsp = RangeVarGetAndCheckCreationNamespace(destRv, NoLock, NULL);
+
+	/* generic sanity checks for a namespace switch */
+	CheckSetNamespace(fromNsp, toNsp);
+
+	/* Do the move; the list of moved objects is only needed while it runs. */
+	moved = new_object_addresses();
+	AlterTableNamespaceInternal(targetRel, fromNsp, toNsp, moved);
+	free_object_addresses(moved);
+
+	ObjectAddressSet(result, RelationRelationId, relOid);
+
+	if (oldschema != NULL)
+		*oldschema = fromNsp;
+
+	/* Close the relation; the lock is retained. */
+	relation_close(targetRel, NoLock);
+
+	return result;
+}
+
+/*
+ * Workhorse for moving a table or materialized view into another namespace.
+ * The relation's pg_class row is moved first, then its row type (if it has
+ * one), and finally, for plain tables, materialized views and partitioned
+ * tables, its indexes, column-owned sequences and constraints.
+ *
+ * objsMoved must not be NULL; it is passed to each step to track what has
+ * already been relocated.
+ */
+void
+AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid,
+							ObjectAddresses *objsMoved)
+{
+	Oid			tableOid = RelationGetRelid(rel);
+	Relation	pgClass;
+
+	Assert(objsMoved != NULL);
+
+	pgClass = table_open(RelationRelationId, RowExclusiveLock);
+
+	/* The relation's own pg_class row and its schema dependency */
+	AlterRelationNamespaceInternal(pgClass, tableOid, oldNspOid,
+								   nspOid, true, objsMoved);
+
+	/* The associated row type, when there is one */
+	if (OidIsValid(rel->rd_rel->reltype))
+		AlterTypeNamespaceInternal(rel->rd_rel->reltype,
+								   nspOid, false, false, objsMoved);
+
+	/* Remaining dependent objects, for the relation kinds that have them */
+	switch (rel->rd_rel->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_MATVIEW:
+		case RELKIND_PARTITIONED_TABLE:
+			AlterIndexNamespaces(pgClass, rel, oldNspOid, nspOid, objsMoved);
+			AlterSeqNamespaces(pgClass, rel, oldNspOid, nspOid,
+							   objsMoved, AccessExclusiveLock);
+			AlterConstraintNamespaces(tableOid, oldNspOid, nspOid,
+									  false, objsMoved);
+			break;
+
+		default:
+			break;
+	}
+
+	table_close(pgClass, RowExclusiveLock);
+}
+
+/*
+ * Workhorse for moving a single relation into another namespace: rewrite
+ * its pg_class row and, if hasDependEntry is true, repoint its pg_depend
+ * entry on the schema.  The caller must already hold pg_class open with a
+ * write lock and pass it as classRel.
+ *
+ * A relation already listed in objsMoved is left untouched.  Otherwise it
+ * is added to objsMoved and the post-alter hook fires, even when the old
+ * and new namespaces are the same and no catalog row is modified.
+ */
+void
+AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
+							   Oid oldNspOid, Oid newNspOid,
+							   bool hasDependEntry,
+							   ObjectAddresses *objsMoved)
+{
+	HeapTuple	tup;
+	Form_pg_class relform;
+	ObjectAddress self;
+
+	tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for relation %u", relOid);
+	relform = (Form_pg_class) GETSTRUCT(tup);
+
+	Assert(relform->relnamespace == oldNspOid);
+
+	ObjectAddressSet(self, RelationRelationId, relOid);
+
+	if (!object_address_present(&self, objsMoved))
+	{
+		if (oldNspOid != newNspOid)
+		{
+			/* Report a name clash nicely instead of via the unique index. */
+			if (get_relname_relid(NameStr(relform->relname),
+								  newNspOid) != InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_TABLE),
+						 errmsg("relation \"%s\" already exists in schema \"%s\"",
+								NameStr(relform->relname),
+								get_namespace_name(newNspOid))));
+
+			/* tup is our own copy, so it can be modified directly */
+			relform->relnamespace = newNspOid;
+
+			CatalogTupleUpdate(classRel, &tup->t_self, tup);
+
+			/* Repoint the schema dependency when the caller says one exists */
+			if (hasDependEntry &&
+				changeDependencyFor(RelationRelationId,
+									relOid,
+									NamespaceRelationId,
+									oldNspOid,
+									newNspOid) != 1)
+				elog(ERROR, "failed to change schema dependency for relation \"%s\"",
+					 NameStr(relform->relname));
+		}
+
+		/* Remember the object and notify hooks, moved or not. */
+		add_exact_object_address(&self, objsMoved);
+
+		InvokeObjectPostAlterHook(RelationRelationId, relOid, 0);
+	}
+
+	heap_freetuple(tup);
+}
+
+/*
+ * Relocate every index of the given relation into another namespace.
+ *
+ * Note: permission checks are the caller's job, and the caller is expected
+ * to hold a suitable lock on the owning relation.
+ */
+static void
+AlterIndexNamespaces(Relation classRel, Relation rel,
+					 Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved)
+{
+	List	   *indexOids = RelationGetIndexList(rel);
+	ListCell   *cell;
+
+	foreach(cell, indexOids)
+	{
+		Oid			idxOid = lfirst_oid(cell);
+		ObjectAddress idxAddr;
+
+		ObjectAddressSet(idxAddr, RelationRelationId, idxOid);
+
+		/*
+		 * XXX this objsMoved test may be pointless -- surely there is just
+		 * one dependency link from a relation to each of its indexes?
+		 */
+		if (object_address_present(&idxAddr, objsMoved))
+			continue;
+
+		/*
+		 * Note: an index currently has no dependency of its own on the
+		 * namespace, hence hasDependEntry is false and changeDependencyFor()
+		 * is not needed.  It has no row type in pg_type, either.
+		 */
+		AlterRelationNamespaceInternal(classRel, idxOid,
+									   oldNspOid, newNspOid,
+									   false, objsMoved);
+		add_exact_object_address(&idxAddr, objsMoved);
+	}
+
+	list_free(indexOids);
+}
+
+/*
+ * Move all identity and SERIAL-column sequences of the specified relation to another
+ * namespace.
+ *
+ * classRel is the already-opened pg_class relation handed on to
+ * AlterRelationNamespaceInternal(); lockmode is the lock taken on every
+ * dependent relation that has to be opened to find out whether it is a
+ * sequence; objsMoved tracks objects that have already been relocated.
+ *
+ * Note: we assume adequate permission checking was done by the caller,
+ * and that the caller has a suitable lock on the owning relation.
+ */
+static void
+AlterSeqNamespaces(Relation classRel, Relation rel,
+				   Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved,
+				   LOCKMODE lockmode)
+{
+	Relation	depRel;
+	SysScanDesc scan;
+	ScanKeyData key[2];
+	HeapTuple	tup;
+
+	/*
+	 * SERIAL sequences are those having an auto dependency on one of the
+	 * table's columns (we don't care *which* column, exactly).  Internal
+	 * dependencies are accepted by the filter below as well.
+	 */
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	/* we leave refobjsubid unspecified */
+
+	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+							  NULL, 2, key);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
+		Relation	seqRel;
+
+		/*
+		 * skip dependencies other than auto or internal dependencies of a
+		 * whole relation (objsubid = 0) on one of our columns
+		 * (refobjsubid != 0)
+		 */
+		if (depForm->refobjsubid == 0 ||
+			depForm->classid != RelationRelationId ||
+			depForm->objsubid != 0 ||
+			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
+			continue;
+
+		/* Use relation_open just in case it's an index */
+		seqRel = relation_open(depForm->objid, lockmode);
+
+		/* skip non-sequence relations */
+		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
+		{
+			/* No need to keep the lock */
+			relation_close(seqRel, lockmode);
+			continue;
+		}
+
+		/* Fix the pg_class and pg_depend entries */
+		AlterRelationNamespaceInternal(classRel, depForm->objid,
+									   oldNspOid, newNspOid,
+									   true, objsMoved);
+
+		/*
+		 * Sequences used to have entries in pg_type, but no longer do.  If we
+		 * ever re-instate that, we'll need to move the pg_type entry to the
+		 * new namespace, too (using AlterTypeNamespaceInternal).
+		 */
+		Assert(RelationGetForm(seqRel)->reltype == InvalidOid);
+
+		/* Now we can close it.  Keep the lock till end of transaction. */
+		relation_close(seqRel, NoLock);
+	}
+
+	systable_endscan(scan);
+
+	relation_close(depRel, AccessShareLock);
+}
+
+
+/*
+ * This code supports
+ *	CREATE TEMP TABLE ... ON COMMIT { DROP | PRESERVE ROWS | DELETE ROWS }
+ *
+ * Because we only support this for TEMP tables, it's sufficient to remember
+ * the state in a backend-local data structure.
+ */
+
+/*
+ * Record the ON COMMIT action of a relation that has just been created.
+ *
+ * ONCOMMIT_NOOP and ONCOMMIT_PRESERVE_ROWS need no work at commit, so no
+ * entry is made for them.  Entries are allocated in CacheMemoryContext.
+ */
+void
+register_on_commit_action(Oid relid, OnCommitAction action)
+{
+	OnCommitItem *item;
+	MemoryContext savedcxt;
+
+	/* Nothing to remember when there is nothing to do at commit. */
+	if (action == ONCOMMIT_NOOP || action == ONCOMMIT_PRESERVE_ROWS)
+		return;
+
+	savedcxt = MemoryContextSwitchTo(CacheMemoryContext);
+
+	item = (OnCommitItem *) palloc(sizeof(OnCommitItem));
+	item->relid = relid;
+	item->oncommit = action;
+	item->deleting_subid = InvalidSubTransactionId;
+	item->creating_subid = GetCurrentSubTransactionId();
+
+	/*
+	 * Prepend with lcons() so that the actions end up being processed in
+	 * reverse order of registration.  That may not be essential, but it
+	 * seems reasonable.
+	 */
+	on_commits = lcons(item, on_commits);
+
+	MemoryContextSwitchTo(savedcxt);
+}
+
+/*
+ * Forget a relation's ON COMMIT action because the relation is being
+ * deleted.
+ *
+ * The OnCommitItem is not removed here; the first entry matching relid is
+ * only stamped with the current subtransaction ID as its deleting_subid.
+ */
+void
+remove_on_commit_action(Oid relid)
+{
+	ListCell   *cell;
+
+	foreach(cell, on_commits)
+	{
+		OnCommitItem *item = (OnCommitItem *) lfirst(cell);
+
+		if (item->relid != relid)
+			continue;
+
+		/* Found it: mark as deleted and stop looking. */
+		item->deleting_subid = GetCurrentSubTransactionId();
+		break;
+	}
+}
+
+/*
+ * Carry out the registered ON COMMIT actions.
+ *
+ * This runs just before the actual commit, because the actions themselves
+ * can fail with an error.  ON COMMIT DELETE ROWS relations are truncated
+ * first, then ON COMMIT DROP relations are dropped.
+ */
+void
+PreCommit_on_commit_actions(void)
+{
+	ListCell   *cell;
+	List	   *truncate_oids = NIL;
+	List	   *drop_oids = NIL;
+	ObjectAddresses *drop_targets;
+
+	/* First pass: sort the live entries into the two work lists. */
+	foreach(cell, on_commits)
+	{
+		OnCommitItem *item = (OnCommitItem *) lfirst(cell);
+
+		/* Entries whose relation was dropped in this xact are ignored. */
+		if (item->deleting_subid != InvalidSubTransactionId)
+			continue;
+
+		switch (item->oncommit)
+		{
+			case ONCOMMIT_NOOP:
+			case ONCOMMIT_PRESERVE_ROWS:
+				/* Nothing to do (such entries shouldn't exist, actually) */
+				break;
+
+			case ONCOMMIT_DELETE_ROWS:
+
+				/*
+				 * When the transaction has not touched any temporary
+				 * relation, ON COMMIT DELETE ROWS tables must still be
+				 * empty and truncating them can be skipped.
+				 */
+				if ((MyXactFlags & XACT_FLAGS_ACCESSEDTEMPNAMESPACE))
+					truncate_oids = lappend_oid(truncate_oids, item->relid);
+				break;
+
+			case ONCOMMIT_DROP:
+				drop_oids = lappend_oid(drop_oids, item->relid);
+				break;
+		}
+	}
+
+	/*
+	 * Truncation happens before dropping, so that dependencies between
+	 * relations go away only after they have been worked on.  This may be
+	 * wasted effort when a truncated relation is dropped anyway because its
+	 * parent is dropped, but it keeps the code robust: there is no need to
+	 * re-check at truncation time that the relation still exists.
+	 */
+	if (truncate_oids != NIL)
+		heap_truncate(truncate_oids);
+
+	if (drop_oids == NIL)
+		return;
+
+	/* Collect the relations to drop into one deletion request. */
+	drop_targets = new_object_addresses();
+
+	foreach(cell, drop_oids)
+	{
+		ObjectAddress target;
+
+		target.classId = RelationRelationId;
+		target.objectId = lfirst_oid(cell);
+		target.objectSubId = 0;
+
+		Assert(!object_address_present(&target, drop_targets));
+
+		add_exact_object_address(&target, drop_targets);
+	}
+
+	/*
+	 * This is an automatic drop rather than one requested directly by the
+	 * user, hence the PERFORM_DELETION_INTERNAL flag.
+	 */
+	performMultipleDeletions(drop_targets, DROP_CASCADE,
+							 PERFORM_DELETION_INTERNAL | PERFORM_DELETION_QUIETLY);
+
+#ifdef USE_ASSERT_CHECKING
+
+	/*
+	 * Dropping a table calls remove_on_commit_action, so by now every
+	 * ON COMMIT DROP entry should have been marked as deleted.
+	 */
+	foreach(cell, on_commits)
+	{
+		OnCommitItem *item = (OnCommitItem *) lfirst(cell);
+
+		if (item->oncommit != ONCOMMIT_DROP)
+			continue;
+
+		Assert(item->deleting_subid != InvalidSubTransactionId);
+	}
+#endif
+}
+
+/*
+ * End-of-transaction cleanup of the ON COMMIT bookkeeping.
+ *
+ * The only job here is to discard OnCommitItem entries that are no longer
+ * needed: on commit, the ones marked deleted during the transaction; on
+ * abort, the ones created during it.  Surviving entries get both of their
+ * subtransaction IDs reset.
+ */
+void
+AtEOXact_on_commit_actions(bool isCommit)
+{
+	ListCell   *cell;
+
+	foreach(cell, on_commits)
+	{
+		OnCommitItem *item = (OnCommitItem *) lfirst(cell);
+		bool		discard;
+
+		if (isCommit)
+			discard = (item->deleting_subid != InvalidSubTransactionId);
+		else
+			discard = (item->creating_subid != InvalidSubTransactionId);
+
+		if (discard)
+		{
+			/* unlink the entry from the list and release it */
+			on_commits = foreach_delete_current(on_commits, cell);
+			pfree(item);
+			continue;
+		}
+
+		/* entry stays: forget which subtransactions touched it */
+		item->creating_subid = InvalidSubTransactionId;
+		item->deleting_subid = InvalidSubTransactionId;
+	}
+}
+
+/*
+ * End-of-subtransaction cleanup of the ON COMMIT bookkeeping.
+ *
+ * On subabort, entries created in this subtransaction are removed right
+ * away, and deletion marks made in it are cleared.  On subcommit, entries
+ * created or marked deleted in this subtransaction are relabeled as
+ * belonging to the parent.
+ */
+void
+AtEOSubXact_on_commit_actions(bool isCommit, SubTransactionId mySubid,
+							  SubTransactionId parentSubid)
+{
+	ListCell   *cell;
+
+	foreach(cell, on_commits)
+	{
+		OnCommitItem *item = (OnCommitItem *) lfirst(cell);
+
+		if (!isCommit && item->creating_subid == mySubid)
+		{
+			/* created in the aborted subtransaction: drop the entry */
+			on_commits = foreach_delete_current(on_commits, cell);
+			pfree(item);
+			continue;
+		}
+
+		/* the entry survives; hand our marks over to the parent */
+		if (item->creating_subid == mySubid)
+			item->creating_subid = parentSubid;
+
+		if (item->deleting_subid == mySubid)
+		{
+			if (isCommit)
+				item->deleting_subid = parentSubid;
+			else
+				item->deleting_subid = InvalidSubTransactionId;
+		}
+	}
+}
+
+/*
+ * This is intended as a callback for RangeVarGetRelidExtended().  It allows
+ * the relation to be locked only if (1) it's a plain table, partitioned
+ * table, materialized view, or TOAST table and (2) the current user is the
+ * owner (or the superuser).  This meets the permission-checking needs of
+ * CLUSTER, REINDEX TABLE, and REFRESH MATERIALIZED VIEW; we expose it here
+ * so that it can be used by all.
+ *
+ * relation is used only to report the relation name in error messages;
+ * relId is the OID the name resolved to (InvalidOid if not found).
+ * oldRelId and arg are not used.
+ *
+ * Raises ERROR if the relkind is not acceptable or the ownership check
+ * fails; returns silently if the relation was not found or has vanished.
+ */
+void
+RangeVarCallbackOwnsTable(const RangeVar *relation,
+						  Oid relId, Oid oldRelId, void *arg)
+{
+	char		relkind;
+
+	/* Nothing to do if the relation was not found. */
+	if (!OidIsValid(relId))
+		return;
+
+	/*
+	 * If the relation does exist, check that it is one of the relation kinds
+	 * we accept.  But note that the relation might have been dropped between
+	 * the time we did the name lookup and now (signalled here by a zero
+	 * relkind).  In that case, there's nothing to do.
+	 */
+	relkind = get_rel_relkind(relId);
+	if (!relkind)
+		return;
+	if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE &&
+		relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table or materialized view", relation->relname)));
+
+	/*
+	 * Check permissions.  We already know the relkind, so reuse it for the
+	 * error report instead of doing a second catalog cache lookup.
+	 */
+	if (!pg_class_ownercheck(relId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relkind),
+					   relation->relname);
+}
+
+/*
+ * RangeVarCallbackForTruncate
+ *		RangeVarGetRelidExtended() callback used while resolving the targets
+ *		of TRUNCATE.
+ *
+ * Runs the TRUNCATE relation-type and permission checks against the
+ * pg_class entry of the relation the name resolved to.  Does nothing if the
+ * name did not resolve.
+ */
+static void
+RangeVarCallbackForTruncate(const RangeVar *relation,
+							Oid relId, Oid oldRelId, void *arg)
+{
+	HeapTuple	classtup;
+	Form_pg_class classform;
+
+	/* A name that resolved to nothing needs no checking. */
+	if (!OidIsValid(relId))
+		return;
+
+	classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
+	if (!HeapTupleIsValid(classtup))	/* should not happen */
+		elog(ERROR, "cache lookup failed for relation %u", relId);
+	classform = (Form_pg_class) GETSTRUCT(classtup);
+
+	/* First the relation-kind check, then the privilege check */
+	truncate_check_rel(relId, classform);
+	truncate_check_perms(relId, classform);
+
+	ReleaseSysCache(classtup);
+}
+
+/*
+ * Callback to RangeVarGetRelidExtended(), similar to
+ * RangeVarCallbackOwnsTable() but without checks on the type of the relation.
+ *
+ * Requires that the current user pass pg_class_ownercheck() for the
+ * relation, and rejects system catalogs unless allowSystemTableMods is set.
+ * relation is used only for the name shown in error messages; oldRelId and
+ * arg are not used.  Does nothing if relId is invalid.
+ */
+void
+RangeVarCallbackOwnsRelation(const RangeVar *relation,
+							 Oid relId, Oid oldRelId, void *arg)
+{
+	HeapTuple	tuple;
+	Form_pg_class classform;
+
+	/* Nothing to do if the relation was not found. */
+	if (!OidIsValid(relId))
+		return;
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
+	if (!HeapTupleIsValid(tuple))	/* should not happen */
+		elog(ERROR, "cache lookup failed for relation %u", relId);
+	classform = (Form_pg_class) GETSTRUCT(tuple);
+
+	/*
+	 * Must own the relation.  The relkind for the error report comes from
+	 * the pg_class tuple we already hold, avoiding a second cache lookup.
+	 */
+	if (!pg_class_ownercheck(relId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER,
+					   get_relkind_objtype(classform->relkind),
+					   relation->relname);
+
+	/* No system table modifications unless explicitly allowed. */
+	if (!allowSystemTableMods &&
+		IsSystemClass(relId, classform))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						relation->relname)));
+
+	ReleaseSysCache(tuple);
+}
+
+/*
+ * Common RangeVarGetRelid callback for rename, set schema, and alter table
+ * processing.
+ *
+ * arg must point to the statement node being executed: a RenameStmt, an
+ * AlterObjectSchemaStmt, or an AlterTableStmt (anything else is an internal
+ * error).  The callback verifies, in this order:
+ *	- the current user owns the relation;
+ *	- the relation is not a system catalog (unless allowSystemTableMods);
+ *	- for RENAME, the user has CREATE privilege on the relation's namespace;
+ *	- the relation's relkind is compatible with the object type named in the
+ *	  statement.
+ *
+ * Returns silently if the relation no longer exists.
+ */
+static void
+RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
+								 void *arg)
+{
+	Node	   *stmt = (Node *) arg;
+	ObjectType	reltype;
+	HeapTuple	tuple;
+	Form_pg_class classform;
+	AclResult	aclresult;
+	char		relkind;
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(tuple))
+		return;					/* concurrently dropped */
+	classform = (Form_pg_class) GETSTRUCT(tuple);
+	relkind = classform->relkind;
+
+	/*
+	 * Must own relation.  Use the relkind we just read from the tuple rather
+	 * than looking it up a second time.
+	 */
+	if (!pg_class_ownercheck(relid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relkind),
+					   rv->relname);
+
+	/* No system table modifications unless explicitly allowed. */
+	if (!allowSystemTableMods && IsSystemClass(relid, classform))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						rv->relname)));
+
+	/*
+	 * Extract the specified relation type from the statement parse tree.
+	 *
+	 * Also, for ALTER .. RENAME, check permissions: the user must (still)
+	 * have CREATE rights on the containing namespace.
+	 */
+	if (IsA(stmt, RenameStmt))
+	{
+		aclresult = pg_namespace_aclcheck(classform->relnamespace,
+										  GetUserId(), ACL_CREATE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, OBJECT_SCHEMA,
+						   get_namespace_name(classform->relnamespace));
+		reltype = ((RenameStmt *) stmt)->renameType;
+	}
+	else if (IsA(stmt, AlterObjectSchemaStmt))
+		reltype = ((AlterObjectSchemaStmt *) stmt)->objectType;
+	else if (IsA(stmt, AlterTableStmt))
+		reltype = ((AlterTableStmt *) stmt)->objtype;
+	else
+	{
+		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(stmt));
+		reltype = OBJECT_TABLE; /* placate compiler */
+	}
+
+	/*
+	 * For compatibility with prior releases, we allow ALTER TABLE to be used
+	 * with most other types of relations (but not composite types). We allow
+	 * similar flexibility for ALTER INDEX in the case of RENAME, but not
+	 * otherwise.  Otherwise, the user must select the correct form of the
+	 * command for the relation at issue.
+	 */
+	if (reltype == OBJECT_SEQUENCE && relkind != RELKIND_SEQUENCE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a sequence", rv->relname)));
+
+	if (reltype == OBJECT_VIEW && relkind != RELKIND_VIEW)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a view", rv->relname)));
+
+	if (reltype == OBJECT_MATVIEW && relkind != RELKIND_MATVIEW)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a materialized view", rv->relname)));
+
+	if (reltype == OBJECT_FOREIGN_TABLE && relkind != RELKIND_FOREIGN_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a foreign table", rv->relname)));
+
+	if (reltype == OBJECT_TYPE && relkind != RELKIND_COMPOSITE_TYPE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a composite type", rv->relname)));
+
+	if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX &&
+		relkind != RELKIND_PARTITIONED_INDEX
+		&& !IsA(stmt, RenameStmt))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index", rv->relname)));
+
+	/*
+	 * Don't allow ALTER TABLE on composite types. We want people to use ALTER
+	 * TYPE for that.
+	 */
+	if (reltype != OBJECT_TYPE && relkind == RELKIND_COMPOSITE_TYPE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is a composite type", rv->relname),
+				 errhint("Use ALTER TYPE instead.")));
+
+	/*
+	 * Don't allow ALTER TABLE .. SET SCHEMA on relations that can't be moved
+	 * to a different schema, such as indexes and TOAST tables.
+	 */
+	if (IsA(stmt, AlterObjectSchemaStmt) &&
+		relkind != RELKIND_RELATION &&
+		relkind != RELKIND_VIEW &&
+		relkind != RELKIND_MATVIEW &&
+		relkind != RELKIND_SEQUENCE &&
+		relkind != RELKIND_FOREIGN_TABLE &&
+		relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table, view, materialized view, sequence, or foreign table",
+						rv->relname)));
+
+	ReleaseSysCache(tuple);
+}
+
+/*
+ * transformPartitionSpec
+ *		Parse-analyze the expressions of a partition key specification.
+ *
+ * The input partspec is left untouched; a new PartitionSpec is built and
+ * returned, in which every PartitionElem carrying an expression has been
+ * replaced by a copy holding the transformed expression.  The partitioning
+ * strategy code derived from the strategy name is stored into *strategy.
+ *
+ * Raises ERROR for an unrecognized strategy name, or if "list" partitioning
+ * is requested with a number of key columns other than one.
+ */
+static PartitionSpec *
+transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy)
+{
+	PartitionSpec *result;
+	ParseState *pstate;
+	ParseNamespaceItem *nsitem;
+	ListCell   *cell;
+
+	/* Start the output node as a shallow copy minus the key columns */
+	result = makeNode(PartitionSpec);
+	result->strategy = partspec->strategy;
+	result->partParams = NIL;
+	result->location = partspec->location;
+
+	/* Translate the strategy name into its one-character code */
+	if (pg_strcasecmp(partspec->strategy, "hash") == 0)
+	{
+		*strategy = PARTITION_STRATEGY_HASH;
+	}
+	else if (pg_strcasecmp(partspec->strategy, "list") == 0)
+	{
+		*strategy = PARTITION_STRATEGY_LIST;
+	}
+	else if (pg_strcasecmp(partspec->strategy, "range") == 0)
+	{
+		*strategy = PARTITION_STRATEGY_RANGE;
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("unrecognized partitioning strategy \"%s\"",
+						partspec->strategy)));
+	}
+
+	/* List partitioning takes exactly one key column */
+	if (*strategy == PARTITION_STRATEGY_LIST)
+	{
+		if (list_length(partspec->partParams) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("cannot use \"list\" partition strategy with more than one column")));
+	}
+
+	/*
+	 * transformExpr needs a ParseState; set up a throwaway one whose only
+	 * rangetable entry is the target relation.
+	 */
+	pstate = make_parsestate(NULL);
+	nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
+										   NULL, false, true);
+	addNSItemToQuery(pstate, nsitem, true, true, true);
+
+	/* Walk the key columns, transforming any that are expressions */
+	foreach(cell, partspec->partParams)
+	{
+		PartitionElem *elem = castNode(PartitionElem, lfirst(cell));
+
+		if (elem->expr != NULL)
+		{
+			/* Work on a copy so the caller's parse tree is not modified */
+			elem = copyObject(elem);
+
+			/* Parse-transform the expression ... */
+			elem->expr = transformExpr(pstate, elem->expr,
+									   EXPR_KIND_PARTITION_EXPRESSION);
+
+			/* ... and resolve its collations */
+			assign_expr_collations(pstate, elem->expr);
+		}
+
+		result->partParams = lappend(result->partParams, elem);
+	}
+
+	return result;
+}
+
+/*
+ * Compute per-partition-column information from a list of PartitionElems.
+ * Expressions in the PartitionElems must be parse-analyzed already.
+ *
+ * Inputs:
+ *	pstate		used only to attach error cursor positions
+ *	rel			relation whose columns the partition key refers to
+ *	partParams	list of PartitionElem nodes, one per key column
+ *	strategy	partition strategy code; selects hash vs. btree opclasses
+ *
+ * Outputs (arrays are indexed by key column position, starting at 0):
+ *	partattrs		attribute number of the key column, or 0 if the key
+ *					column is an expression
+ *	partexprs		list to which each expression key column is appended
+ *	partopclass		operator class OID chosen for each key column
+ *	partcollation	collation OID chosen for each key column
+ *
+ * Raises ERROR on any invalid key column (nonexistent, system or generated
+ * column; mutable or constant expression; collation or opclass problems).
+ */
+static void
+ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
+					  List **partexprs, Oid *partopclass, Oid *partcollation,
+					  char strategy)
+{
+	int			attn;
+	ListCell   *lc;
+	Oid			am_oid;
+
+	/* attn is the zero-based position of the key column being processed */
+	attn = 0;
+	foreach(lc, partParams)
+	{
+		PartitionElem *pelem = castNode(PartitionElem, lfirst(lc));
+		Oid			atttype;
+		Oid			attcollation;
+
+		if (pelem->name != NULL)
+		{
+			/* Simple attribute reference */
+			HeapTuple	atttuple;
+			Form_pg_attribute attform;
+
+			atttuple = SearchSysCacheAttName(RelationGetRelid(rel),
+											 pelem->name);
+			if (!HeapTupleIsValid(atttuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" named in partition key does not exist",
+								pelem->name),
+						 parser_errposition(pstate, pelem->location)));
+			attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+			/* Non-positive attribute numbers denote system columns */
+			if (attform->attnum <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("cannot use system column \"%s\" in partition key",
+								pelem->name),
+						 parser_errposition(pstate, pelem->location)));
+
+			/*
+			 * Generated columns cannot work: They are computed after BEFORE
+			 * triggers, but partition routing is done before all triggers.
+			 */
+			if (attform->attgenerated)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("cannot use generated column in partition key"),
+						 errdetail("Column \"%s\" is a generated column.",
+								   pelem->name),
+						 parser_errposition(pstate, pelem->location)));
+
+			/* Remember what we need from the tuple before releasing it */
+			partattrs[attn] = attform->attnum;
+			atttype = attform->atttypid;
+			attcollation = attform->attcollation;
+			ReleaseSysCache(atttuple);
+		}
+		else
+		{
+			/* Expression */
+			Node	   *expr = pelem->expr;
+			char		partattname[16];
+
+			Assert(expr != NULL);
+			atttype = exprType(expr);
+			attcollation = exprCollation(expr);
+
+			/*
+			 * The expression must be of a storable type (e.g., not RECORD).
+			 * The test is the same as for whether a table column is of a safe
+			 * type (which is why we needn't check for the non-expression
+			 * case).
+			 *
+			 * The name handed to CheckAttributeType is just the one-based
+			 * position of this key column, formatted as a string.
+			 */
+			snprintf(partattname, sizeof(partattname), "%d", attn + 1);
+			CheckAttributeType(partattname,
+							   atttype, attcollation,
+							   NIL, CHKATYPE_IS_PARTKEY);
+
+			/*
+			 * Strip any top-level COLLATE clause.  This ensures that we treat
+			 * "x COLLATE y" and "(x COLLATE y)" alike.
+			 */
+			while (IsA(expr, CollateExpr))
+				expr = (Node *) ((CollateExpr *) expr)->arg;
+
+			if (IsA(expr, Var) &&
+				((Var *) expr)->varattno > 0)
+			{
+				/*
+				 * User wrote "(column)" or "(column COLLATE something)".
+				 * Treat it like simple attribute anyway.
+				 */
+				partattrs[attn] = ((Var *) expr)->varattno;
+			}
+			else
+			{
+				Bitmapset  *expr_attrs = NULL;
+				int			i;
+
+				partattrs[attn] = 0;	/* marks the column as expression */
+				*partexprs = lappend(*partexprs, expr);
+
+				/*
+				 * Try to simplify the expression before checking for
+				 * mutability.  The main practical value of doing it in this
+				 * order is that an inline-able SQL-language function will be
+				 * accepted if its expansion is immutable, whether or not the
+				 * function itself is marked immutable.
+				 *
+				 * Note that expression_planner does not change the passed in
+				 * expression destructively and we have already saved the
+				 * expression to be stored into the catalog above.
+				 */
+				expr = (Node *) expression_planner((Expr *) expr);
+
+				/*
+				 * Partition expression cannot contain mutable functions,
+				 * because a given row must always map to the same partition
+				 * as long as there is no change in the partition boundary
+				 * structure.
+				 */
+				if (contain_mutable_functions(expr))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+							 errmsg("functions in partition key expression must be marked IMMUTABLE")));
+
+				/*
+				 * transformPartitionSpec() should have already rejected
+				 * subqueries, aggregates, window functions, and SRFs, based
+				 * on the EXPR_KIND_ for partition expressions.
+				 */
+
+				/*
+				 * Cannot allow system column references, since that would
+				 * make partition routing impossible: their values won't be
+				 * known yet when we need to do that.
+				 *
+				 * Members of expr_attrs are attribute numbers offset by
+				 * FirstLowInvalidHeapAttributeNumber, hence the subtraction
+				 * when probing the set below.
+				 */
+				pull_varattnos(expr, 1, &expr_attrs);
+				for (i = FirstLowInvalidHeapAttributeNumber; i < 0; i++)
+				{
+					if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
+									  expr_attrs))
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+								 errmsg("partition key expressions cannot contain system column references")));
+				}
+
+				/*
+				 * Generated columns cannot work: They are computed after
+				 * BEFORE triggers, but partition routing is done before all
+				 * triggers.
+				 */
+				i = -1;
+				while ((i = bms_next_member(expr_attrs, i)) >= 0)
+				{
+					/* Undo the offset to recover the real attribute number */
+					AttrNumber	attno = i + FirstLowInvalidHeapAttributeNumber;
+
+					if (attno > 0 &&
+						TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+								 errmsg("cannot use generated column in partition key"),
+								 errdetail("Column \"%s\" is a generated column.",
+										   get_attname(RelationGetRelid(rel), attno, false)),
+								 parser_errposition(pstate, pelem->location)));
+				}
+
+				/*
+				 * While it is not exactly *wrong* for a partition expression
+				 * to be a constant, it seems better to reject such keys.
+				 */
+				if (IsA(expr, Const))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+							 errmsg("cannot use constant expression as partition key")));
+			}
+		}
+
+		/*
+		 * Apply collation override if any
+		 */
+		if (pelem->collation)
+			attcollation = get_collation_oid(pelem->collation, false);
+
+		/*
+		 * Check we have a collation iff it's a collatable type.  The only
+		 * expected failures here are (1) COLLATE applied to a noncollatable
+		 * type, or (2) partition expression had an unresolved collation. But
+		 * we might as well code this to be a complete consistency check.
+		 */
+		if (type_is_collatable(atttype))
+		{
+			if (!OidIsValid(attcollation))
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for partition expression"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+		}
+		else
+		{
+			if (OidIsValid(attcollation))
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("collations are not supported by type %s",
+								format_type_be(atttype))));
+		}
+
+		partcollation[attn] = attcollation;
+
+		/*
+		 * Identify the appropriate operator class.  For list and range
+		 * partitioning, we use a btree operator class; hash partitioning uses
+		 * a hash operator class.
+		 */
+		if (strategy == PARTITION_STRATEGY_HASH)
+			am_oid = HASH_AM_OID;
+		else
+			am_oid = BTREE_AM_OID;
+
+		if (!pelem->opclass)
+		{
+			/* No opclass given: fall back to the type's default for the AM */
+			partopclass[attn] = GetDefaultOpClass(atttype, am_oid);
+
+			if (!OidIsValid(partopclass[attn]))
+			{
+				if (strategy == PARTITION_STRATEGY_HASH)
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_OBJECT),
+							 errmsg("data type %s has no default operator class for access method \"%s\"",
+									format_type_be(atttype), "hash"),
+							 errhint("You must specify a hash operator class or define a default hash operator class for the data type.")));
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_OBJECT),
+							 errmsg("data type %s has no default operator class for access method \"%s\"",
+									format_type_be(atttype), "btree"),
+							 errhint("You must specify a btree operator class or define a default btree operator class for the data type.")));
+
+			}
+		}
+		else
+			partopclass[attn] = ResolveOpClass(pelem->opclass,
+											   atttype,
+											   am_oid == HASH_AM_OID ? "hash" : "btree",
+											   am_oid);
+
+		attn++;
+	}
+}
+
+/*
+ * PartConstraintImpliedByRelConstraint
+ *		Is the partition constraint already guaranteed by what scanrel
+ *		enforces on its own?
+ *
+ * scanrel's own guarantees are its check constraints plus its column-level
+ * NOT NULL constraints.  This function turns the NOT NULL columns into
+ * IS NOT NULL tests and leaves the rest (including the check constraints)
+ * to ConstraintImpliedByRelConstraint().  partConstraint must be in
+ * implicit-AND form.
+ */
+bool
+PartConstraintImpliedByRelConstraint(Relation scanrel,
+									 List *partConstraint)
+{
+	TupleConstr *constr = RelationGetDescr(scanrel)->constr;
+	List	   *notNullTests = NIL;
+
+	if (constr != NULL && constr->has_not_null)
+	{
+		int			natts = scanrel->rd_att->natts;
+		int			attno;
+
+		for (attno = 1; attno <= natts; attno++)
+		{
+			Form_pg_attribute att = TupleDescAttr(scanrel->rd_att, attno - 1);
+			NullTest   *ntest;
+
+			/* Only live columns declared NOT NULL contribute a test */
+			if (!att->attnotnull || att->attisdropped)
+				continue;
+
+			ntest = makeNode(NullTest);
+			ntest->arg = (Expr *) makeVar(1,
+										  attno,
+										  att->atttypid,
+										  att->atttypmod,
+										  att->attcollation,
+										  0);
+			ntest->nulltesttype = IS_NOT_NULL;
+
+			/*
+			 * Even for a composite column argisrow stays false: attnotnull
+			 * there means IS DISTINCT FROM NULL rather than the SQL-spec
+			 * IS NOT NULL test.
+			 */
+			ntest->argisrow = false;
+			ntest->location = -1;
+
+			notNullTests = lappend(notNullTests, ntest);
+		}
+	}
+
+	return ConstraintImpliedByRelConstraint(scanrel, partConstraint, notNullTests);
+}
+
+/*
+ * ConstraintImpliedByRelConstraint
+ *		Can testConstraint be proven from scanrel's validated check
+ *		constraints together with the caller-supplied provenConstraint?
+ *
+ * provenConstraint lists conditions the caller vouches for; it is copied,
+ * not modified.  Both provenConstraint and testConstraint must be in
+ * implicit-AND form, must only contain immutable clauses, and must
+ * contain only Vars with varno = 1.
+ */
+bool
+ConstraintImpliedByRelConstraint(Relation scanrel, List *testConstraint, List *provenConstraint)
+{
+	List	   *knownTrue = list_copy(provenConstraint);
+	TupleConstr *constr = RelationGetDescr(scanrel)->constr;
+	int			ncheck = 0;
+	int			idx;
+
+	if (constr != NULL)
+		ncheck = constr->num_check;
+
+	for (idx = 0; idx < ncheck; idx++)
+	{
+		Node	   *checkexpr;
+
+		/* A constraint that is not yet fully validated proves nothing. */
+		if (!constr->check[idx].ccvalid)
+			continue;
+
+		checkexpr = stringToNode(constr->check[idx].ccbin);
+
+		/*
+		 * Const-simplify and canonicalize the expression.  The partition
+		 * constraint expressions we compare against were processed the same
+		 * way, and valid matches could be missed otherwise.
+		 */
+		checkexpr = eval_const_expressions(NULL, checkexpr);
+		checkexpr = (Node *) canonicalize_qual((Expr *) checkexpr, true);
+
+		knownTrue = list_concat(knownTrue,
+								make_ands_implicit((Expr *) checkexpr));
+	}
+
+	/*
+	 * Attempt the proof using weak implication, as is appropriate for CHECK
+	 * constraints: assuming knownTrue is not-false, try to show that
+	 * testConstraint is not-false as well.
+	 *
+	 * predicate_implied_by assumes its first argument is known immutable.
+	 * That should always be true for both NOT NULL and partition
+	 * constraints, so it is not tested here.
+	 */
+	return predicate_implied_by(testConstraint, knownTrue, true);
+}
+
+/*
+ * QueuePartitionConstraintValidation
+ *
+ * Arrange for Phase 3 to validate partConstraint against scanrel and,
+ * recursively, against all of its partitions, by adding entries to wqueue.
+ *
+ * If scanrel's pre-existing constraints already imply partConstraint, no
+ * table scan is needed and nothing is queued for scanrel or its children.
+ * validate_default is stored in the queue entry and also selects which
+ * debug message is emitted in that case.
+ */
+static void
+QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
+								   List *partConstraint,
+								   bool validate_default)
+{
+	char		relkind;
+
+	/* If existing constraints already cover it, the scan can be skipped. */
+	if (PartConstraintImpliedByRelConstraint(scanrel, partConstraint))
+	{
+		if (validate_default)
+			ereport(DEBUG1,
+					(errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
+							RelationGetRelationName(scanrel))));
+		else
+			ereport(DEBUG1,
+					(errmsg_internal("partition constraint for table \"%s\" is implied by existing constraints",
+							RelationGetRelationName(scanrel))));
+		return;
+	}
+
+	/*
+	 * No proof was possible.  A plain relation gets a validation item
+	 * queued right here; a partitioned table is handled by descending into
+	 * each of its partitions.  Other relkinds are left alone.
+	 */
+	relkind = scanrel->rd_rel->relkind;
+
+	if (relkind == RELKIND_RELATION)
+	{
+		/* Fetch the work queue entry for this table and fill it in. */
+		AlteredTableInfo *entry = ATGetQueueEntry(wqueue, scanrel);
+
+		Assert(entry->partition_constraint == NULL);
+		entry->partition_constraint = (Expr *) linitial(partConstraint);
+		entry->validate_default = validate_default;
+		return;
+	}
+
+	if (relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionDesc pdesc = RelationGetPartitionDesc(scanrel, false);
+		int			partno;
+
+		for (partno = 0; partno < pdesc->nparts; partno++)
+		{
+			Relation	child;
+			List	   *childConstraint;
+
+			/* This is the minimum lock we need to prevent deadlocks. */
+			child = table_open(pdesc->oids[partno], AccessExclusiveLock);
+
+			/* Renumber the constraint's Vars to this partition's attnos. */
+			childConstraint = map_partition_varattnos(partConstraint, 1,
+													  child, scanrel);
+
+			QueuePartitionConstraintValidation(wqueue, child,
+											   childConstraint,
+											   validate_default);
+
+			/* keep lock till commit */
+			table_close(child, NoLock);
+		}
+	}
+}
+
+/*
+ * ALTER TABLE <name> ATTACH PARTITION <partition-name> FOR VALUES
+ *
+ * Return the address of the newly attached partition.
+ */
+static ObjectAddress
+ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
+					  AlterTableUtilityContext *context)
+{
+	Relation	attachrel,
+				catalog;
+	List	   *attachrel_children;
+	List	   *partConstraint;
+	SysScanDesc scan;
+	ScanKeyData skey;
+	AttrNumber	attno;
+	int			natts;
+	TupleDesc	tupleDesc;
+	ObjectAddress address;
+	const char *trigger_name;
+	Oid			defaultPartOid;
+	List	   *partBoundConstraint;
+	ParseState *pstate = make_parsestate(NULL);
+
+	pstate->p_sourcetext = context->queryString;
+
+	/*
+	 * We must lock the default partition if one exists, because attaching a
+	 * new partition will change its partition constraint.
+	 */
+	defaultPartOid =
+		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
+	if (OidIsValid(defaultPartOid))
+		LockRelationOid(defaultPartOid, AccessExclusiveLock);
+
+	attachrel = table_openrv(cmd->name, AccessExclusiveLock);
+
+	/*
+	 * XXX I think it'd be a good idea to grab locks on all tables referenced
+	 * by FKs at this point also.
+	 */
+
+	/*
+	 * Must be owner of both parent and source table -- parent was checked by
+	 * ATSimplePermissions call in ATPrepCmd
+	 */
+	ATSimplePermissions(attachrel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	/* A partition can only have one parent */
+	if (attachrel->rd_rel->relispartition)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is already a partition",
+						RelationGetRelationName(attachrel))));
+
+	if (OidIsValid(attachrel->rd_rel->reloftype))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach a typed table as partition")));
+
+	/*
+	 * Table being attached should not already be part of inheritance; either
+	 * as a child table...
+	 */
+	catalog = table_open(InheritsRelationId, AccessShareLock);
+	ScanKeyInit(&skey,
+				Anum_pg_inherits_inhrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(attachrel)));
+	scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true,
+							  NULL, 1, &skey);
+	if (HeapTupleIsValid(systable_getnext(scan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach inheritance child as partition")));
+	systable_endscan(scan);
+
+	/* ...or as a parent table (except the case when it is partitioned) */
+	ScanKeyInit(&skey,
+				Anum_pg_inherits_inhparent,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(attachrel)));
+	scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL,
+							  1, &skey);
+	if (HeapTupleIsValid(systable_getnext(scan)) &&
+		attachrel->rd_rel->relkind == RELKIND_RELATION)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach inheritance parent as partition")));
+	systable_endscan(scan);
+	table_close(catalog, AccessShareLock);
+
+	/*
+	 * Prevent circularity by seeing if rel is a partition of attachrel. (In
+	 * particular, this disallows making a rel a partition of itself.)
+	 *
+	 * We do that by checking if rel is a member of the list of attachrel's
+	 * partitions provided the latter is partitioned at all.  We want to avoid
+	 * having to construct this list again, so we request the strongest lock
+	 * on all partitions.  We need the strongest lock, because we may decide
+	 * to scan them if we find out that the table being attached (or its leaf
+	 * partitions) may contain rows that violate the partition constraint. If
+	 * the table has a constraint that would prevent such rows, which by
+	 * definition is present in all the partitions, we need not scan the
+	 * table, nor its partitions.  But we cannot risk a deadlock by taking a
+	 * weaker lock now and the stronger one only when needed.
+	 */
+	attachrel_children = find_all_inheritors(RelationGetRelid(attachrel),
+											 AccessExclusiveLock, NULL);
+	if (list_member_oid(attachrel_children, RelationGetRelid(rel)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_TABLE),
+				 errmsg("circular inheritance not allowed"),
+				 errdetail("\"%s\" is already a child of \"%s\".",
+						   RelationGetRelationName(rel),
+						   RelationGetRelationName(attachrel))));
+
+	/* If the parent is permanent, so must be all of its partitions. */
+	if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP &&
+		attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach a temporary relation as partition of permanent relation \"%s\"",
+						RelationGetRelationName(rel))));
+
+	/* Temp parent cannot have a partition that is itself not a temp */
+	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		attachrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"",
+						RelationGetRelationName(rel))));
+
+	/* If the parent is temp, it must belong to this session */
+	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		!rel->rd_islocaltemp)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach as partition of temporary relation of another session")));
+
+	/* Ditto for the partition */
+	if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		!attachrel->rd_islocaltemp)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach temporary relation of another session as partition")));
+
+	/* Check if there are any columns in attachrel that aren't in the parent */
+	tupleDesc = RelationGetDescr(attachrel);
+	natts = tupleDesc->natts;
+	for (attno = 1; attno <= natts; attno++)
+	{
+		Form_pg_attribute attribute = TupleDescAttr(tupleDesc, attno - 1);
+		char	   *attributeName = NameStr(attribute->attname);
+
+		/* Ignore dropped */
+		if (attribute->attisdropped)
+			continue;
+
+		/* Try to find the column in parent (matching on column name) */
+		if (!SearchSysCacheExists2(ATTNAME,
+								   ObjectIdGetDatum(RelationGetRelid(rel)),
+								   CStringGetDatum(attributeName)))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"",
+							RelationGetRelationName(attachrel), attributeName,
+							RelationGetRelationName(rel)),
+					 errdetail("The new partition may contain only the columns present in parent.")));
+	}
+
+	/*
+	 * If child_rel has row-level triggers with transition tables, we
+	 * currently don't allow it to become a partition.  See also prohibitions
+	 * in ATExecAddInherit() and CreateTrigger().
+	 */
+	trigger_name = FindTriggerIncompatibleWithInheritance(attachrel->trigdesc);
+	if (trigger_name != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming a partition",
+						trigger_name, RelationGetRelationName(attachrel)),
+				 errdetail("ROW triggers with transition tables are not supported on partitions")));
+
+	/*
+	 * Check that the new partition's bound is valid and does not overlap any
+	 * of existing partitions of the parent - note that it does not return on
+	 * error.
+	 */
+	check_new_partition_bound(RelationGetRelationName(attachrel), rel,
+							  cmd->bound, pstate);
+
+	/* OK to create inheritance.  Rest of the checks performed there */
+	CreateInheritance(attachrel, rel);
+
+	/* Update the pg_class entry. */
+	StorePartitionBound(attachrel, rel, cmd->bound);
+
+	/* Ensure there exists a correct set of indexes in the partition. */
+	AttachPartitionEnsureIndexes(rel, attachrel);
+
+	/* and triggers */
+	CloneRowTriggersToPartition(rel, attachrel);
+
+	/*
+	 * Clone foreign key constraints.  Callee is responsible for setting up
+	 * for phase 3 constraint verification.
+	 */
+	CloneForeignKeyConstraints(wqueue, rel, attachrel);
+
+	/*
+	 * Generate partition constraint from the partition bound specification.
+	 * If the parent itself is a partition, make sure to include its
+	 * constraint as well.
+	 */
+	partBoundConstraint = get_qual_from_partbound(attachrel, rel, cmd->bound);
+	partConstraint = list_concat(partBoundConstraint,
+								 RelationGetPartitionQual(rel));
+
+	/* Skip validation if there are no constraints to validate. */
+	if (partConstraint)
+	{
+		/*
+		 * Run the partition quals through const-simplification similar to
+		 * check constraints.  We skip canonicalize_qual, though, because
+		 * partition quals should be in canonical form already.
+		 */
+		partConstraint =
+			(List *) eval_const_expressions(NULL,
+											(Node *) partConstraint);
+
+		/* XXX this sure looks wrong */
+		partConstraint = list_make1(make_ands_explicit(partConstraint));
+
+		/*
+		 * Adjust the generated constraint to match this partition's attribute
+		 * numbers.
+		 */
+		partConstraint = map_partition_varattnos(partConstraint, 1, attachrel,
+												 rel);
+
+		/* Validate partition constraints against the table being attached. */
+		QueuePartitionConstraintValidation(wqueue, attachrel, partConstraint,
+										   false);
+	}
+
+	/*
+	 * If we're attaching a partition other than the default partition and a
+	 * default one exists, then that partition's partition constraint changes,
+	 * so add an entry to the work queue to validate it, too.  (We must not do
+	 * this when the partition being attached is the default one; we already
+	 * did it above!)
+	 */
+	if (OidIsValid(defaultPartOid))
+	{
+		Relation	defaultrel;
+		List	   *defPartConstraint;
+
+		Assert(!cmd->bound->is_default);
+
+		/* we already hold a lock on the default partition */
+		defaultrel = table_open(defaultPartOid, NoLock);
+		defPartConstraint =
+			get_proposed_default_constraint(partBoundConstraint);
+
+		/*
+		 * Map the Vars in the constraint expression from rel's attnos to
+		 * defaultrel's.
+		 */
+		defPartConstraint =
+			map_partition_varattnos(defPartConstraint,
+									1, defaultrel, rel);
+		QueuePartitionConstraintValidation(wqueue, defaultrel,
+										   defPartConstraint, true);
+
+		/* keep our lock until commit. */
+		table_close(defaultrel, NoLock);
+	}
+
+	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachrel));
+
+	/* keep our lock until commit */
+	table_close(attachrel, NoLock);
+
+	return address;
+}
+
+/*
+ * AttachPartitionEnsureIndexes
+ *		subroutine for ATExecAttachPartition to create/match indexes
+ *
+ * Enforce the indexing rule for partitioned tables during ALTER TABLE / ATTACH
+ * PARTITION: every partition must have an index attached to each index on the
+ * partitioned table.
+ *
+ * 'rel' is the partitioned table and 'attachrel' is the partition-to-be.
+ * For each partitioned index on 'rel', the first not-yet-attached index of
+ * 'attachrel' that CompareIndexInfo accepts is attached to it; when the
+ * parent index backs a constraint, the candidate must have a constraint of
+ * its own.  If no candidate qualifies, a new index is created with
+ * DefineIndex.
+ *
+ * When 'attachrel' is a foreign table no index is created: we only raise an
+ * error if 'rel' has a unique or primary key index.
+ *
+ * The work is done in a private memory context, which is deleted before
+ * returning.
+ */
+static void
+AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
+{
+	List	   *idxes;
+	List	   *attachRelIdxs;
+	Relation   *attachrelIdxRels;
+	IndexInfo **attachInfos;
+	int			i;
+	ListCell   *cell;
+	MemoryContext cxt;
+	MemoryContext oldcxt;
+
+	cxt = AllocSetContextCreate(CurrentMemoryContext,
+								"AttachPartitionEnsureIndexes",
+								ALLOCSET_DEFAULT_SIZES);
+	oldcxt = MemoryContextSwitchTo(cxt);
+
+	idxes = RelationGetIndexList(rel);
+	attachRelIdxs = RelationGetIndexList(attachrel);
+	attachrelIdxRels = palloc(sizeof(Relation) * list_length(attachRelIdxs));
+	attachInfos = palloc(sizeof(IndexInfo *) * list_length(attachRelIdxs));
+
+	/* Build arrays of all existing indexes and their IndexInfos */
+	i = 0;
+	foreach(cell, attachRelIdxs)
+	{
+		Oid			cldIdxId = lfirst_oid(cell);
+
+		attachrelIdxRels[i] = index_open(cldIdxId, AccessShareLock);
+		attachInfos[i] = BuildIndexInfo(attachrelIdxRels[i]);
+		i++;
+	}
+
+	/*
+	 * If we're attaching a foreign table, we must fail if any of the indexes
+	 * is a constraint index; otherwise, there's nothing to do here.  Do this
+	 * before starting work, to avoid wasting the effort of building a few
+	 * non-unique indexes before coming across a unique one.
+	 */
+	if (attachrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		foreach(cell, idxes)
+		{
+			Oid			idx = lfirst_oid(cell);
+			Relation	idxRel = index_open(idx, AccessShareLock);
+
+			if (idxRel->rd_index->indisunique ||
+				idxRel->rd_index->indisprimary)
+				ereport(ERROR,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("cannot attach foreign table \"%s\" as partition of partitioned table \"%s\"",
+								RelationGetRelationName(attachrel),
+								RelationGetRelationName(rel)),
+						 errdetail("Partitioned table \"%s\" contains unique indexes.",
+								   RelationGetRelationName(rel))));
+			index_close(idxRel, AccessShareLock);
+		}
+
+		goto out;
+	}
+
+	/*
+	 * For each index on the partitioned table, find a matching one in the
+	 * partition-to-be; if one is not found, create one.
+	 */
+	foreach(cell, idxes)
+	{
+		Oid			idx = lfirst_oid(cell);
+		Relation	idxRel = index_open(idx, AccessShareLock);
+		IndexInfo  *info;
+		AttrMap    *attmap;
+		bool		found = false;
+		Oid			constraintOid;
+
+		/*
+		 * Ignore indexes in the partitioned table other than partitioned
+		 * indexes.
+		 */
+		if (idxRel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+		{
+			index_close(idxRel, AccessShareLock);
+			continue;
+		}
+
+		/* construct an indexinfo to compare existing indexes against */
+		info = BuildIndexInfo(idxRel);
+		attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
+									   RelationGetDescr(rel));
+		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);
+
+		/*
+		 * Scan the list of existing indexes in the partition-to-be, and mark
+		 * the first matching, unattached one we find, if any, as partition of
+		 * the parent index.  If we find one, we're done.
+		 */
+		for (i = 0; i < list_length(attachRelIdxs); i++)
+		{
+			Oid			cldIdxId = RelationGetRelid(attachrelIdxRels[i]);
+			Oid			cldConstrOid = InvalidOid;
+
+			/* does this index have a parent?  if so, can't use it */
+			if (attachrelIdxRels[i]->rd_rel->relispartition)
+				continue;
+
+			if (CompareIndexInfo(attachInfos[i], info,
+								 attachrelIdxRels[i]->rd_indcollation,
+								 idxRel->rd_indcollation,
+								 attachrelIdxRels[i]->rd_opfamily,
+								 idxRel->rd_opfamily,
+								 attmap))
+			{
+				/*
+				 * If this index is being created in the parent because of a
+				 * constraint, then the child needs to have a constraint also,
+				 * so look for one.  If there is no such constraint, this
+				 * index is no good, so keep looking.
+				 */
+				if (OidIsValid(constraintOid))
+				{
+					cldConstrOid =
+						get_relation_idx_constraint_oid(RelationGetRelid(attachrel),
+														cldIdxId);
+					/* no dice */
+					if (!OidIsValid(cldConstrOid))
+						continue;
+				}
+
+				/* bingo. */
+				IndexSetParentIndex(attachrelIdxRels[i], idx);
+				if (OidIsValid(constraintOid))
+					ConstraintSetParentConstraint(cldConstrOid, constraintOid,
+												  RelationGetRelid(attachrel));
+				found = true;
+
+				CommandCounterIncrement();
+				break;
+			}
+		}
+
+		/*
+		 * If no suitable index was found in the partition-to-be, create one
+		 * now.
+		 */
+		if (!found)
+		{
+			IndexStmt  *stmt;
+			Oid			conOid;
+
+			/*
+			 * conOid is filled in by generateClonedIndexStmt.  It is
+			 * deliberately not called constraintOid, so that it does not
+			 * shadow the variable of that name declared for this iteration.
+			 */
+			stmt = generateClonedIndexStmt(NULL,
+										   idxRel, attmap,
+										   &conOid);
+			DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
+						RelationGetRelid(idxRel),
+						conOid,
+						true, false, false, false, false);
+		}
+
+		index_close(idxRel, AccessShareLock);
+	}
+
+out:
+	/* Clean up. */
+	for (i = 0; i < list_length(attachRelIdxs); i++)
+		index_close(attachrelIdxRels[i], AccessShareLock);
+	MemoryContextSwitchTo(oldcxt);
+	MemoryContextDelete(cxt);
+}
+
+/*
+ * CloneRowTriggersToPartition
+ *		subroutine for ATExecAttachPartition/DefineRelation to create row
+ *		triggers on partitions
+ *
+ * Scans pg_trigger for the triggers of 'parent' and, for every row-level
+ * trigger that passes the checks below, builds a CreateTrigStmt from the
+ * catalog row and calls CreateTrigger to create the corresponding trigger on
+ * 'partition'.  Each statement is built in a short-lived memory context
+ * that is reset once the trigger has been created.
+ */
+static void
+CloneRowTriggersToPartition(Relation parent, Relation partition)
+{
+	Relation	pg_trigger;
+	ScanKeyData key;
+	SysScanDesc scan;
+	HeapTuple	tuple;
+	MemoryContext perTupCxt;
+
+	ScanKeyInit(&key, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parent)));
+	pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
+	scan = systable_beginscan(pg_trigger, TriggerRelidNameIndexId,
+							  true, NULL, 1, &key);
+
+	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
+									  "clone trig", ALLOCSET_SMALL_SIZES);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_trigger trigForm = (Form_pg_trigger) GETSTRUCT(tuple);
+		CreateTrigStmt *trigStmt;
+		Node	   *qual = NULL;
+		Datum		value;
+		bool		isnull;
+		List	   *cols = NIL;
+		List	   *trigargs = NIL;
+		MemoryContext oldcxt;
+
+		/*
+		 * Ignore statement-level triggers; those are not cloned.
+		 */
+		if (!TRIGGER_FOR_ROW(trigForm->tgtype))
+			continue;
+
+		/*
+		 * Internal triggers require careful examination.  Ideally, we don't
+		 * clone them.  However, if our parent is itself a partition, there
+		 * might be internal triggers that must not be skipped; for example,
+		 * triggers on our parent that are in turn clones from its parent (our
+		 * grandparent) are marked internal, yet they are to be cloned.
+		 *
+		 * Concretely: an internal trigger is skipped unless the parent is
+		 * itself a partition and the trigger has a valid tgparentid.
+		 *
+		 * Note we dare not verify that the other trigger belongs to an
+		 * ancestor relation of our parent, because that creates deadlock
+		 * opportunities.
+		 */
+		if (trigForm->tgisinternal &&
+			(!parent->rd_rel->relispartition ||
+			 !OidIsValid(trigForm->tgparentid)))
+			continue;
+
+		/*
+		 * Complain if we find an unexpected trigger type: only BEFORE and
+		 * AFTER row triggers are expected here.
+		 */
+		if (!TRIGGER_FOR_BEFORE(trigForm->tgtype) &&
+			!TRIGGER_FOR_AFTER(trigForm->tgtype))
+			elog(ERROR, "unexpected trigger \"%s\" found",
+				 NameStr(trigForm->tgname));
+
+		/* Use short-lived context for CREATE TRIGGER */
+		oldcxt = MemoryContextSwitchTo(perTupCxt);
+
+		/*
+		 * If there is a WHEN clause, generate a 'cooked' version of it that's
+		 * appropriate for the partition.  The attribute numbers are mapped
+		 * twice, once for references to the OLD row and once for references
+		 * to the NEW row.
+		 */
+		value = heap_getattr(tuple, Anum_pg_trigger_tgqual,
+							 RelationGetDescr(pg_trigger), &isnull);
+		if (!isnull)
+		{
+			qual = stringToNode(TextDatumGetCString(value));
+			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_OLD_VARNO,
+													partition, parent);
+			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_NEW_VARNO,
+													partition, parent);
+		}
+
+		/*
+		 * If there is a column list, transform it to a list of column names.
+		 * Note we don't need to map this list in any way ...  The attribute
+		 * numbers stored in tgattr are resolved to names using the parent's
+		 * tuple descriptor.
+		 */
+		if (trigForm->tgattr.dim1 > 0)
+		{
+			int			i;
+
+			for (i = 0; i < trigForm->tgattr.dim1; i++)
+			{
+				Form_pg_attribute col;
+
+				col = TupleDescAttr(parent->rd_att,
+									trigForm->tgattr.values[i] - 1);
+				cols = lappend(cols,
+							   makeString(pstrdup(NameStr(col->attname))));
+			}
+		}
+
+		/*
+		 * Reconstruct trigger arguments list.  tgargs is walked as tgnargs
+		 * consecutive NUL-terminated strings.
+		 */
+		if (trigForm->tgnargs > 0)
+		{
+			char	   *p;
+
+			value = heap_getattr(tuple, Anum_pg_trigger_tgargs,
+								 RelationGetDescr(pg_trigger), &isnull);
+			if (isnull)
+				elog(ERROR, "tgargs is null for trigger \"%s\" in partition \"%s\"",
+					 NameStr(trigForm->tgname), RelationGetRelationName(partition));
+
+			p = (char *) VARDATA_ANY(DatumGetByteaPP(value));
+
+			for (int i = 0; i < trigForm->tgnargs; i++)
+			{
+				trigargs = lappend(trigargs, makeString(pstrdup(p)));
+				p += strlen(p) + 1;
+			}
+		}
+
+		trigStmt = makeNode(CreateTrigStmt);
+		trigStmt->replace = false;
+		trigStmt->isconstraint = OidIsValid(trigForm->tgconstraint);
+		trigStmt->trigname = NameStr(trigForm->tgname);
+		trigStmt->relation = NULL;
+		trigStmt->funcname = NULL;	/* passed separately */
+		trigStmt->args = trigargs;
+		trigStmt->row = true;
+		trigStmt->timing = trigForm->tgtype & TRIGGER_TYPE_TIMING_MASK;
+		trigStmt->events = trigForm->tgtype & TRIGGER_TYPE_EVENT_MASK;
+		trigStmt->columns = cols;
+		trigStmt->whenClause = NULL;	/* passed separately */
+		trigStmt->transitionRels = NIL; /* not supported at present */
+		trigStmt->deferrable = trigForm->tgdeferrable;
+		trigStmt->initdeferred = trigForm->tginitdeferred;
+		trigStmt->constrrel = NULL; /* passed separately */
+
+		CreateTrigger(trigStmt, NULL, RelationGetRelid(partition),
+					  trigForm->tgconstrrelid, InvalidOid, InvalidOid,
+					  trigForm->tgfoid, trigForm->oid, qual,
+					  false, true);
+
+		MemoryContextSwitchTo(oldcxt);
+		MemoryContextReset(perTupCxt);
+	}
+
+	MemoryContextDelete(perTupCxt);
+
+	systable_endscan(scan);
+	table_close(pg_trigger, RowExclusiveLock);
+}
+
+/*
+ * ALTER TABLE DETACH PARTITION
+ *
+ * Return the address of the relation that is no longer a partition of rel.
+ *
+ * 'rel' is the partitioned table and 'name' identifies the partition to
+ * detach.  In concurrent mode 'tab->rel' is set to NULL before the first
+ * transaction commits and pointed at the reopened parent afterwards.
+ *
+ * If concurrent mode is requested, we run in two transactions.  A side-
+ * effect is that this command cannot run in a multi-part ALTER TABLE.
+ * Currently, that's enforced by the grammar.
+ *
+ * The strategy for concurrency is to first modify the partition's
+ * pg_inherits catalog row to make it visible to everyone that the
+ * partition is detached, lock the partition against writes, and commit
+ * the transaction; anyone who requests the partition descriptor from
+ * that point onwards has to ignore such a partition.  In a second
+ * transaction, we wait until all transactions that could have seen the
+ * partition as attached are gone, then we remove the rest of partition
+ * metadata (pg_inherits and pg_class.relpartbound).
+ */
+static ObjectAddress
+ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					  RangeVar *name, bool concurrent)
+{
+	Relation	partRel;
+	ObjectAddress address;
+	Oid			defaultPartOid;
+
+	/*
+	 * We must lock the default partition, because detaching this partition
+	 * will change its partition constraint.
+	 */
+	defaultPartOid =
+		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
+	if (OidIsValid(defaultPartOid))
+	{
+		/*
+		 * Concurrent detaching when a default partition exists is not
+		 * supported. The main problem is that the default partition
+		 * constraint would change.  And there's a definitional problem: what
+		 * should happen to the tuples that are being inserted that belong to
+		 * the partition being detached?  Putting them on the partition being
+		 * detached would be wrong, since they'd become "lost" after the
+		 * detach completes, but we cannot put them in the default partition
+		 * either until we alter its partition constraint.
+		 *
+		 * I think we could solve this problem if we effected the constraint
+		 * change before committing the first transaction.  But the lock would
+		 * have to remain AEL and it would cause concurrent query planning to
+		 * be blocked, so changing it that way would be even worse.
+		 */
+		if (concurrent)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot detach partitions concurrently when a default partition exists")));
+		LockRelationOid(defaultPartOid, AccessExclusiveLock);
+	}
+
+	/*
+	 * In concurrent mode, the partition is locked with share-update-exclusive
+	 * in the first transaction.  This allows concurrent transactions to be
+	 * doing DML to the partition.  Otherwise it is locked with
+	 * access-exclusive right away.
+	 */
+	partRel = table_openrv(name, concurrent ? ShareUpdateExclusiveLock :
+						   AccessExclusiveLock);
+
+	/*
+	 * Check inheritance conditions and either delete the pg_inherits row
+	 * (in non-concurrent mode) or just set the inhdetachpending flag.
+	 */
+	if (!concurrent)
+		RemoveInheritance(partRel, rel, false);
+	else
+		MarkInheritDetached(partRel, rel);
+
+	/*
+	 * Ensure that foreign keys still hold after this detach.  This keeps
+	 * locks on the referencing tables, which prevents concurrent transactions
+	 * from adding rows that we wouldn't see.  For this to work in concurrent
+	 * mode, it is critical that the partition appears as no longer attached
+	 * for the RI queries as soon as the first transaction commits.
+	 */
+	ATDetachCheckNoForeignKeyRefs(partRel);
+
+	/*
+	 * Concurrent mode has to work harder; first we add a new constraint to
+	 * the partition that matches the partition constraint.  Then we close our
+	 * existing transaction, and in a new one wait for all processes to catch
+	 * up on the catalog updates we've done so far; at that point we can
+	 * complete the operation.
+	 */
+	if (concurrent)
+	{
+		Oid		partrelid,
+				parentrelid;
+		LOCKTAG		tag;
+		char   *parentrelname;
+		char   *partrelname;
+
+		/*
+		 * Add a new constraint to the partition being detached, which
+		 * supplants the partition constraint (unless there is one already).
+		 */
+		DetachAddConstraintIfNeeded(wqueue, partRel);
+
+		/*
+		 * We're almost done now; the only traces that remain are the
+		 * pg_inherits tuple and the partition's relpartbound.  Before we can
+		 * remove those, we need to wait until all transactions that know that
+		 * this is a partition are gone.
+		 */
+
+		/*
+		 * Remember relation OIDs to re-acquire them later; and relation names
+		 * too, for error messages if something is dropped in between.  The
+		 * names are copied into PortalContext because they are needed again
+		 * after the transaction boundary below.
+		 */
+		partrelid = RelationGetRelid(partRel);
+		parentrelid = RelationGetRelid(rel);
+		parentrelname = MemoryContextStrdup(PortalContext,
+											RelationGetRelationName(rel));
+		partrelname = MemoryContextStrdup(PortalContext,
+										  RelationGetRelationName(partRel));
+
+		/* Invalidate relcache entries for the parent -- must be before close */
+		CacheInvalidateRelcache(rel);
+
+		table_close(partRel, NoLock);
+		table_close(rel, NoLock);
+		tab->rel = NULL;
+
+		/* Make updated catalog entry visible */
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+
+		StartTransactionCommand();
+
+		/*
+		 * Now wait.  This ensures that all queries that were planned including
+		 * the partition are finished before we remove the rest of catalog
+		 * entries.  We don't need or indeed want to acquire this lock, though
+		 * -- that would block later queries.
+		 *
+		 * We don't need to concern ourselves with waiting for a lock on the
+		 * partition itself, since we will acquire AccessExclusiveLock below.
+		 */
+		SET_LOCKTAG_RELATION(tag, MyDatabaseId, parentrelid);
+		WaitForLockersMultiple(list_make1(&tag), AccessExclusiveLock, false);
+
+		/*
+		 * Now acquire locks in both relations again.  Note they may have been
+		 * removed in the meantime, so care is required.
+		 */
+		rel = try_relation_open(parentrelid, ShareUpdateExclusiveLock);
+		partRel = try_relation_open(partrelid, AccessExclusiveLock);
+
+		/* If the relations aren't there, something bad happened; bail out */
+		if (rel == NULL)
+		{
+			if (partRel != NULL)	/* shouldn't happen */
+				elog(WARNING, "dangling partition \"%s\" remains, can't fix",
+					 partrelname);
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("partitioned table \"%s\" was removed concurrently",
+							parentrelname)));
+		}
+		if (partRel == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("partition \"%s\" was removed concurrently", partrelname)));
+
+		tab->rel = rel;
+	}
+
+	/* Do the final part of detaching */
+	DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
+
+	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
+
+	/* keep our lock until commit */
+	table_close(partRel, NoLock);
+
+	return address;
+}
+
+/*
+ * Second part of ALTER TABLE .. DETACH.
+ *
+ * This is separate so that it can be run independently when the second
+ * transaction of the concurrent algorithm fails (crash or abort).
+ *
+ * 'rel' is the partitioned table and 'partRel' the partition being detached.
+ * If 'concurrent' is true the pg_inherits row is removed here; otherwise it
+ * has already been removed.  'defaultPartOid', if valid, is compared against
+ * the partition's own OID to decide which of the two default-partition
+ * actions at the bottom of the function to take.
+ *
+ * In order, this: drops the triggers cloned onto the partition; detaches
+ * inherited foreign keys, creating action triggers for them; removes the
+ * constraints returned by GetParentedForeignKeyRefs; detaches the partition's
+ * indexes (and their constraints) from their parents; clears relpartbound
+ * and relispartition in the partition's pg_class row; and finally issues the
+ * relcache invalidations for the default partition and for the parent.
+ */
+static void
+DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent,
+						Oid defaultPartOid)
+{
+	Relation	classRel;
+	List	   *fks;
+	ListCell   *cell;
+	List	   *indexes;
+	Datum		new_val[Natts_pg_class];
+	bool		new_null[Natts_pg_class],
+				new_repl[Natts_pg_class];
+	HeapTuple	tuple,
+				newtuple;
+
+	if (concurrent)
+	{
+		/*
+		 * We can remove the pg_inherits row now. (In the non-concurrent case,
+		 * this was already done).
+		 */
+		RemoveInheritance(partRel, rel, true);
+	}
+
+	/* Drop any triggers that were cloned on creation/attach. */
+	DropClonedTriggersFromPartition(RelationGetRelid(partRel));
+
+	/*
+	 * Detach any foreign keys that are inherited.  This includes creating
+	 * additional action triggers.  We iterate over a copy of the relation's
+	 * foreign key list, which is freed after the loop.
+	 */
+	fks = copyObject(RelationGetFKeyList(partRel));
+	foreach(cell, fks)
+	{
+		ForeignKeyCacheInfo *fk = lfirst(cell);
+		HeapTuple	contup;
+		Form_pg_constraint conform;
+		Constraint *fkconstraint;
+
+		contup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fk->conoid));
+		if (!HeapTupleIsValid(contup))
+			elog(ERROR, "cache lookup failed for constraint %u", fk->conoid);
+		conform = (Form_pg_constraint) GETSTRUCT(contup);
+
+		/* consider only the inherited foreign keys */
+		if (conform->contype != CONSTRAINT_FOREIGN ||
+			!OidIsValid(conform->conparentid))
+		{
+			ReleaseSysCache(contup);
+			continue;
+		}
+
+		/* unset conparentid and adjust conislocal, coninhcount, etc. */
+		ConstraintSetParentConstraint(fk->conoid, InvalidOid, InvalidOid);
+
+		/*
+		 * Make the action triggers on the referenced relation.  When this was
+		 * a partition the action triggers pointed to the parent rel (they
+		 * still do), but now we need separate ones of our own.  The
+		 * Constraint node built here carries only the name, update/delete
+		 * actions and deferrability copied from the catalog row.
+		 */
+		fkconstraint = makeNode(Constraint);
+		fkconstraint->conname = pstrdup(NameStr(conform->conname));
+		fkconstraint->fk_upd_action = conform->confupdtype;
+		fkconstraint->fk_del_action = conform->confdeltype;
+		fkconstraint->deferrable = conform->condeferrable;
+		fkconstraint->initdeferred = conform->condeferred;
+
+		createForeignKeyActionTriggers(partRel, conform->confrelid,
+									   fkconstraint, fk->conoid,
+									   conform->conindid);
+
+		ReleaseSysCache(contup);
+	}
+	list_free_deep(fks);
+
+	/*
+	 * Any sub-constraints that are in the referenced-side of a larger
+	 * constraint have to be removed.  This partition is no longer part of the
+	 * key space of the constraint.  Each one is first unlinked from its
+	 * parent constraint and has its internal dependency records deleted, and
+	 * is then dropped with performDeletion.
+	 */
+	foreach(cell, GetParentedForeignKeyRefs(partRel))
+	{
+		Oid			constrOid = lfirst_oid(cell);
+		ObjectAddress constraint;
+
+		ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+		deleteDependencyRecordsForClass(ConstraintRelationId,
+										constrOid,
+										ConstraintRelationId,
+										DEPENDENCY_INTERNAL);
+		CommandCounterIncrement();
+
+		ObjectAddressSet(constraint, ConstraintRelationId, constrOid);
+		performDeletion(&constraint, DROP_RESTRICT, 0);
+	}
+
+	/*
+	 * Now we can detach indexes.  Indexes for which has_superclass() is
+	 * false are left alone.
+	 */
+	indexes = RelationGetIndexList(partRel);
+	foreach(cell, indexes)
+	{
+		Oid			idxid = lfirst_oid(cell);
+		Relation	idx;
+		Oid			constrOid;
+
+		if (!has_superclass(idxid))
+			continue;
+
+		Assert((IndexGetRelation(get_partition_parent(idxid, false), false) ==
+				RelationGetRelid(rel)));
+
+		idx = index_open(idxid, AccessExclusiveLock);
+		IndexSetParentIndex(idx, InvalidOid);
+
+		/* If there's a constraint associated with the index, detach it too */
+		constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel),
+													idxid);
+		if (OidIsValid(constrOid))
+			ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+
+		index_close(idx, NoLock);
+	}
+
+	/* Update pg_class tuple */
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheCopy1(RELOID,
+								ObjectIdGetDatum(RelationGetRelid(partRel)));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u",
+			 RelationGetRelid(partRel));
+	Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+
+	/*
+	 * Clear relpartbound and reset relispartition.  Only relpartbound goes
+	 * through heap_modify_tuple (it is set to NULL); relispartition is then
+	 * overwritten directly in the new tuple's struct.
+	 */
+	memset(new_val, 0, sizeof(new_val));
+	memset(new_null, false, sizeof(new_null));
+	memset(new_repl, false, sizeof(new_repl));
+	new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
+	new_null[Anum_pg_class_relpartbound - 1] = true;
+	new_repl[Anum_pg_class_relpartbound - 1] = true;
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+								 new_val, new_null, new_repl);
+
+	((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
+	CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple);
+	heap_freetuple(newtuple);
+	table_close(classRel, RowExclusiveLock);
+
+	if (OidIsValid(defaultPartOid))
+	{
+		/*
+		 * If the relation being detached is the default partition itself,
+		 * remove it from the parent's pg_partitioned_table entry.
+		 *
+		 * If not, we must invalidate default partition's relcache entry, as
+		 * in StorePartitionBound: its partition constraint depends on every
+		 * other partition's partition constraint.
+		 */
+		if (RelationGetRelid(partRel) == defaultPartOid)
+			update_default_partition_oid(RelationGetRelid(rel), InvalidOid);
+		else
+			CacheInvalidateRelcacheByRelid(defaultPartOid);
+	}
+
+	/*
+	 * Invalidate the parent's relcache so that the partition is no longer
+	 * included in its partition descriptor.
+	 */
+	CacheInvalidateRelcache(rel);
+}
+
+/*
+ * ALTER TABLE ... DETACH PARTITION ... FINALIZE
+ *
+ * Finish off a DETACH PARTITION command that was started earlier but did
+ * not run to completion.  'rel' is the partitioned table and 'name' the
+ * partition being detached; the partition's address is returned.
+ */
+static ObjectAddress
+ATExecDetachPartitionFinalize(Relation rel, RangeVar *name)
+{
+	Snapshot	activeSnap = GetActiveSnapshot();
+	Relation	detached = table_openrv(name, AccessExclusiveLock);
+	ObjectAddress result;
+
+	/*
+	 * Existing snapshots must be gone before we go any further.  If the
+	 * second transaction of DETACH PARTITION CONCURRENTLY was canceled, the
+	 * user may issue DETACH FINALIZE right away, without having waited for
+	 * the transactions that were already running.  Completing the detach
+	 * while such queries are still in progress would show them an
+	 * inconsistent view of the catalogs.
+	 */
+	WaitForOlderSnapshots(activeSnap->xmin, false);
+
+	/* Run the final phase as for the concurrent case */
+	DetachPartitionFinalize(rel, detached, true, InvalidOid);
+
+	ObjectAddressSet(result, RelationRelationId, RelationGetRelid(detached));
+
+	/* Close the partition without releasing the lock on it */
+	table_close(detached, NoLock);
+
+	return result;
+}
+
+/*
+ * DetachAddConstraintIfNeeded
+ *		Helper for ATExecDetachPartition.  Give the partition a CHECK
+ *		constraint that stands in for its partition constraint, unless one
+ *		of its existing CHECK constraints is already equal to it.
+ */
+static void
+DetachAddConstraintIfNeeded(List **wqueue, Relation partRel)
+{
+	TupleDesc	partDesc = RelationGetDescr(partRel);
+	Expr	   *partQual;
+	int			ncheck;
+	AlteredTableInfo *entry;
+	Constraint *con;
+
+	partQual = make_ands_explicit(RelationGetPartitionQual(partRel));
+
+	/* Nothing to do when a matching CHECK constraint is already present */
+	ncheck = (partDesc->constr != NULL) ? partDesc->constr->num_check : 0;
+	for (int i = 0; i < ncheck; i++)
+	{
+		Node	   *existing = stringToNode(partDesc->constr->check[i].ccbin);
+
+		if (equal(partQual, existing))
+			return;
+	}
+
+	entry = ATGetQueueEntry(wqueue, partRel);
+
+	/* Build an unnamed, already-cooked CHECK constraint from the qual */
+	con = makeNode(Constraint);
+	con->contype = CONSTR_CHECK;
+	con->conname = NULL;
+	con->location = -1;
+	con->is_no_inherit = false;
+	con->raw_expr = NULL;
+	con->cooked_expr = nodeToString(partQual);
+	con->initially_valid = true;
+	con->skip_validation = true;
+
+	/* Treated as a re-add: nominally, the constraint exists already */
+	ATAddCheckConstraint(wqueue, entry, partRel, con,
+						 true, false, true, ShareUpdateExclusiveLock);
+}
+
+/*
+ * DropClonedTriggersFromPartition
+ *		subroutine for ATExecDetachPartition to remove any triggers that were
+ *		cloned to the partition when it was created-as-partition or attached.
+ *		This undoes what CloneRowTriggersToPartition did.
+ *
+ * A trigger on 'partitionId' is treated as a clone when its tgparentid is
+ * valid.  For each such trigger the partition dependency records are removed
+ * during the scan; the triggers themselves are collected and dropped together
+ * once the scan loop has finished.
+ */
+static void
+DropClonedTriggersFromPartition(Oid partitionId)
+{
+	ScanKeyData skey;
+	SysScanDesc scan;
+	HeapTuple	trigtup;
+	Relation	tgrel;
+	ObjectAddresses *objects;
+
+	objects = new_object_addresses();
+
+	/*
+	 * Scan pg_trigger to search for all triggers on this rel.
+	 */
+	ScanKeyInit(&skey, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(partitionId));
+	tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+	scan = systable_beginscan(tgrel, TriggerRelidNameIndexId,
+							  true, NULL, 1, &skey);
+	while (HeapTupleIsValid(trigtup = systable_getnext(scan)))
+	{
+		Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(trigtup);
+		ObjectAddress trig;
+
+		/* Ignore triggers that weren't cloned */
+		if (!OidIsValid(pg_trigger->tgparentid))
+			continue;
+
+		/*
+		 * This is ugly, but necessary: remove the dependency markings on the
+		 * trigger so that it can be removed.  Two kinds of records are
+		 * deleted: the DEPENDENCY_PARTITION_PRI one referencing pg_trigger
+		 * and the DEPENDENCY_PARTITION_SEC one referencing pg_class.
+		 */
+		deleteDependencyRecordsForClass(TriggerRelationId, pg_trigger->oid,
+										TriggerRelationId,
+										DEPENDENCY_PARTITION_PRI);
+		deleteDependencyRecordsForClass(TriggerRelationId, pg_trigger->oid,
+										RelationRelationId,
+										DEPENDENCY_PARTITION_SEC);
+
+		/* remember this trigger to remove it below */
+		ObjectAddressSet(trig, TriggerRelationId, pg_trigger->oid);
+		add_exact_object_address(&trig, objects);
+	}
+
+	/* make the dependency removal visible to the deletion below */
+	CommandCounterIncrement();
+	performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+	/* done */
+	free_object_addresses(objects);
+	systable_endscan(scan);
+	table_close(tgrel, RowExclusiveLock);
+}
+
+/*
+ * Before acquiring lock on an index, acquire the same lock on the owning
+ * table.
+ *
+ * AttachIndexCallbackState is the argument passed to
+ * RangeVarCallbackForAttachIndex, which ATExecAttachPartitionIdx hands to
+ * RangeVarGetRelidExtended.  The callback locks the parent table once, and
+ * on each call locks the table owning the index that the name currently
+ * resolves to, dropping the lock it took on a previous call if the name now
+ * resolves to something else.
+ *
+ * Note that the table locks taken here are AccessShareLock, whatever lock
+ * mode the caller requests for the index itself.
+ */
+struct AttachIndexCallbackState
+{
+	Oid			partitionOid;	/* table owning the index, once locked;
+								 * InvalidOid while no such lock is held */
+	Oid			parentTblOid;	/* parent table, locked on first call */
+	bool		lockedParentTbl;	/* has parentTblOid been locked yet? */
+};
+
+static void
+RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid,
+							   void *arg)
+{
+	struct AttachIndexCallbackState *state;
+	Form_pg_class classform;
+	HeapTuple	tuple;
+
+	state = (struct AttachIndexCallbackState *) arg;
+
+	if (!state->lockedParentTbl)
+	{
+		LockRelationOid(state->parentTblOid, AccessShareLock);
+		state->lockedParentTbl = true;
+	}
+
+	/*
+	 * If we previously locked some other heap, and the name we're looking up
+	 * no longer refers to an index on that relation, release the now-useless
+	 * lock.  XXX maybe we should do *after* we verify whether the index does
+	 * not actually belong to the same relation ...
+	 */
+	if (relOid != oldRelOid && OidIsValid(state->partitionOid))
+	{
+		UnlockRelationOid(state->partitionOid, AccessShareLock);
+		state->partitionOid = InvalidOid;
+	}
+
+	/* Didn't find a relation, so no need for locking or permission checks. */
+	if (!OidIsValid(relOid))
+		return;
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid));
+	if (!HeapTupleIsValid(tuple))
+		return;					/* concurrently dropped, so nothing to do */
+	classform = (Form_pg_class) GETSTRUCT(tuple);
+	if (classform->relkind != RELKIND_PARTITIONED_INDEX &&
+		classform->relkind != RELKIND_INDEX)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("\"%s\" is not an index", rv->relname)));
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Since we need only examine the heap's tupledesc, an access share lock
+	 * on it (preventing any DDL) is sufficient.  The OID of the table that
+	 * owns the index is remembered in the callback state.
+	 */
+	state->partitionOid = IndexGetRelation(relOid, false);
+	LockRelationOid(state->partitionOid, AccessShareLock);
+}
+
+/*
+ * ALTER INDEX i1 ATTACH PARTITION i2
+ *
+ * Attach the index named by 'name' (i2) as a partition of the partitioned
+ * index 'parentIdx' (i1).
+ *
+ * The index is looked up with RangeVarGetRelidExtended, which takes
+ * AccessExclusiveLock on it; the lookup callback is handed 'state', whose
+ * partitionOid is later compared against the partitions of parentIdx's
+ * table.  An error is raised if no such index exists.
+ *
+ * If the index is already a partition of 'parentIdx', nothing is changed.
+ * Otherwise we check, in this order, that no other index is already attached
+ * for the same partition, that the index is not attached to some other
+ * parent, that it indexes a partition of parentIdx's table, that the two
+ * index definitions match, and that the child index has a constraint
+ * whenever the parent index has one.  Only then is the parent set with
+ * IndexSetParentIndex (and ConstraintSetParentConstraint when a constraint
+ * is involved), after which validatePartitionedIndex is called for the
+ * parent index.
+ *
+ * The lock taken here on the parent table is released before returning; the
+ * locks on the partition and on its index are kept.
+ *
+ * 'wqueue' is not referenced in this function.
+ *
+ * Returns the object address of the index named by 'name'.
+ */
+static ObjectAddress
+ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name)
+{
+	Relation	partIdx;
+	Relation	partTbl;
+	Relation	parentTbl;
+	ObjectAddress address;
+	Oid			partIdxId;
+	Oid			currParent;
+	struct AttachIndexCallbackState state;
+
+	/*
+	 * We need to obtain lock on the index 'name' to modify it, but we also
+	 * need to read its owning table's tuple descriptor -- so we need to lock
+	 * both.  To avoid deadlocks, obtain lock on the table before doing so on
+	 * the index.  Furthermore, we need to examine the parent table of the
+	 * partition, so lock that one too.
+	 */
+	state.partitionOid = InvalidOid;
+	state.parentTblOid = parentIdx->rd_index->indrelid;
+	state.lockedParentTbl = false;
+	partIdxId =
+		RangeVarGetRelidExtended(name, AccessExclusiveLock, 0,
+								 RangeVarCallbackForAttachIndex,
+								 (void *) &state);
+	/* Not there? */
+	if (!OidIsValid(partIdxId))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("index \"%s\" does not exist", name->relname)));
+
+	/* no deadlock risk: RangeVarGetRelidExtended already acquired the lock */
+	partIdx = relation_open(partIdxId, AccessExclusiveLock);
+
+	/* we already hold locks on both tables, so this is safe: */
+	parentTbl = relation_open(parentIdx->rd_index->indrelid, AccessShareLock);
+	partTbl = relation_open(partIdx->rd_index->indrelid, NoLock);
+
+	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partIdx));
+
+	/* Silently do nothing if already in the right state */
+	currParent = partIdx->rd_rel->relispartition ?
+		get_partition_parent(partIdxId, false) : InvalidOid;
+	if (currParent != RelationGetRelid(parentIdx))
+	{
+		IndexInfo  *childInfo;
+		IndexInfo  *parentInfo;
+		AttrMap    *attmap;
+		bool		found;
+		int			i;
+		PartitionDesc partDesc;
+		Oid			constraintOid,
+					cldConstrId = InvalidOid;
+
+		/*
+		 * If this partition already has an index attached, refuse the
+		 * operation.
+		 */
+		refuseDupeIndexAttach(parentIdx, partIdx, partTbl);
+
+		if (OidIsValid(currParent))
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+							RelationGetRelationName(partIdx),
+							RelationGetRelationName(parentIdx)),
+					 errdetail("Index \"%s\" is already attached to another index.",
+							   RelationGetRelationName(partIdx))));
+
+		/* Make sure it indexes a partition of the other index's table */
+		partDesc = RelationGetPartitionDesc(parentTbl, false);
+		found = false;
+		for (i = 0; i < partDesc->nparts; i++)
+		{
+			if (partDesc->oids[i] == state.partitionOid)
+			{
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+							RelationGetRelationName(partIdx),
+							RelationGetRelationName(parentIdx)),
+					 errdetail("Index \"%s\" is not an index on any partition of table \"%s\".",
+							   RelationGetRelationName(partIdx),
+							   RelationGetRelationName(parentTbl))));
+
+		/* Ensure the indexes are compatible */
+		childInfo = BuildIndexInfo(partIdx);
+		parentInfo = BuildIndexInfo(parentIdx);
+		attmap = build_attrmap_by_name(RelationGetDescr(partTbl),
+									   RelationGetDescr(parentTbl));
+		if (!CompareIndexInfo(childInfo, parentInfo,
+							  partIdx->rd_indcollation,
+							  parentIdx->rd_indcollation,
+							  partIdx->rd_opfamily,
+							  parentIdx->rd_opfamily,
+							  attmap))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+							RelationGetRelationName(partIdx),
+							RelationGetRelationName(parentIdx)),
+					 errdetail("The index definitions do not match.")));
+
+		/*
+		 * If there is a constraint in the parent, make sure there is one in
+		 * the child too.
+		 */
+		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(parentTbl),
+														RelationGetRelid(parentIdx));
+
+		if (OidIsValid(constraintOid))
+		{
+			cldConstrId = get_relation_idx_constraint_oid(RelationGetRelid(partTbl),
+														  partIdxId);
+			if (!OidIsValid(cldConstrId))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+								RelationGetRelationName(partIdx),
+								RelationGetRelationName(parentIdx)),
+						 errdetail("The index \"%s\" belongs to a constraint in table \"%s\" but no constraint exists for index \"%s\".",
+								   RelationGetRelationName(parentIdx),
+								   RelationGetRelationName(parentTbl),
+								   RelationGetRelationName(partIdx))));
+		}
+
+		/* All good -- do it */
+		IndexSetParentIndex(partIdx, RelationGetRelid(parentIdx));
+		if (OidIsValid(constraintOid))
+			ConstraintSetParentConstraint(cldConstrId, constraintOid,
+										  RelationGetRelid(partTbl));
+
+		free_attrmap(attmap);
+
+		validatePartitionedIndex(parentIdx, parentTbl);
+	}
+
+	relation_close(parentTbl, AccessShareLock);
+	/* keep these locks till commit */
+	relation_close(partTbl, NoLock);
+	relation_close(partIdx, NoLock);
+
+	return address;
+}
+
+/*
+ * Verify whether the given partition already contains an index attached
+ * to the given partitioned index.  If so, raise an error.
+ *
+ * 'parentIdx' is the partitioned index and 'partitionTbl' is the partition
+ * to inspect; the check is done with index_get_partition.  'partIdx' is the
+ * index the caller wants to attach, and is used here only to build the
+ * error message.
+ */
+static void
+refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTbl)
+{
+	Oid			existingIdx;
+
+	existingIdx = index_get_partition(partitionTbl,
+									  RelationGetRelid(parentIdx));
+	if (OidIsValid(existingIdx))
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+						RelationGetRelationName(partIdx),
+						RelationGetRelationName(parentIdx)),
+				 errdetail("Another index is already attached for partition \"%s\".",
+						   RelationGetRelationName(partitionTbl))));
+}
+
+/*
+ * Verify whether the set of attached partition indexes to a parent index on
+ * a partitioned table is complete.  If it is, mark the parent index valid.
+ *
+ * This should be called each time a partition index is attached.
+ *
+ * 'partedIdx' must be a partitioned index (this is asserted), and
+ * 'partedTbl' is the partitioned table whose number of partitions is
+ * compared against the number of valid child indexes found in pg_inherits.
+ *
+ * If the index gets marked valid here and is itself a partition of another
+ * index, this function recurses on that parent index and the parent of
+ * 'partedTbl', since the parent index might have become valid as well.
+ */
+static void
+validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
+{
+	Relation	inheritsRel;
+	SysScanDesc scan;
+	ScanKeyData key;
+	int			tuples = 0;
+	HeapTuple	inhTup;
+	bool		updated = false;
+
+	Assert(partedIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
+
+	/*
+	 * Scan pg_inherits for this parent index.  Count each valid index we find
+	 * (verifying the pg_index entry for each), and if we reach the total
+	 * amount we expect, we can mark this parent index as valid.
+	 */
+	inheritsRel = table_open(InheritsRelationId, AccessShareLock);
+	ScanKeyInit(&key, Anum_pg_inherits_inhparent,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(partedIdx)));
+	scan = systable_beginscan(inheritsRel, InheritsParentIndexId, true,
+							  NULL, 1, &key);
+	while ((inhTup = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(inhTup);
+		HeapTuple	indTup;
+		Form_pg_index indexForm;
+
+		indTup = SearchSysCache1(INDEXRELID,
+								 ObjectIdGetDatum(inhForm->inhrelid));
+		if (!HeapTupleIsValid(indTup))
+			elog(ERROR, "cache lookup failed for index %u", inhForm->inhrelid);
+		indexForm = (Form_pg_index) GETSTRUCT(indTup);
+		if (indexForm->indisvalid)
+			tuples += 1;
+		ReleaseSysCache(indTup);
+	}
+
+	/* Done with pg_inherits */
+	systable_endscan(scan);
+	table_close(inheritsRel, AccessShareLock);
+
+	/*
+	 * If we found as many inherited indexes as the partitioned table has
+	 * partitions, we're good; update pg_index to set indisvalid.
+	 */
+	if (tuples == RelationGetPartitionDesc(partedTbl, false)->nparts)
+	{
+		Relation	idxRel;
+		HeapTuple	newtup;
+
+		idxRel = table_open(IndexRelationId, RowExclusiveLock);
+
+		newtup = heap_copytuple(partedIdx->rd_indextuple);
+		((Form_pg_index) GETSTRUCT(newtup))->indisvalid = true;
+		updated = true;
+
+		CatalogTupleUpdate(idxRel, &partedIdx->rd_indextuple->t_self, newtup);
+
+		table_close(idxRel, RowExclusiveLock);
+	}
+
+	/*
+	 * If this index is in turn a partition of a larger index, validating it
+	 * might cause the parent to become valid also.  Try that.
+	 */
+	if (updated && partedIdx->rd_rel->relispartition)
+	{
+		Oid			parentIdxId,
+					parentTblId;
+		Relation	parentIdx,
+					parentTbl;
+
+		/* make sure we see the validation we just did */
+		CommandCounterIncrement();
+
+		parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false);
+		parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false);
+		parentIdx = relation_open(parentIdxId, AccessExclusiveLock);
+		parentTbl = relation_open(parentTblId, AccessExclusiveLock);
+		Assert(!parentIdx->rd_index->indisvalid);
+
+		validatePartitionedIndex(parentIdx, parentTbl);
+
+		relation_close(parentIdx, AccessExclusiveLock);
+		relation_close(parentTbl, AccessExclusiveLock);
+	}
+}
+
+/*
+ * Return an OID list of constraints that reference the given relation
+ * and that are marked as having a parent constraint.
+ *
+ * pg_constraint is scanned for foreign-key constraints whose confrelid is
+ * 'partition'; only those with a valid conparentid are collected.  NIL is
+ * returned without scanning when the relation has no indexes or no key
+ * columns.
+ */
+static List *
+GetParentedForeignKeyRefs(Relation partition)
+{
+	Relation	pg_constraint;
+	HeapTuple	tuple;
+	SysScanDesc scan;
+	ScanKeyData key[2];
+	List	   *constraints = NIL;
+
+	/*
+	 * If no indexes, or no columns are referenceable by FKs, we can avoid the
+	 * scan.
+	 */
+	if (RelationGetIndexList(partition) == NIL ||
+		bms_is_empty(RelationGetIndexAttrBitmap(partition,
+												INDEX_ATTR_BITMAP_KEY)))
+		return NIL;
+
+	/* Search for constraints referencing this table */
+	pg_constraint = table_open(ConstraintRelationId, AccessShareLock);
+	ScanKeyInit(&key[0],
+				Anum_pg_constraint_confrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(partition)));
+	ScanKeyInit(&key[1],
+				Anum_pg_constraint_contype, BTEqualStrategyNumber,
+				F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN));
+
+	/* XXX This is a seqscan, as we don't have a usable index */
+	scan = systable_beginscan(pg_constraint, InvalidOid, true, NULL, 2, key);
+	while ((tuple = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/*
+		 * We only need to process constraints that are part of larger ones.
+		 */
+		if (!OidIsValid(constrForm->conparentid))
+			continue;
+
+		constraints = lappend_oid(constraints, constrForm->oid);
+	}
+
+	systable_endscan(scan);
+	table_close(pg_constraint, AccessShareLock);
+
+	return constraints;
+}
+
+/*
+ * During DETACH PARTITION, verify that any foreign keys pointing to the
+ * partitioned table would not become invalid.  An error is raised if any
+ * referenced values exist.
+ *
+ * The constraints to check are those returned by GetParentedForeignKeyRefs
+ * for 'partition'.  For each one, the referencing table is opened with
+ * ShareLock (the lock is not released when the table is closed), a
+ * partially filled-in Trigger struct describing the constraint is built,
+ * and the actual check is delegated to RI_PartitionRemove_Check.
+ */
+static void
+ATDetachCheckNoForeignKeyRefs(Relation partition)
+{
+	List	   *constraints;
+	ListCell   *cell;
+
+	constraints = GetParentedForeignKeyRefs(partition);
+
+	foreach(cell, constraints)
+	{
+		Oid			constrOid = lfirst_oid(cell);
+		HeapTuple	tuple;
+		Form_pg_constraint constrForm;
+		Relation	rel;
+		Trigger		trig;
+
+		tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for constraint %u", constrOid);
+		constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		Assert(OidIsValid(constrForm->conparentid));
+		Assert(constrForm->confrelid == RelationGetRelid(partition));
+
+		/* prevent data changes into the referencing table until commit */
+		rel = table_open(constrForm->conrelid, ShareLock);
+
+		MemSet(&trig, 0, sizeof(trig));
+		trig.tgoid = InvalidOid;
+		trig.tgname = NameStr(constrForm->conname);
+		trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN;
+		trig.tgisinternal = true;
+		trig.tgconstrrelid = RelationGetRelid(partition);
+		trig.tgconstrindid = constrForm->conindid;
+		trig.tgconstraint = constrForm->oid;
+		trig.tgdeferrable = false;
+		trig.tginitdeferred = false;
+		/* we needn't fill in remaining fields */
+
+		RI_PartitionRemove_Check(&trig, rel, partition);
+
+		ReleaseSysCache(tuple);
+
+		table_close(rel, NoLock);
+	}
+}
+
+/*
+ * ALTER INDEX ... ALTER COLLATION ... REFRESH VERSION
+ *
+ * Update refobjversion to the current collation version by force.  This clears
+ * warnings about version mismatches without the need to run REINDEX,
+ * potentially hiding corruption due to ordering changes.
+ *
+ * 'rel' is the index to update and 'coll' is the name of the collation,
+ * which is resolved with get_collation_oid(coll, false).  The relcache entry
+ * of 'rel' is invalidated afterwards.
+ */
+static void
+ATExecAlterCollationRefreshVersion(Relation rel, List *coll)
+{
+	index_update_collation_versions(rel->rd_id, get_collation_oid(coll, false));
+	CacheInvalidateRelcache(rel);
+}
+
+/*
+ * Resolve a column compression specification to a compression method.
+ *
+ * 'att' is the column's pg_attribute entry and 'compression' is the name of
+ * the requested compression method, or NULL if none was specified.
+ *
+ * If the storage of the column's type is not compressible, return
+ * InvalidCompressionMethod when no method was specified, and raise an error
+ * otherwise.  For compressible types, a NULL 'compression' yields the result
+ * of GetDefaultToastCompression(); a name that does not map to a valid
+ * compression method raises an error.
+ */
+static char
+GetAttributeCompression(Form_pg_attribute att, char *compression)
+{
+	char		typstorage = get_typstorage(att->atttypid);
+	char		cmethod;
+
+	/*
+	 * No compression for plain/external storage; see the comments atop the
+	 * attcompression parameter in pg_attribute.h.
+	 */
+	if (!IsStorageCompressible(typstorage))
+	{
+		if (compression == NULL)
+			return InvalidCompressionMethod;
+
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("column data type %s does not support compression",
+						format_type_be(att->atttypid))));
+	}
+
+	/* fall back to the default compression method if none was specified */
+	if (compression == NULL)
+		return GetDefaultToastCompression();
+
+	cmethod = CompressionNameToMethod(compression);
+	if (!CompressionMethodIsValid(cmethod))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("invalid compression method \"%s\"", compression)));
+
+	return cmethod;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..e62bf7e853 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,6 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, 0);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1862,6 +1864,7 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,-1);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..2e30a09729 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -56,8 +56,12 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 248f78da45..7488ce1ea6 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -159,6 +159,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -246,6 +251,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9e6777e9d0..d04c980168 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#18Kohei KaiGai
kaigai@heterodb.com
In reply to: Kazutaka Onishi (#17)
Re: TRUNCATE on foreign table

Onishi-san,

The v5 patch contains full-contents of "src/backend/commands/tablecmds.c.orig".
Please check it.

2021年3月28日(日) 2:37 Kazutaka Onishi <onishi@heterodb.com>:

Fujii-san,

Thank you for your review!
Now I prepare v5 patch and I'll answer to your each comment. please
check this again.
m(_ _)m

1. In postgres-fdw.sgml, "and truncatable" should be appended into the
above first description?
2. truncate.sgml should be updated because, for example, it contains
the above descriptions.

Yeah, you're right. I've fixed it.

3. Don't we need to document the detail information about frels_extra?

I've documented frels_extra in fdwhandler.sgml.

4. postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not.

Please refer this:
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

Negative value means that foreign-tables are not specified in the TRUNCATE
command, but truncated due to dependency (like partition's child leaf).

I've added this information into fdwhandler.sgml.

5. For example, we can easily do that by truncate foreign tables
before local ones. Thought?

Umm... yeah, I feel it's better procedure, but not so required because
TRUNCATE is NOT called frequently.
Certainly, we already have postgresIsForeignUpdatable() to check
whether the foreign table is updatable or not.
Following this way, we have to add postgresIsForeignTruncatable() to check.
However, Unlike UPDATE, TRUNCATE is NOT called frequently. Current
procedure is inefficient but works correctly.
Thus, I feel postgresIsForeignTruncatable() is not needed.

6. XLOG_HEAP_TRUNCATE record is written even for the truncation of a
foreign table. Why is this necessary?

Please give us more time to investigate this.

2021年3月25日(木) 3:47 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/03/13 18:57, Kazutaka Onishi wrote:

I have fixed the patch to pass check-world test. :D

Thanks for updating the patch! Here are some review comments from me.

By default all foreign tables using <filename>postgres_fdw</filename> are assumed
to be updatable. This may be overridden using the following option:

In postgres-fdw.sgml, "and truncatable" should be appended into
the above first description? Also "option" in the second description
should be a plural form "options"?

<command>TRUNCATE</command> is not currently supported for foreign tables.
This implies that if a specified table has any descendant tables that are
foreign, the command will fail.

truncate.sgml should be updated because, for example, it contains
the above descriptions.

+     <literal>frels_extra</literal> is same length with
+     <literal>frels_list</literal>, that delivers extra information of
+     the context where the foreign-tables are truncated.
+    </para>

Don't we need to document the detail information about frels_extra?
Otherwise the developers of FDW would fail to understand how to
handle the frels_extra when trying to make their FDWs support TRUNCATE.

+               relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
+                               relids_extra = lappend_int(relids_extra, -1);

postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not. That is, for example,
using only 0 and 1 for extra values is enough for the purpose. But
ExecuteTruncate() sets three values 0, -1 and 1 as extra ones. Why are
these three values necessary?

With the patch, if both local and foreign tables are specified as
the target tables to truncate, TRUNCATE command tries to truncate
foreign tables after truncating local ones. That is, if "truncatable"
option is set to false or enough permission to truncate is not granted
yet in the foreign server, an error will be thrown after the local tables
are truncated. I don't think this is good order of processings. IMO,
instead, we should check whether foreign tables can be truncated
before any actual truncation operations. For example, we can easily
do that by truncate foreign tables before local ones. Thought?

XLOG_HEAP_TRUNCATE record is written even for the truncation of
a foreign table. Why is this necessary?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#19Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#16)
Re: TRUNCATE on foreign table

Fujii-san,

XLOG_HEAP_TRUNCATE record is written even for the truncation of
a foreign table. Why is this necessary?

Foreign-tables are often used to access local data structure, like
columnar data files
on filesystem, not only remote accesses like postgres_fdw.
In case when we want to implement logical replication on this kind of
foreign-tables,
truncate-command must be delivered to subscriber node - to truncate
its local data.

In case of remote-access FDW drivers, truncate-command on the subscriber-side is
probably waste of cycles, however, only FDW driver and DBA who configured the
foreign-table know whether it is necessary, or not.

How about your opinions?

Best regards,

2021年3月25日(木) 3:47 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/03/13 18:57, Kazutaka Onishi wrote:

I have fixed the patch to pass check-world test. :D

Thanks for updating the patch! Here are some review comments from me.

By default all foreign tables using <filename>postgres_fdw</filename> are assumed
to be updatable. This may be overridden using the following option:

In postgres-fdw.sgml, "and truncatable" should be appended into
the above first description? Also "option" in the second description
should be a plural form "options"?

<command>TRUNCATE</command> is not currently supported for foreign tables.
This implies that if a specified table has any descendant tables that are
foreign, the command will fail.

truncate.sgml should be updated because, for example, it contains
the above descriptions.

+     <literal>frels_extra</literal> is same length with
+     <literal>frels_list</literal>, that delivers extra information of
+     the context where the foreign-tables are truncated.
+    </para>

Don't we need to document the detail information about frels_extra?
Otherwise the developers of FDW would fail to understand how to
handle the frels_extra when trying to make their FDWs support TRUNCATE.

+               relids_extra = lappend_int(relids_extra, (recurse ? 0 : 1));
+                               relids_extra = lappend_int(relids_extra, -1);

postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not. That is, for example,
using only 0 and 1 for extra values is enough for the purpose. But
ExecuteTruncate() sets three values 0, -1 and 1 as extra ones. Why are
these three values necessary?

With the patch, if both local and foreign tables are specified as
the target tables to truncate, TRUNCATE command tries to truncate
foreign tables after truncating local ones. That is, if "truncatable"
option is set to false or enough permission to truncate is not granted
yet in the foreign server, an error will be thrown after the local tables
are truncated. I don't think this is good order of processings. IMO,
instead, we should check whether foreign tables can be truncated
before any actual truncation operations. For example, we can easily
do that by truncate foreign tables before local ones. Thought?

XLOG_HEAP_TRUNCATE record is written even for the truncation of
a foreign table. Why is this necessary?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#20Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#19)
Re: TRUNCATE on foreign table

On 2021/03/29 9:31, Kohei KaiGai wrote:

Fujii-san,

XLOG_HEAP_TRUNCATE record is written even for the truncation of
a foreign table. Why is this necessary?

Foreign-tables are often used to access local data structure, like
columnar data files
on filesystem, not only remote accesses like postgres_fdw.
In case when we want to implement logical replication on this kind of
foreign-tables,
truncate-command must be delivered to subscriber node - to truncate
its local data.

In case of remote-access FDW drivers, truncate-command on the subscriber-side is
probably waste of cycles, however, only FDW driver and DBA who configured the
foreign-table know whether it is necessary, or not.

How about your opinions?

I understand the motivation of this. But the other DMLs like UPDATE also
do the same thing for foreign tables? That is, when those DML commands
are executed on foreign tables, their changes are WAL-logged in a publisher side,
e.g., for logical replication? If not, it seems strange to allow only TRUNCATE
on foreign tables to be WAL-logged in a publisher side...

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#21Michael Paquier
michael@paquier.xyz
In reply to: Fujii Masao (#20)
Re: TRUNCATE on foreign table

On Mon, Mar 29, 2021 at 10:53:14AM +0900, Fujii Masao wrote:

I understand the motivation of this. But the other DMLs like UPDATE also
do the same thing for foreign tables? That is, when those DML commands
are executed on foreign tables, their changes are WAL-logged in a publisher side,
e.g., for logical replication? If not, it seems strange to allow only TRUNCATE
on foreign tables to be WAL-logged in a publisher side...

Executing DMLs on foreign tables does not generate any WAL AFAIK with
the backend core code, even with wal_level = logical, as the DML is
executed within the FDW callback (see just ExecUpdate() or
ExecInsert() in nodeModifyTable.c), and foreign tables don't have an
AM set as they have no physical storage. A FDW may decide to generate
some WAL records by itself though when doing the operation, using the
generic WAL interface but that's rather limited.

Generating WAL for the truncation of foreign tables sounds also like a
strange concept to me. I think that you should just make the patch
work so as the truncation is passed down to the FDW that decides what
it needs to do with it, and do nothing more than that.
--
Michael

#22Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kazutaka Onishi (#17)
Re: TRUNCATE on foreign table

On 2021/03/28 2:37, Kazutaka Onishi wrote:

Fujii-san,

Thank you for your review!
Now I prepare v5 patch and I'll answer to your each comment. please
check this again.

Thanks a lot!

5. For example, we can easily do that by truncate foreign tables
before local ones. Thought?

Umm... yeah, I feel it's better procedure, but not so required because
TRUNCATE is NOT called frequently.
Certainly, we already have postgresIsForeignUpdatable() to check
whether the foreign table is updatable or not.
Following this way, we have to add postgresIsForeignTruncatable() to check.
However, Unlike UPDATE, TRUNCATE is NOT called frequently. Current
procedure is inefficient but works correctly.
Thus, I feel postgresIsForeignTruncatable() is not needed.

I'm concerned about the case where a permission error occurs at the remote
server, rather than the case where the truncatable option is disabled. The
comments of ExecuteTruncate() explain its design as follows. But the patch seems
to break this because it truncates the local tables before checking the
permissions on foreign tables (i.e., the local tables in remote servers)... No?

We first open and grab exclusive
lock on all relations involved, checking permissions and otherwise
verifying that the relation is OK for truncation
Finally all the relations are truncated and reindexed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#23Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Michael Paquier (#21)
Re: TRUNCATE on foreign table

On 2021/03/29 13:55, Michael Paquier wrote:

On Mon, Mar 29, 2021 at 10:53:14AM +0900, Fujii Masao wrote:

I understand the motivation of this. But the other DMLs like UPDATE also
do the same thing for foreign tables? That is, when those DML commands
are executed on foreign tables, their changes are WAL-logged in a publisher side,
e.g., for logical replication? If not, it seems strange to allow only TRUNCATE
on foreign tables to be WAL-logged in a publisher side...

Executing DMLs on foreign tables does not generate any WAL AFAIK with
the backend core code, even with wal_level = logical, as the DML is
executed within the FDW callback (see just ExecUpdate() or
ExecInsert() in nodeModifyTable.c), and foreign tables don't have an
AM set as they have no physical storage. A FDW may decide to generate
some WAL records by itself though when doing the operation, using the
generic WAL interface but that's rather limited.

Generating WAL for the truncation of foreign tables sounds also like a
strange concept to me. I think that you should just make the patch
work so as the truncation is passed down to the FDW that decides what
it needs to do with it, and do nothing more than that.

Agreed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#24Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kazutaka Onishi (#17)
Re: TRUNCATE on foreign table

On 2021/03/28 2:37, Kazutaka Onishi wrote:

Fujii-san,

Thank you for your review!
I have now prepared the v5 patch, and I'll answer each of your comments.
Please check it again.
m(_ _)m

1. In postgres-fdw.sgml, "and truncatable" should be appended into the
above first description?
2. truncate.sgml should be updated because, for example, it contains
the above descriptions.

Yeah, you're right. I've fixed it.

3. Don't we need to document the detail information about frels_extra?

I've documented frels_extra in fdwhandler.sgml.

4. postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not.

Please refer this:
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

Negative value means that foreign-tables are not specified in the TRUNCATE
command, but truncated due to dependency (like partition's child leaf).

I've added this information into fdwhandler.sgml.

Even when a foreign table is specified explicitly in the TRUNCATE command,
its extra value can be negative if it is first found as an inherited child
(i.e., in the case where the partitioned table having that foreign table as
its partition is specified explicitly in the TRUNCATE command).
Isn't this a problem?

Please imagine the following example;

----------------------------------
create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw;
create user mapping for public server loopback;

create table t (i int, j int) partition by hash (j);
create table t0 partition of t for values with (modulus 2, remainder 0);
create table t1 partition of t for values with (modulus 2, remainder 1);

create table test (i int, j int) partition by hash (i);
create table test0 partition of test for values with (modulus 2, remainder 0);
create foreign table ft partition of test for values with (modulus 2, remainder 1) server loopback options (table_name 't');
----------------------------------

In this example, "truncate ft, test" works fine, but "truncate test, ft" causes
an error though they should work in the same way basically.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#25Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#23)
Re: TRUNCATE on foreign table

2021年3月30日(火) 2:54 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/03/29 13:55, Michael Paquier wrote:

On Mon, Mar 29, 2021 at 10:53:14AM +0900, Fujii Masao wrote:

I understand the motivation of this. But the other DMLs like UPDATE also
do the same thing for foreign tables? That is, when those DML commands
are executed on foreign tables, their changes are WAL-logged in a publisher side,
e.g., for logical replication? If not, it seems strange to allow only TRUNCATE
on foreign tables to be WAL-logged in a publisher side...

Executing DMLs on foreign tables does not generate any WAL AFAIK with
the backend core code, even with wal_level = logical, as the DML is
executed within the FDW callback (see just ExecUpdate() or
ExecInsert() in nodeModifyTable.c), and foreign tables don't have an
AM set as they have no physical storage. A FDW may decide to generate
some WAL records by itself though when doing the operation, using the
generic WAL interface but that's rather limited.

Generating WAL for the truncation of foreign tables sounds also like a
strange concept to me. I think that you should just make the patch
work so as the truncation is passed down to the FDW that decides what
it needs to do with it, and do nothing more than that.

Agreed.

Ok, it's fair enough.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#26Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#24)
Re: TRUNCATE on foreign table

2021年3月30日(火) 3:45 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/03/28 2:37, Kazutaka Onishi wrote:

Fujii-san,

Thank you for your review!
Now I prepare v5 patch and I'll answer to your each comment. please
check this again.
m(_ _)m

1. In postgres-fdw.sgml, "and truncatable" should be appended into the
above first description?
2. truncate.sgml should be updated because, for example, it contains
the above descriptions.

Yeah, you're right. I've fixed it.

3. Don't we need to document the detail information about frels_extra?

I've documented frels_extra in fdwhandler.sgml.

4. postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not.

Please refer this:
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

Negative value means that foreign-tables are not specified in the TRUNCATE
command, but truncated due to dependency (like partition's child leaf).

I've added this information into fdwhandler.sgml.

Even when a foreign table is specified explicitly in TRUNCATE command,
its extra value can be negative if it's found as an inherited children firstly
(i.e., in the case where the partitioned table having that foreign table as
its partition is specified explicitly in TRUNCATE command).
Isn't this a problem?

Please imagine the following example;

----------------------------------
create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw;
create user mapping for public server loopback;

create table t (i int, j int) partition by hash (j);
create table t0 partition of t for values with (modulus 2, remainder 0);
create table t1 partition of t for values with (modulus 2, remainder 1);

create table test (i int, j int) partition by hash (i);
create table test0 partition of test for values with (modulus 2, remainder 0);
create foreign table ft partition of test for values with (modulus 2, remainder 1) server loopback options (table_name 't');
----------------------------------

In this example, "truncate ft, test" works fine, but "truncate test, ft" causes
an error though they should work in the same way basically.

(Although it was originally designed by me...)
If frels_extra would be a bit-masked value, we can avoid the problem.

Please assume the three labels below:
#define TRUNCATE_REL_CONTEXT__NORMAL 0x01
#define TRUNCATE_REL_CONTEXT__ONLY 0x02
#define TRUNCATE_REL_CONTEXT__CASCADED 0x04

Then, assign these labels on the extra flag according to the context where
the foreign-tables appeared in the truncate command.
Even if it is specified multiple times in the different context, FDW extension
can handle the best option according to the flags.

In this example, "truncate ft, test" works fine, but "truncate test, ft" causes

In both cases, ExecForeignTruncate shall be invoked to "ft" with
(NORMAL | CASCADED),
thus, postgres_fdw can determine the remote truncate command shall be
executed without "ONLY" clause.

How about the idea?

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#27Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#26)
Re: TRUNCATE on foreign table

On 2021/03/30 10:11, Kohei KaiGai wrote:

2021年3月30日(火) 3:45 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/03/28 2:37, Kazutaka Onishi wrote:

Fujii-san,

Thank you for your review!
Now I prepare v5 patch and I'll answer to your each comment. please
check this again.
m(_ _)m

1. In postgres-fdw.sgml, "and truncatable" should be appended into the
above first description?
2. truncate.sgml should be updated because, for example, it contains
the above descriptions.

Yeah, you're right. I've fixed it.

3. Don't we need to document the detail information about frels_extra?

I've documented frels_extra in fdwhandler.sgml.

4. postgres_fdw determines whether to specify ONLY or not by checking
whether the passed extra value is zero or not.

Please refer this:
/messages/by-id/CAOP8fzb-t3WVNLjGMC+4sV4AZa9S=MAQ7Q6pQoADMCf_1jp4ew@mail.gmail.com

Negative value means that foreign-tables are not specified in the TRUNCATE
command, but truncated due to dependency (like partition's child leaf).

I've added this information into fdwhandler.sgml.

Even when a foreign table is specified explicitly in TRUNCATE command,
its extra value can be negative if it's found as an inherited children firstly
(i.e., in the case where the partitioned table having that foreign table as
its partition is specified explicitly in TRUNCATE command).
Isn't this a problem?

Please imagine the following example;

----------------------------------
create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw;
create user mapping for public server loopback;

create table t (i int, j int) partition by hash (j);
create table t0 partition of t for values with (modulus 2, remainder 0);
create table t1 partition of t for values with (modulus 2, remainder 1);

create table test (i int, j int) partition by hash (i);
create table test0 partition of test for values with (modulus 2, remainder 0);
create foreign table ft partition of test for values with (modulus 2, remainder 1) server loopback options (table_name 't');
----------------------------------

In this example, "truncate ft, test" works fine, but "truncate test, ft" causes
an error though they should work in the same way basically.

(Although it was originally designed by me...)
If frels_extra would be a bit-masked value, we can avoid the problem.

Please assume the three labels below:
#define TRUNCATE_REL_CONTEXT__NORMAL 0x01
#define TRUNCATE_REL_CONTEXT__ONLY 0x02
#define TRUNCATE_REL_CONTEXT__CASCADED 0x04

Then, assign these labels on the extra flag according to the context where
the foreign-tables appeared in the truncate command.
Even if it is specified multiple times in the different context, FDW extension
can handle the best option according to the flags.

In this example, "truncate ft, test" works fine, but "truncate test, ft" causes

In both cases, ExecForeignTruncate shall be invoked to "ft" with
(NORMAL | CASCADED),
thus, postgres_fdw can determine the remote truncate command shall be
executed without "ONLY" clause.

How about the idea?

This idea looks better to me.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#28Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#22)
Re: TRUNCATE on foreign table

2021年3月30日(火) 2:53 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/03/28 2:37, Kazutaka Onishi wrote:

Fujii-san,

Thank you for your review!
Now I prepare v5 patch and I'll answer to your each comment. please
check this again.

Thanks a lot!

5. For example, we can easily do that by truncate foreign tables
before local ones. Thought?

Umm... yeah, I feel it's better procedure, but not so required because
TRUNCATE is NOT called frequently.
Certainly, we already have postgresIsForeignUpdatable() to check
whether the foreign table is updatable or not.
Following this way, we have to add postgresIsForeignTruncatable() to check.
However, unlike UPDATE, TRUNCATE is NOT called frequently. Current
procedure is inefficient but works correctly.
Thus, I feel postgresIsForeignTruncatable() is not needed.

I'm concerned about the case where permission errors occur at the remote
servers, rather than the case where the truncatable option is disabled. The
comments of ExecuteTruncate() explain its design as follows. But the patch
seems to break this because it truncates the local tables before checking the
permission on foreign tables (i.e., the local tables in remote servers)... No?

We first open and grab exclusive
lock on all relations involved, checking permissions and otherwise
verifying that the relation is OK for truncation
Finally all the relations are truncated and reindexed.

Fujii-san,

What does the "permission checks" mean in this context?
The permission checks on the foreign tables involved are already checked
at truncate_check_rel(), by PostgreSQL's standard access control.

Please assume an extreme example below:
1. I defined a foreign table with file_fdw onto a local CSV file.
2. Someone tries to scan the foreign table, and ACL allows it.
3. I disallow the read permission of the CSV using chmod, after the above step,
but prior to the query execution.
4. Someone's query moved to the execution stage, then IterateForeignScan()
raises an error due to OS level permission checks.

FDW is a mechanism to convert from/to external data sources to/from PostgreSQL's
structured data, as literal. Once we checked the permissions of
foreign-tables by
database ACLs, any other permission checks handled by FDW driver are a part of
execution (like, OS permission check when file_fdw read(2) the
underlying CSV files).
And, we have no reliable way to check all permissions in advance, such as
OS file permission checks or database permission checks by the remote server.
Even if a checker routine returned a "hint", it may have changed by
execution time. Somebody might change the "truncate" permission at the
remote server.

How about your opinions?

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#29Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#28)
Re: TRUNCATE on foreign table

On 2021/04/01 0:09, Kohei KaiGai wrote:

What does the "permission checks" mean in this context?
The permission checks on the foreign tables involved are already checked
at truncate_check_rel(), by PostgreSQL's standard access control.

I meant that's the permission check that happens in the remote server side.
For example, when the foreign table is defined on postgres_fdw and truncated,
TRUNCATE command is issued to the remote server via postgres_fdw and
it checks the permission of the table before performing actual truncation.

Please assume an extreme example below:
1. I defined a foreign table with file_fdw onto a local CSV file.
2. Someone tries to scan the foreign table, and ACL allows it.
3. I disallow the read permission of the CSV using chmod, after the above step,
but prior to the query execution.
4. Someone's query moved to the execution stage, then IterateForeignScan()
raises an error due to OS level permission checks.

FDW is a mechanism to convert from/to external data sources to/from PostgreSQL's
structured data, as literal. Once we checked the permissions of
foreign-tables by
database ACLs, any other permission checks handled by FDW driver are a part of
execution (like, OS permission check when file_fdw read(2) the
underlying CSV files).
And, we have no reliable way to check entire permissions preliminary,
like OS file
permission check or database permission checks by remote server. Even
if a checker
routine returned a "hint", it may be changed at the execution time.
Somebody might
change the "truncate" permission at the remote server.

How about your opinions?

I agree that something like checker routine might not be so useful and
also be overkill. I was thinking that it's better to truncate the foreign tables first
and the local ones later. Otherwise it's a waste of cycles to truncate
the local tables if the truncation on foreign table causes a permission error
on the remote server.

But ISTM that the order of tables to truncate that the current patch provides
doesn't cause any actual bugs. So if you think the current order is better,
I'm ok with that for now. In this case, the comments for ExecuteTruncate()
should be updated.

BTW, the latest patch doesn't seem to be applied cleanly to the master
because of commit 27e1f14563. Could you rebase it?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#30Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#29)
Re: TRUNCATE on foreign table

2021年4月1日(木) 18:53 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/01 0:09, Kohei KaiGai wrote:

What does the "permission checks" mean in this context?
The permission checks on the foreign tables involved are already checked
at truncate_check_rel(), by PostgreSQL's standard access control.

I meant that's the permission check that happens in the remote server side.
For example, when the foreign table is defined on postgres_fdw and truncated,
TRUNCATE command is issued to the remote server via postgres_fdw and
it checks the permission of the table before performing actual truncation.

Please assume an extreme example below:
1. I defined a foreign table with file_fdw onto a local CSV file.
2. Someone tries to scan the foreign table, and ACL allows it.
3. I disallow the read permission of the CSV using chmod, after the above step,
but prior to the query execution.
4. Someone's query moved to the execution stage, then IterateForeignScan()
raises an error due to OS level permission checks.

FDW is a mechanism to convert from/to external data sources to/from PostgreSQL's
structured data, as literal. Once we checked the permissions of
foreign-tables by
database ACLs, any other permission checks handled by FDW driver are a part of
execution (like, OS permission check when file_fdw read(2) the
underlying CSV files).
And, we have no reliable way to check entire permissions preliminary,
like OS file
permission check or database permission checks by remote server. Even
if a checker
routine returned a "hint", it may be changed at the execution time.
Somebody might
change the "truncate" permission at the remote server.

How about your opinions?

I agree that something like checker routine might not be so useful and
also be overkill. I was thinking that it's better to truncate the foreign tables first
and the local ones later. Otherwise it's a waste of cycles to truncate
the local tables if the truncation on foreign table causes a permission error
on the remote server.

But ISTM that the order of tables to truncate that the current patch provides
doesn't cause any actual bugs. So if you think the current order is better,
I'm ok with that for now. In this case, the comments for ExecuteTruncate()
should be updated.

It is fair enough for me to reverse the order of actual truncation.

How about the updated comments below?

This is a multi-relation truncate. We first open and grab exclusive
lock on all relations involved, checking permissions (local database
ACLs even if relations are foreign-tables) and otherwise verifying
that the relation is OK for truncation. In CASCADE mode, ...(snip)...
Finally all the relations are truncated and reindexed. If any foreign-
tables are involved, its callback shall be invoked prior to the truncation
of regular tables.

BTW, the latest patch doesn't seem to be applied cleanly to the master
because of commit 27e1f14563. Could you rebase it?

Onishi-san, go ahead. :-)

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#31Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#30)
Re: TRUNCATE on foreign table

On 2021/04/02 9:37, Kohei KaiGai wrote:

It is fair enough for me to reverse the order of actual truncation.

How about the updated comments below?

This is a multi-relation truncate. We first open and grab exclusive
lock on all relations involved, checking permissions (local database
ACLs even if relations are foreign-tables) and otherwise verifying
that the relation is OK for truncation. In CASCADE mode, ...(snip)...
Finally all the relations are truncated and reindexed. If any foreign-
tables are involved, its callback shall be invoked prior to the truncation
of regular tables.

LGTM.

BTW, the latest patch doesn't seem to be applied cleanly to the master
because of commit 27e1f14563. Could you rebase it?

Onishi-san, go ahead. :-)

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#32Kazutaka Onishi
onishi@heterodb.com
In reply to: Fujii Masao (#31)
1 attachment(s)
Re: TRUNCATE on foreign table

All,

Thank you for the discussion.
I've updated the patch (v6->v7) according to the conclusion.

I'll show the modified points:
1. Comments for ExecuteTruncate()
2. Replacing the integer extra values in frels_extra with labels (bit flags).
3. Skipping XLOG_HEAP_TRUNCATE on foreign table

Regards,

2021年4月2日(金) 11:44 Fujii Masao <masao.fujii@oss.nttdata.com>:

Show quoted text

On 2021/04/02 9:37, Kohei KaiGai wrote:

It is fair enough for me to reverse the order of actual truncation.

How about the updated comments below?

This is a multi-relation truncate. We first open and grab exclusive
lock on all relations involved, checking permissions (local database
ACLs even if relations are foreign-tables) and otherwise verifying
that the relation is OK for truncation. In CASCADE mode, ...(snip)...
Finally all the relations are truncated and reindexed. If any foreign-
tables are involved, its callback shall be invoked prior to the truncation
of regular tables.

LGTM.

BTW, the latest patch doesn't seem to be applied cleanly to the master
because of commit 27e1f14563. Could you rebase it?

Onishi-san, go ahead. :-)

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

pgsql14-truncate-on-foreign-table.v7.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v7.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bebac2f2e0 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT__ONLY)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..6a1c2bf5ee 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8214,6 +8214,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8913,7 +9146,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..e336f27bd8 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false,NULL);
+			
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..047e3e4dd9 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..706c41d34b 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context in which the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT__NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT__ONLY</literal> means
+     that it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT__CASCADED</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command
+     but is truncated due to a dependency (for example, as a partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partition leaves.
+       When <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       <literal>ONLY</literal>, so only the parent portion of the remote
+       table is truncated. As a result, it looks as if the
+       <command>TRUNCATE</command> command only partially eliminated the
+       contents of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..461be92565 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if the foreign data wrapper supports it.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..baad36817e 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,22 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign-tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign-
+ * tables are involved, its callback shall be invoked prior to the truncation
+ * of regular tables.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1656,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT__NORMAL : TRUNCATE_REL_CONTEXT__ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1704,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__CASCADED);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1717,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1743,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1795,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__CASCADED);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1896,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look after the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2020,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2130,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..0dbbf125c3 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT__CASCADED);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..d66760e4bf 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT__NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT__ONLY         0x02
+#define TRUNCATE_REL_CONTEXT__CASCADED     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9e6777e9d0..d04c980168 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#33Kazutaka Onishi
onishi@heterodb.com
In reply to: Kazutaka Onishi (#32)
1 attachment(s)
Re: TRUNCATE on foreign table

Sorry, but I found that the v7 patch has a typo and can't be built...
I have attached a fixed version (v8).

2021年4月3日(土) 9:53 Kazutaka Onishi <onishi@heterodb.com>:

Show quoted text

All,

Thank you for the discussion.
I've updated the patch (v6->v7) according to the conclusion of the discussion.

Here are the modified points:
1. Comments for ExecuteTruncate()
2. Replacing the raw integer values in frels_extra with named labels.
3. Skipping XLOG_HEAP_TRUNCATE on foreign tables

Regards,

2021年4月2日(金) 11:44 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/02 9:37, Kohei KaiGai wrote:

It is fair enough for me to reverse the order of actual truncation.

How about the updated comments below?

This is a multi-relation truncate. We first open and grab exclusive
lock on all relations involved, checking permissions (local database
ACLs even if relations are foreign-tables) and otherwise verifying
that the relation is OK for truncation. In CASCADE mode, ...(snip)...
Finally all the relations are truncated and reindexed. If any foreign-
tables are involved, its callback shall be invoked prior to the truncation
of regular tables.

LGTM.

BTW, the latest patch doesn't seem to be applied cleanly to the master
because of commit 27e1f14563. Could you rebase it?

Onishi-san, go ahead. :-)

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

pgsql14-truncate-on-foreign-table.v8.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v8.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bebac2f2e0 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT__ONLY)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..6a1c2bf5ee 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8214,6 +8214,239 @@ select * from rem3;
 
 drop foreign table rem3;
 drop table loc3;
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+ id |                y                 
+----+----------------------------------
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+ id | x 
+----+---
+(0 rows)
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
@@ -8913,7 +9146,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..e336f27bd8 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false,NULL);
+			
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..047e3e4dd9 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,96 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id;
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent;
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..706c41d34b 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context in which the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT__NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT__ONLY</literal> means
+     that it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT__CASCADED</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command itself,
+     but is truncated because of a dependency (for example, as a leaf partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (corresponding to the <literal>RESTRICT</literal>
+     and <literal>CASCADE</literal> options of <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partitions.
+       When <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       <literal>ONLY</literal>, so only the parent
+       portion of the remote table is truncated. As a result, it can look as if the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..461be92565 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used on foreign tables if the foreign data wrapper supports it.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..baad36817e 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,22 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign-tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign-
+ * tables are involved, its callback shall be invoked prior to the truncation
+ * of regular tables.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1656,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT__NORMAL : TRUNCATE_REL_CONTEXT__ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1704,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__CASCADED);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1717,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1743,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1795,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__CASCADED);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1896,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated to the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry for the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2020,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated with the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2130,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..0dbbf125c3 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT__CASCADED);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..d66760e4bf 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT__NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT__ONLY         0x02
+#define TRUNCATE_REL_CONTEXT__CASCADED     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9e6777e9d0..d04c980168 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#34Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#33)
Re: TRUNCATE on foreign table

On Sat, Apr 3, 2021 at 7:16 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Sorry but I found the v7 patch has typo and it can't be built...
I attached fixed one(v8).

Thanks for the patch. Here are some comments on v8 patch:
1) We usually have the struct name after "+typedef struct
ForeignTruncateInfo", please refer to other struct defs in the code
base.

2) We should add ORDER BY clause(probably ORDER BY id?) for data
generating select queries in added tests, otherwise tests might become
unstable.

3) How about dropping the tables, foreign tables that got created for
testing in postgres_fdw.sql?

4) I think it's not "foreign-tables"/"foreign-table", it can be
"foreign tables"/"foreign table", other places in the docs use this
convention.
+ the context where the foreign-tables are truncated. It is a list
of integers and same length with

5) Can't we use do_sql_command function after making it non static? We
could go extra mile, that is we could make do_sql_command little more
generic by passing some enum for each of PQsendQuery,
PQsendQueryParams, PQsendQueryPrepared and PQsendPrepare and replace
the respective code chunks with do_sql_command in postgres_fdw.c.

+    /* run remote query */
+    if (!PQsendQuery(conn, sql.data))
+        pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+    res = pgfdw_get_result(conn, sql.data);
+    if (PQresultStatus(res) != PGRES_COMMAND_OK)
+        pgfdw_report_error(ERROR, res, conn, true, sql.data);
+    /* clean-up */
+    PQclear(res);

6) A white space error when the patch is applied.
contrib/postgres_fdw/postgres_fdw.c:2913: trailing whitespace.
+

7) I may be missing something here. Why do we need a hash table at
all? We could just do it with a linked list right? Is there a specific
reason to use a hash table? IIUC, the hash table entries will be lying
around until the local session exits since we are not doing
hash_destroy.

8) How about having something like this?
+ <command>TRUNCATE</command> can be used for foreign tables if the
foreign data wrapper supports, for instance, see <xref
linkend="postgres-fdw"/>.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#35Zhihong Yu
zyu@yugabyte.com
In reply to: Kazutaka Onishi (#33)
Re: TRUNCATE on foreign table
Hi,
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations

The 'being' in above sentence can be omitted.

+ the context where the foreign-tables are truncated. It is a list of
integers and same length with

There should be a verb between 'and' and same :
It is a list of integers and has same length with

+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table *that* uses the server OID as lookup key,

The 'uses' is for 'This' (the struct), so 'that' should be 'and':

the elements in a hash table and uses

Alternatively:

the elements in a hash table. It uses

+ relids_extra = lappend_int(relids_extra, (recurse ?
TRUNCATE_REL_CONTEXT__NORMAL : TRUNCATE_REL_CONTEXT__ONLY));

I am curious: isn't one underscore enough in the identifier (before NORMAL
and ONLY) ?

I suggest naming them TRUNCATE_REL_CONTEXT_NORMAL and
TRUNCATE_REL_CONTEXT_ONLY

Cheers

On Sat, Apr 3, 2021 at 6:46 AM Kazutaka Onishi <onishi@heterodb.com> wrote:

Show quoted text

Sorry but I found the v7 patch has typo and it can't be built...
I attached fixed one(v8).

2021年4月3日(土) 9:53 Kazutaka Onishi <onishi@heterodb.com>:

All,

Thank you for discussion.
I've updated the patch (v6->v7) according to the conclusion.

I'll show the modified points:
1. Comments for ExecuteTruncate()
2. Replacing extra value in frels_extra with integer to label.
3. Skipping XLOG_HEAP_TRUNCATE on foreign table

Regards,

2021年4月2日(金) 11:44 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/02 9:37, Kohei KaiGai wrote:

It is fair enough for me to reverse the order of actual truncation.

How about the updated comments below?

This is a multi-relation truncate. We first open and grab

exclusive

lock on all relations involved, checking permissions (local

database

ACLs even if relations are foreign-tables) and otherwise

verifying

that the relation is OK for truncation. In CASCADE mode,

...(snip)...

Finally all the relations are truncated and reindexed. If any

foreign-

tables are involved, its callback shall be invoked prior to the

truncation

of regular tables.

LGTM.

BTW, the latest patch doesn't seem to be applied cleanly to the

master

because of commit 27e1f14563. Could you rebase it?

Onishi-san, go ahead. :-)

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#36Zhihong Yu
zyu@yugabyte.com
In reply to: Zhihong Yu (#35)
Re: TRUNCATE on foreign table

Continuing previous review...

+ relids_extra = lappend_int(relids_extra,
TRUNCATE_REL_CONTEXT__CASCADED);

I wonder if TRUNCATE_REL_CONTEXT_CASCADING is better
than TRUNCATE_REL_CONTEXT__CASCADED. Note the removal of the extra
underscore.
In English, we say: truncation cascading to foreign table.

w.r.t. Bharath's question on using hash table, I think the reason is that
the search would be more efficient:

+           ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
and
+       while ((ft_info = hash_seq_search(&seq)) != NULL)
+    * Now go through the hash table, and process each entry associated to
the
+    * servers involved in the TRUNCATE.

associated to -> associated with

Should the hash table be released at the end of ExecuteTruncateGuts() ?

Cheers

On Sat, Apr 3, 2021 at 7:38 AM Zhihong Yu <zyu@yugabyte.com> wrote:

Show quoted text
Hi,
+     <command>TRUNCATE</command> for each foreign server being involved
+     in one <command>TRUNCATE</command> command (note that invocations

The 'being' in above sentence can be omitted.

+ the context where the foreign-tables are truncated. It is a list of
integers and same length with

There should be a verb between 'and' and same :
It is a list of integers and has same length with

+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table *that* uses the server OID as lookup key,

The 'uses' is for 'This' (the struct), so 'that' should be 'and':

the elements in a hash table and uses

Alternatively:

the elements in a hash table. It uses

+ relids_extra = lappend_int(relids_extra, (recurse ?
TRUNCATE_REL_CONTEXT__NORMAL : TRUNCATE_REL_CONTEXT__ONLY));

I am curious: isn't one underscore enough in the identifier (before NORMAL
and ONLY) ?

I suggest naming them TRUNCATE_REL_CONTEXT_NORMAL and
TRUNCATE_REL_CONTEXT_ONLY

Cheers

On Sat, Apr 3, 2021 at 6:46 AM Kazutaka Onishi <onishi@heterodb.com>
wrote:

Sorry but I found the v7 patch has typo and it can't be built...
I attached fixed one(v8).

2021年4月3日(土) 9:53 Kazutaka Onishi <onishi@heterodb.com>:

All,

Thank you for discussion.
I've updated the patch (v6->v7) according to the conclusion.

I'll show the modified points:
1. Comments for ExecuteTruncate()
2. Replacing extra value in frels_extra with integer to label.
3. Skipping XLOG_HEAP_TRUNCATE on foreign table

Regards,

2021年4月2日(金) 11:44 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/02 9:37, Kohei KaiGai wrote:

It is fair enough for me to reverse the order of actual truncation.

How about the updated comments below?

This is a multi-relation truncate. We first open and grab

exclusive

lock on all relations involved, checking permissions (local

database

ACLs even if relations are foreign-tables) and otherwise

verifying

that the relation is OK for truncation. In CASCADE mode,

...(snip)...

Finally all the relations are truncated and reindexed. If any

foreign-

tables are involved, its callback shall be invoked prior to

the truncation

of regular tables.

LGTM.

BTW, the latest patch doesn't seem to be applied cleanly to the

master

because of commit 27e1f14563. Could you rebase it?

Onishi-san, go ahead. :-)

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#37Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Zhihong Yu (#36)
Re: TRUNCATE on foreign table

On Sat, Apr 3, 2021 at 8:31 PM Zhihong Yu <zyu@yugabyte.com> wrote:

w.r.t. Bharath's question on using hash table, I think the reason is that the search would be more efficient:

Generally, sequential search would be slower if there are many entries
in a list. Here, the use case is to store all the foreign table ids
associated with each foreign server and I'm not sure how many foreign
tables will be provided in a single truncate command that belong to
different foreign servers. I strongly feel the count will be less and
using a list would be easier than to have a hash table. Others may
have better opinions.

Should the hash table be released at the end of ExecuteTruncateGuts() ?

If we go with a hash table and think that the frequency of "TRUNCATE"
commands on foreign tables is heavy in a local session, then it does
make sense to not destroy the hash, otherwise destroy the hash.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#38Kohei KaiGai
kaigai@heterodb.com
In reply to: Bharath Rupireddy (#37)
Re: TRUNCATE on foreign table

2021年4月4日(日) 13:07 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

On Sat, Apr 3, 2021 at 8:31 PM Zhihong Yu <zyu@yugabyte.com> wrote:

w.r.t. Bharath's question on using hash table, I think the reason is that the search would be more efficient:

Generally, sequential search would be slower if there are many entries
in a list. Here, the use case is to store all the foreign table ids
associated with each foreign server and I'm not sure how many foreign
tables will be provided in a single truncate command that belong to
different foreign servers. I strongly feel the count will be less and
using a list would be easier than to have a hash table. Others may
have better opinions.

/messages/by-id/20200115081126.GK2243@paquier.xyz

It was originally implemented using a simple list, then modified according to
the comment by Michael.
I think it is just a matter of preference.

Should the hash table be released at the end of ExecuteTruncateGuts() ?

If we go with a hash table and think that the frequency of "TRUNCATE"
commands on foreign tables is heavy in a local session, then it does
make sense to not destroy the hash, otherwise destroy the hash.

In most cases, TRUNCATE is not a command frequently executed.
So, exactly, it is just a matter of preference.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#39Kazutaka Onishi
onishi@heterodb.com
In reply to: Kohei KaiGai (#38)
1 attachment(s)
Re: TRUNCATE on foreign table

Hi

For now, I've revised the v8 patch according to your comments, excluding
anything related to the hash table and 'do_sql_command'.

1) We usually have the struct name after "+typedef struct
ForeignTruncateInfo", please refer to other struct defs in the code
base.

I've modified the definition.
By the way, there are many "typedef struct{ ... }NameOfStruct;" definitions in
the code base, about 40% of all struct definitions (checked with find and grep),
so I felt that this style is not a "MUST".

2) We should add ORDER BY clause(probably ORDER BY id?) for data
generating select queries in added tests, otherwise tests might become
unstable.

I've added "ORDER BY" to the postgres_fdw tests.

3) How about dropping the tables, foreign tables that got created for
testing in postgres_fdw.sql?

I've added "cleanup" commands.

4) I think it's not "foreign-tables"/"foreign-table", it can be
"foreign tables"/"foreign table", other places in the docs use this
convention.
+ the context where the foreign-tables are truncated. It is a list
of integers and same length with

I've replaced "foreign-table" with "foreign table".

5) Can't we use do_sql_command function after making it non static? We
could go extra mile, that is we could make do_sql_command little more
generic by passing some enum for each of PQsendQuery,
PQsendQueryParams, PQsendQueryPrepared and PQsendPrepare and replace
the respective code chunks with do_sql_command in postgres_fdw.c.

I've skipped this for now.
It sounds good, but it is not easy.
It should be done in a separate patch because it is not related only to TRUNCATE.

6) A white space error when the patch is applied.
contrib/postgres_fdw/postgres_fdw.c:2913: trailing whitespace.

I've checked the patch and cleaned up the whitespace.
But I couldn't reproduce this message when applying (patch -p1 < ...) my v8 patch.
If this still occurs, please tell me how you apply the patch.

7) I may be missing something here. Why do we need a hash table at
all? We could just do it with a linked list right? Is there a specific
reason to use a hash table? IIUC, the hash table entries will be lying
around until the local session exits since we are not doing
hash_destroy.

I've skipped this for now.

8) How about having something like this?
+ <command>TRUNCATE</command> can be used for foreign tables if the
foreign data wrapper supports, for instance, see <xref
linkend="postgres-fdw"/>.

Sounds good. I've added.

9)

+ <command>TRUNCATE</command> for each foreign server being involved

+ in one <command>TRUNCATE</command> command (note that invocations
The 'being' in above sentence can be omitted.

I've fixed this.

10)

+ the context where the foreign-tables are truncated. It is a list of integers and same length with
There should be a verb between 'and' and same :
It is a list of integers and has same length with

I've fixed this.

11)

+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
The 'uses' is for 'This' (the struct), so 'that' should be 'and':
the elements in a hash table and uses
Alternatively:
the elements in a hash table. It uses

I've fixed this.

12)

+ relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT__NORMAL : TRUNCATE_REL_CONTEXT__ONLY));
I am curious: isn't one underscore enough in the identifier (before NORMAL and ONLY) ?
I suggest naming them TRUNCATE_REL_CONTEXT_NORMAL and TRUNCATE_REL_CONTEXT_ONLY

+ relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__CASCADED);
I wonder if TRUNCATE_REL_CONTEXT_CASCADING is better than TRUNCATE_REL_CONTEXT__CASCADED. Note the removal of the extra underscore.
In English, we say: truncation cascading to foreign table.
w.r.t. Bharath's question on using hash table, I think the reason is that the search would be more efficient:

I've changed these labels as shown below:
TRUNCATE_REL_CONTEXT__NORMAL -> TRUNCATE_REL_CONTEXT_NORMAL
TRUNCATE_REL_CONTEXT__ONLY -> TRUNCATE_REL_CONTEXT_ONLY
TRUNCATE_REL_CONTEXT__CASCADED -> TRUNCATE_REL_CONTEXT_CASCADING

13)

+           ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
and
+       while ((ft_info = hash_seq_search(&seq)) != NULL)
+    * Now go through the hash table, and process each entry associated to the
+    * servers involved in the TRUNCATE.
associated to -> associated with

I've fixed this.

14) Should the hash table be released at the end of ExecuteTruncateGuts() ?

I've skipped this for now.

2021年4月4日(日) 14:13 Kohei KaiGai <kaigai@heterodb.com>:

Show quoted text

2021年4月4日(日) 13:07 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

On Sat, Apr 3, 2021 at 8:31 PM Zhihong Yu <zyu@yugabyte.com> wrote:

w.r.t. Bharath's question on using hash table, I think the reason is that the search would be more efficient:

Generally, sequential search would be slower if there are many entries
in a list. Here, the use case is to store all the foreign table ids
associated with each foreign server and I'm not sure how many foreign
tables will be provided in a single truncate command that belong to
different foreign servers. I strongly feel the count will be less and
using a list would be easier than to have a hash table. Others may
have better opinions.

/messages/by-id/20200115081126.GK2243@paquier.xyz

It was originally implemented using a simple list, then modified according to
the comment by Michael.
I think it is just a matter of preference.

Should the hash table be released at the end of ExecuteTruncateGuts() ?

If we go with a hash table and think that the frequency of "TRUNCATE"
commands on foreign tables is heavy in a local session, then it does
make sense to not destroy the hash, otherwise destroy the hash.

In most cases, TRUNCATE is not a command frequently executed.
So, exactly, it is just a matter of preference.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

Attachments:

pgsql14-truncate-on-foreign-table.v9.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v9.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bebac2f2e0 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..45bb03f56d 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,243 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable ORDER BY id;
+ id |                y                 
+----+----------------------------------
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable ORDER BY id;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;    -- empty
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;  -- empty
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent ORDER BY id;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent; -- empty
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;    -- empty
+ id | x 
+----+---
+(0 rows)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, 
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9150,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..6fa20834a4 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false,NULL);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..4e620f67ff 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,101 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable ORDER BY id;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable ORDER BY id;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable ORDER BY id;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;    -- empty
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;  -- empty
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent ORDER BY id;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent; -- empty
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable ORDER BY id;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable ORDER BY id;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, 
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..97eea600cc 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context in which the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means that
+     it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command
+     but is truncated because of a dependency (for example, as a partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how the foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (corresponding to the <literal>RESTRICT</literal>
+     and <literal>CASCADE</literal> options of <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using the <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partition leaves.
+       When <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       <literal>ONLY</literal>, so only the parent portion of the remote
+       table is truncated. As a result, it may look as if the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 0ba1e2d44d..01ec9b8b0a 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1970,9 +1970,7 @@ testdb=&gt;
         </para>
 
         <para>
-        If you edit a file or the previous query, and you quit the editor without
-        modifying the file, the query buffer is cleared.
-        Otherwise, the new contents of the query buffer are re-parsed according to
+        The new contents of the query buffer are then re-parsed according to
         the normal rules of <application>psql</application>, treating the
         whole buffer as a single line.  Any complete queries are immediately
         executed; that is, if the query buffer contains or ends with a
@@ -2041,8 +2039,7 @@ Tue Oct 26 21:40:57 CEST 1999
          in the form of a <command>CREATE OR REPLACE FUNCTION</command> or
          <command>CREATE OR REPLACE PROCEDURE</command> command.
          Editing is done in the same way as for <literal>\edit</literal>.
-         If you quit the editor without saving, the statement is discarded.
-         If you save and exit the editor, the updated command is executed immediately
+         After the editor exits, the updated command is executed immediately
          if you added a semicolon to it.  Otherwise it is redisplayed;
          type semicolon or <literal>\g</literal> to send it, or <literal>\r</literal>
          to cancel.
@@ -2118,8 +2115,7 @@ Tue Oct 26 21:40:57 CEST 1999
          This command fetches and edits the definition of the named view,
          in the form of a <command>CREATE OR REPLACE VIEW</command> command.
          Editing is done in the same way as for <literal>\edit</literal>.
-         If you quit the editor without saving, the statement is discarded.
-         If you save and exit the editor, the updated command is executed immediately
+         After the editor exits, the updated command is executed immediately
          if you added a semicolon to it.  Otherwise it is redisplayed;
          type semicolon or <literal>\g</literal> to send it, or <literal>\r</literal>
          to cancel.
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..d9f97561a5 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 174727b501..dce7088e6b 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -407,11 +407,8 @@ AuxiliaryProcessMain(int argc, char *argv[])
 		 */
 		CreateAuxProcessResourceOwner();
 
-		/* Initialize statistics reporting */
-		pgstat_initialize();
-
 		/* Initialize backend status information */
-		pgstat_beinit();
+		pgstat_initialize();
 		pgstat_bestart();
 
 		/* register a before-shutdown callback for LWLock cleanup */
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..05bcb12d1d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved,
+ * their callbacks are invoked prior to the truncation of regular tables.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2129,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 4c4b072068..498d6ee123 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -46,6 +46,7 @@
 #include "libpq/pqsignal.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/fork_process.h"
@@ -62,6 +63,8 @@
 #include "storage/pg_shmem.h"
 #include "storage/proc.h"
 #include "storage/procsignal.h"
+#include "storage/sinvaladt.h"
+#include "utils/ascii.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
@@ -105,12 +108,26 @@
 #define PGSTAT_FUNCTION_HASH_SIZE	512
 
 
+/* ----------
+ * Total number of backends including auxiliary
+ *
+ * We reserve a slot for each possible BackendId, plus one for each
+ * possible auxiliary process type.  (This scheme assumes there is not
+ * more than one of any auxiliary process type at a time.) MaxBackends
+ * includes autovacuum workers and background workers as well.
+ * ----------
+ */
+#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES)
+
+
 /* ----------
  * GUC parameters
  * ----------
  */
+bool		pgstat_track_activities = false;
 bool		pgstat_track_counts = false;
 int			pgstat_track_functions = TRACK_FUNC_OFF;
+int			pgstat_track_activity_query_size = 1024;
 
 /* ----------
  * Built from GUC parameter
@@ -242,8 +259,8 @@ static int	pgStatXactCommit = 0;
 static int	pgStatXactRollback = 0;
 PgStat_Counter pgStatBlockReadTime = 0;
 PgStat_Counter pgStatBlockWriteTime = 0;
-PgStat_Counter pgStatActiveTime = 0;
-PgStat_Counter pgStatTransactionIdleTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
 SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
 
 /* Record that's written to 2PC state file when pgstat state is persisted */
@@ -266,6 +283,12 @@ typedef struct TwoPhasePgStatRecord
 static MemoryContext pgStatLocalContext = NULL;
 static HTAB *pgStatDBHash = NULL;
 
+/* Status for backends including auxiliary */
+static LocalPgBackendStatus *localBackendStatusTable = NULL;
+
+/* Total number of backends including auxiliary */
+static int	localNumBackends = 0;
+
 /*
  * Cluster wide statistics, kept in the stats collector.
  * Contains statistics that are not collected per database
@@ -302,6 +325,7 @@ static pid_t pgstat_forkexec(void);
 #endif
 
 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
+static void pgstat_beshutdown_hook(int code, Datum arg);
 
 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
 static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
@@ -311,6 +335,7 @@ static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanen
 static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
 static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
 static void backend_read_statsfile(void);
+static void pgstat_read_current_status(void);
 
 static bool pgstat_write_statsfile_needed(void);
 static bool pgstat_db_requested(Oid databaseid);
@@ -2732,6 +2757,65 @@ pgstat_fetch_stat_funcentry(Oid func_id)
 }
 
 
+/* ----------
+ * pgstat_fetch_stat_beentry() -
+ *
+ *	Support function for the SQL-callable pgstat* functions. Returns
+ *	our local copy of the current-activity entry for one backend.
+ *
+ *	NB: caller is responsible for a check if the user is permitted to see
+ *	this info (especially the querystring).
+ * ----------
+ */
+PgBackendStatus *
+pgstat_fetch_stat_beentry(int beid)
+{
+	pgstat_read_current_status();
+
+	if (beid < 1 || beid > localNumBackends)
+		return NULL;
+
+	return &localBackendStatusTable[beid - 1].backendStatus;
+}
+
+
+/* ----------
+ * pgstat_fetch_stat_local_beentry() -
+ *
+ *	Like pgstat_fetch_stat_beentry() but with locally computed additions (like
+ *	xid and xmin values of the backend)
+ *
+ *	NB: caller is responsible for a check if the user is permitted to see
+ *	this info (especially the querystring).
+ * ----------
+ */
+LocalPgBackendStatus *
+pgstat_fetch_stat_local_beentry(int beid)
+{
+	pgstat_read_current_status();
+
+	if (beid < 1 || beid > localNumBackends)
+		return NULL;
+
+	return &localBackendStatusTable[beid - 1];
+}
+
+
+/* ----------
+ * pgstat_fetch_stat_numbackends() -
+ *
+ *	Support function for the SQL-callable pgstat* functions. Returns
+ *	the maximum current backend id.
+ * ----------
+ */
+int
+pgstat_fetch_stat_numbackends(void)
+{
+	pgstat_read_current_status();
+
+	return localNumBackends;
+}
+
 /*
  * ---------
  * pgstat_fetch_stat_archiver() -
@@ -2815,16 +2899,421 @@ pgstat_fetch_replslot(int *nslots_p)
 	return replSlotStats;
 }
 
+/* ------------------------------------------------------------
+ * Functions for management of the shared-memory PgBackendStatus array
+ * ------------------------------------------------------------
+ */
+
+static PgBackendStatus *BackendStatusArray = NULL;
+static PgBackendStatus *MyBEEntry = NULL;
+static char *BackendAppnameBuffer = NULL;
+static char *BackendClientHostnameBuffer = NULL;
+static char *BackendActivityBuffer = NULL;
+static Size BackendActivityBufferSize = 0;
+#ifdef USE_SSL
+static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
+#endif
+#ifdef ENABLE_GSS
+static PgBackendGSSStatus *BackendGssStatusBuffer = NULL;
+#endif
+
+
+/*
+ * Report shared-memory space needed by CreateSharedBackendStatus.
+ */
+Size
+BackendStatusShmemSize(void)
+{
+	Size		size;
+
+	/* BackendStatusArray: */
+	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
+	/* BackendAppnameBuffer: */
+	size = add_size(size,
+					mul_size(NAMEDATALEN, NumBackendStatSlots));
+	/* BackendClientHostnameBuffer: */
+	size = add_size(size,
+					mul_size(NAMEDATALEN, NumBackendStatSlots));
+	/* BackendActivityBuffer: */
+	size = add_size(size,
+					mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
+#ifdef USE_SSL
+	/* BackendSslStatusBuffer: */
+	size = add_size(size,
+					mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
+#endif
+#ifdef ENABLE_GSS
+	/* BackendGssStatusBuffer: */
+	size = add_size(size,
+					mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots));
+#endif
+	return size;
+}
+
+/*
+ * Initialize the shared status array and several string buffers
+ * during postmaster startup.
+ */
+void
+CreateSharedBackendStatus(void)
+{
+	Size		size;
+	bool		found;
+	int			i;
+	char	   *buffer;
+
+	/* Create or attach to the shared array */
+	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
+	BackendStatusArray = (PgBackendStatus *)
+		ShmemInitStruct("Backend Status Array", size, &found);
+
+	if (!found)
+	{
+		/*
+		 * We're the first - initialize.
+		 */
+		MemSet(BackendStatusArray, 0, size);
+	}
+
+	/* Create or attach to the shared appname buffer */
+	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
+	BackendAppnameBuffer = (char *)
+		ShmemInitStruct("Backend Application Name Buffer", size, &found);
+
+	if (!found)
+	{
+		MemSet(BackendAppnameBuffer, 0, size);
+
+		/* Initialize st_appname pointers. */
+		buffer = BackendAppnameBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_appname = buffer;
+			buffer += NAMEDATALEN;
+		}
+	}
+
+	/* Create or attach to the shared client hostname buffer */
+	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
+	BackendClientHostnameBuffer = (char *)
+		ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
+
+	if (!found)
+	{
+		MemSet(BackendClientHostnameBuffer, 0, size);
+
+		/* Initialize st_clienthostname pointers. */
+		buffer = BackendClientHostnameBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_clienthostname = buffer;
+			buffer += NAMEDATALEN;
+		}
+	}
+
+	/* Create or attach to the shared activity buffer */
+	BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
+										 NumBackendStatSlots);
+	BackendActivityBuffer = (char *)
+		ShmemInitStruct("Backend Activity Buffer",
+						BackendActivityBufferSize,
+						&found);
+
+	if (!found)
+	{
+		MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
+
+		/* Initialize st_activity pointers. */
+		buffer = BackendActivityBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_activity_raw = buffer;
+			buffer += pgstat_track_activity_query_size;
+		}
+	}
+
+#ifdef USE_SSL
+	/* Create or attach to the shared SSL status buffer */
+	size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots);
+	BackendSslStatusBuffer = (PgBackendSSLStatus *)
+		ShmemInitStruct("Backend SSL Status Buffer", size, &found);
+
+	if (!found)
+	{
+		PgBackendSSLStatus *ptr;
+
+		MemSet(BackendSslStatusBuffer, 0, size);
+
+		/* Initialize st_sslstatus pointers. */
+		ptr = BackendSslStatusBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_sslstatus = ptr;
+			ptr++;
+		}
+	}
+#endif
+
+#ifdef ENABLE_GSS
+	/* Create or attach to the shared GSSAPI status buffer */
+	size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots);
+	BackendGssStatusBuffer = (PgBackendGSSStatus *)
+		ShmemInitStruct("Backend GSS Status Buffer", size, &found);
+
+	if (!found)
+	{
+		PgBackendGSSStatus *ptr;
+
+		MemSet(BackendGssStatusBuffer, 0, size);
+
+		/* Initialize st_gssstatus pointers. */
+		ptr = BackendGssStatusBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_gssstatus = ptr;
+			ptr++;
+		}
+	}
+#endif
+}
+
+
+/* ----------
+ * pgstat_initialize() -
+ *
+ *	Initialize pgstats state, and set up our on-proc-exit hook.
+ *	Called from InitPostgres and AuxiliaryProcessMain. For auxiliary process,
+ *	MyBackendId is invalid. Otherwise, MyBackendId must be set,
+ *	but we must not have started any transaction yet (since the
+ *	exit hook must run after the last transaction exit).
+ *	NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
+ * ----------
+ */
+void
+pgstat_initialize(void)
+{
+	/* Initialize MyBEEntry */
+	if (MyBackendId != InvalidBackendId)
+	{
+		Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
+		MyBEEntry = &BackendStatusArray[MyBackendId - 1];
+	}
+	else
+	{
+		/* Must be an auxiliary process */
+		Assert(MyAuxProcType != NotAnAuxProcess);
+
+		/*
+		 * Assign the MyBEEntry for an auxiliary process.  Since it doesn't
+		 * have a BackendId, the slot is statically allocated based on the
+		 * auxiliary process type (MyAuxProcType).  Backends use slots indexed
+		 * in the range from 1 to MaxBackends (inclusive), so we use
+		 * MaxBackends + AuxBackendType + 1 as the index of the slot for an
+		 * auxiliary process.
+		 */
+		MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
+	}
+
+	/*
+	 * Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can
+	 * calculate how much pgWalUsage counters are increased by subtracting
+	 * prevWalUsage from pgWalUsage.
+	 */
+	prevWalUsage = pgWalUsage;
+
+	/* Set up a process-exit hook to clean up */
+	on_shmem_exit(pgstat_beshutdown_hook, 0);
+}
+
+/* ----------
+ * pgstat_bestart() -
+ *
+ *	Initialize this backend's entry in the PgBackendStatus array.
+ *	Called from InitPostgres.
+ *
+ *	Apart from auxiliary processes, MyBackendId, MyDatabaseId,
+ *	session userid, and application_name must be set for a
+ *	backend (hence, this cannot be combined with pgstat_initialize).
+ *	Note also that we must be inside a transaction if this isn't an aux
+ *	process, as we may need to do encoding conversion on some strings.
+ * ----------
+ */
+void
+pgstat_bestart(void)
+{
+	volatile PgBackendStatus *vbeentry = MyBEEntry;
+	PgBackendStatus lbeentry;
+#ifdef USE_SSL
+	PgBackendSSLStatus lsslstatus;
+#endif
+#ifdef ENABLE_GSS
+	PgBackendGSSStatus lgssstatus;
+#endif
+
+	/* pgstats state must be initialized from pgstat_initialize() */
+	Assert(vbeentry != NULL);
+
+	/*
+	 * To minimize the time spent modifying the PgBackendStatus entry, and
+	 * avoid risk of errors inside the critical section, we first copy the
+	 * shared-memory struct to a local variable, then modify the data in the
+	 * local variable, then copy the local variable back to shared memory.
+	 * Only the last step has to be inside the critical section.
+	 *
+	 * Most of the data we copy from shared memory is just going to be
+	 * overwritten, but the struct's not so large that it's worth the
+	 * maintenance hassle to copy only the needful fields.
+	 */
+	memcpy(&lbeentry,
+		   unvolatize(PgBackendStatus *, vbeentry),
+		   sizeof(PgBackendStatus));
+
+	/* These structs can just start from zeroes each time, though */
+#ifdef USE_SSL
+	memset(&lsslstatus, 0, sizeof(lsslstatus));
+#endif
+#ifdef ENABLE_GSS
+	memset(&lgssstatus, 0, sizeof(lgssstatus));
+#endif
+
+	/*
+	 * Now fill in all the fields of lbeentry, except for strings that are
+	 * out-of-line data.  Those have to be handled separately, below.
+	 */
+	lbeentry.st_procpid = MyProcPid;
+	lbeentry.st_backendType = MyBackendType;
+	lbeentry.st_proc_start_timestamp = MyStartTimestamp;
+	lbeentry.st_activity_start_timestamp = 0;
+	lbeentry.st_state_start_timestamp = 0;
+	lbeentry.st_xact_start_timestamp = 0;
+	lbeentry.st_databaseid = MyDatabaseId;
+
+	/* We have userid for client-backends, wal-sender and bgworker processes */
+	if (lbeentry.st_backendType == B_BACKEND
+		|| lbeentry.st_backendType == B_WAL_SENDER
+		|| lbeentry.st_backendType == B_BG_WORKER)
+		lbeentry.st_userid = GetSessionUserId();
+	else
+		lbeentry.st_userid = InvalidOid;
+
+	/*
+	 * We may not have a MyProcPort (eg, if this is the autovacuum process).
+	 * If so, use all-zeroes client address, which is dealt with specially in
+	 * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
+	 */
+	if (MyProcPort)
+		memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr,
+			   sizeof(lbeentry.st_clientaddr));
+	else
+		MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr));
+
+#ifdef USE_SSL
+	if (MyProcPort && MyProcPort->ssl_in_use)
+	{
+		lbeentry.st_ssl = true;
+		lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort);
+		strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN);
+		strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN);
+		be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN);
+		be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN);
+		be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN);
+	}
+	else
+	{
+		lbeentry.st_ssl = false;
+	}
+#else
+	lbeentry.st_ssl = false;
+#endif
+
+#ifdef ENABLE_GSS
+	if (MyProcPort && MyProcPort->gss != NULL)
+	{
+		const char *princ = be_gssapi_get_princ(MyProcPort);
+
+		lbeentry.st_gss = true;
+		lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort);
+		lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort);
+		if (princ)
+			strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN);
+	}
+	else
+	{
+		lbeentry.st_gss = false;
+	}
+#else
+	lbeentry.st_gss = false;
+#endif
+
+	lbeentry.st_state = STATE_UNDEFINED;
+	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
+	lbeentry.st_progress_command_target = InvalidOid;
+
+	/*
+	 * we don't zero st_progress_param here to save cycles; nobody should
+	 * examine it until st_progress_command has been set to something other
+	 * than PROGRESS_COMMAND_INVALID
+	 */
+
+	/*
+	 * We're ready to enter the critical section that fills the shared-memory
+	 * status entry.  We follow the protocol of bumping st_changecount before
+	 * and after; and make sure it's even afterwards.  We use a volatile
+	 * pointer here to ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry);
+
+	/* make sure we'll memcpy the same st_changecount back */
+	lbeentry.st_changecount = vbeentry->st_changecount;
+
+	memcpy(unvolatize(PgBackendStatus *, vbeentry),
+		   &lbeentry,
+		   sizeof(PgBackendStatus));
+
+	/*
+	 * We can write the out-of-line strings and structs using the pointers
+	 * that are in lbeentry; this saves some de-volatilizing messiness.
+	 */
+	lbeentry.st_appname[0] = '\0';
+	if (MyProcPort && MyProcPort->remote_hostname)
+		strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname,
+				NAMEDATALEN);
+	else
+		lbeentry.st_clienthostname[0] = '\0';
+	lbeentry.st_activity_raw[0] = '\0';
+	/* Also make sure the last byte in each string area is always 0 */
+	lbeentry.st_appname[NAMEDATALEN - 1] = '\0';
+	lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0';
+	lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0';
+
+#ifdef USE_SSL
+	memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus));
+#endif
+#ifdef ENABLE_GSS
+	memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus));
+#endif
+
+	PGSTAT_END_WRITE_ACTIVITY(vbeentry);
+
+	/* Update app name to current GUC setting */
+	if (application_name)
+		pgstat_report_appname(application_name);
+}
+
 /*
  * Shut down a single backend's statistics reporting at process exit.
  *
  * Flush any remaining statistics counts out to the collector.
  * Without this, operations triggered during backend exit (such as
  * temp table deletions) won't be counted.
+ *
+ * Lastly, clear out our entry in the PgBackendStatus array.
  */
 static void
-pgstat_shutdown_hook(int code, Datum arg)
+pgstat_beshutdown_hook(int code, Datum arg)
 {
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
 	/*
 	 * If we got as far as discovering our own database ID, we can report what
 	 * we did to the collector.  Otherwise, we'd be sending an invalid
@@ -2833,29 +3322,584 @@ pgstat_shutdown_hook(int code, Datum arg)
 	 */
 	if (OidIsValid(MyDatabaseId))
 		pgstat_report_stat(true);
+
+	/*
+	 * Clear my status entry, following the protocol of bumping st_changecount
+	 * before and after.  We use a volatile pointer here to ensure the
+	 * compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	beentry->st_procpid = 0;	/* mark invalid */
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+
 /* ----------
- * pgstat_initialize() -
+ * pgstat_report_activity() -
  *
- *	Initialize pgstats state, and set up our on-proc-exit hook.
- *	Called from InitPostgres and AuxiliaryProcessMain.
+ *	Called from tcop/postgres.c to report what the backend is actually doing
+ *	(but note cmd_str can be NULL for certain cases).
  *
- *	NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
+ * All updates of the status entry follow the protocol of bumping
+ * st_changecount before and after.  We use a volatile pointer here to
+ * ensure the compiler doesn't try to get cute.
  * ----------
  */
 void
-pgstat_initialize(void)
+pgstat_report_activity(BackendState state, const char *cmd_str)
 {
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	TimestampTz start_timestamp;
+	TimestampTz current_timestamp;
+	int			len = 0;
+
+	TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
+
+	if (!beentry)
+		return;
+
+	if (!pgstat_track_activities)
+	{
+		if (beentry->st_state != STATE_DISABLED)
+		{
+			volatile PGPROC *proc = MyProc;
+
+			/*
+			 * track_activities is disabled, but we last reported a
+			 * non-disabled state.  As our final update, change the state and
+			 * clear fields we will not be updating anymore.
+			 */
+			PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+			beentry->st_state = STATE_DISABLED;
+			beentry->st_state_start_timestamp = 0;
+			beentry->st_activity_raw[0] = '\0';
+			beentry->st_activity_start_timestamp = 0;
+			/* st_xact_start_timestamp and wait_event_info are also disabled */
+			beentry->st_xact_start_timestamp = 0;
+			proc->wait_event_info = 0;
+			PGSTAT_END_WRITE_ACTIVITY(beentry);
+		}
+		return;
+	}
+
 	/*
-	 * Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can
-	 * calculate how much pgWalUsage counters are increased by substracting
-	 * prevWalUsage from pgWalUsage.
+	 * To minimize the time spent modifying the entry, and avoid risk of
+	 * errors inside the critical section, fetch all the needed data first.
 	 */
-	prevWalUsage = pgWalUsage;
+	start_timestamp = GetCurrentStatementStartTimestamp();
+	if (cmd_str != NULL)
+	{
+		/*
+		 * Compute length of to-be-stored string unaware of multi-byte
+		 * characters. For speed reasons that'll get corrected on read, rather
+		 * than computed every write.
+		 */
+		len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1);
+	}
+	current_timestamp = GetCurrentTimestamp();
 
-	/* Set up a process-exit hook to clean up */
-	on_shmem_exit(pgstat_shutdown_hook, 0);
+	/*
+	 * If the state has changed from "active" or "idle in transaction",
+	 * calculate the duration.
+	 */
+	if ((beentry->st_state == STATE_RUNNING ||
+		 beentry->st_state == STATE_FASTPATH ||
+		 beentry->st_state == STATE_IDLEINTRANSACTION ||
+		 beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+		state != beentry->st_state)
+	{
+		long		secs;
+		int			usecs;
+
+		TimestampDifference(beentry->st_state_start_timestamp,
+							current_timestamp,
+							&secs, &usecs);
+
+		if (beentry->st_state == STATE_RUNNING ||
+			beentry->st_state == STATE_FASTPATH)
+			pgStatActiveTime += secs * 1000000 + usecs;
+		else
+			pgStatTransactionIdleTime += secs * 1000000 + usecs;
+	}
+
+	/*
+	 * Now update the status entry
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	beentry->st_state = state;
+	beentry->st_state_start_timestamp = current_timestamp;
+
+	if (cmd_str != NULL)
+	{
+		memcpy((char *) beentry->st_activity_raw, cmd_str, len);
+		beentry->st_activity_raw[len] = '\0';
+		beentry->st_activity_start_timestamp = start_timestamp;
+	}
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_start_command() -
+ *
+ * Set st_progress_command (and st_progress_command_target) in own backend
+ * entry.  Also, zero-initialize st_progress_param array.
+ *-----------
+ */
+void
+pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!beentry || !pgstat_track_activities)
+		return;
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_progress_command = cmdtype;
+	beentry->st_progress_command_target = relid;
+	MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_update_param() -
+ *
+ * Update index'th member in st_progress_param[] of own backend entry.
+ *-----------
+ */
+void
+pgstat_progress_update_param(int index, int64 val)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
+
+	if (!beentry || !pgstat_track_activities)
+		return;
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_progress_param[index] = val;
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_update_multi_param() -
+ *
+ * Update multiple members in st_progress_param[] of own backend entry.
+ * This is atomic; readers won't see intermediate states.
+ *-----------
+ */
+void
+pgstat_progress_update_multi_param(int nparam, const int *index,
+								   const int64 *val)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	int			i;
+
+	if (!beentry || !pgstat_track_activities || nparam == 0)
+		return;
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	for (i = 0; i < nparam; ++i)
+	{
+		Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
+
+		beentry->st_progress_param[index[i]] = val[i];
+	}
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_end_command() -
+ *
+ * Reset st_progress_command (and st_progress_command_target) in own backend
+ * entry.  This signals the end of the command.
+ *-----------
+ */
+void
+pgstat_progress_end_command(void)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!beentry || !pgstat_track_activities)
+		return;
+
+	if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
+		return;
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
+	beentry->st_progress_command_target = InvalidOid;
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/* ----------
+ * pgstat_report_appname() -
+ *
+ *	Store appname (clipped to NAMEDATALEN - 1 bytes) in our status entry.
+ * ----------
+ */
+void
+pgstat_report_appname(const char *appname)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	int			len;
+
+	if (!beentry)
+		return;					/* no status entry for this process */
+
+	/* Clip to fit st_appname; unnecessary if GUC did its job, but be safe */
+	len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
+
+	/*
+	 * Update my status entry (even if pgstat_track_activities is off),
+	 * bumping st_changecount before and after.  We use a volatile pointer
+	 * here to ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	memcpy((char *) beentry->st_appname, appname, len);
+	beentry->st_appname[len] = '\0';
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*
+ * Report current transaction start timestamp as the specified value.
+ * Zero means there is no active transaction.
+ */
+void
+pgstat_report_xact_timestamp(TimestampTz tstamp)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!pgstat_track_activities || !beentry)
+		return;					/* tracking disabled, or no status entry */
+
+	/*
+	 * Store tstamp in my status entry, following the protocol of bumping
+	 * st_changecount before and after.  We use a volatile pointer here to
+	 * ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	beentry->st_xact_start_timestamp = tstamp;
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/* ----------
+ * pgstat_read_current_status() -
+ *
+ *	Copy the in-use entries of the PgBackendStatus array to local memory
+ *	(localBackendStatusTable), if not already done in this transaction.
+ * ----------
+ */
+static void
+pgstat_read_current_status(void)
+{
+	volatile PgBackendStatus *beentry;
+	LocalPgBackendStatus *localtable;
+	LocalPgBackendStatus *localentry;
+	char	   *localappname,
+			   *localclienthostname,
+			   *localactivity;
+#ifdef USE_SSL
+	PgBackendSSLStatus *localsslstatus;
+#endif
+#ifdef ENABLE_GSS
+	PgBackendGSSStatus *localgssstatus;
+#endif
+	int			i;
+
+	Assert(!pgStatRunningInCollector);
+	if (localBackendStatusTable)
+		return;					/* already done */
+
+	pgstat_setup_memcxt();
+
+	/*
+	 * Allocate storage for local copy of state data.  We can presume that
+	 * none of these requests overflow size_t, because we already calculated
+	 * the same values using mul_size during shmem setup.  However, with
+	 * probably-silly values of pgstat_track_activity_query_size and
+	 * max_connections, the localactivity buffer could exceed 1GB, so use
+	 * "huge" allocation for that one.
+	 */
+	localtable = (LocalPgBackendStatus *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   sizeof(LocalPgBackendStatus) * NumBackendStatSlots);
+	localappname = (char *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   NAMEDATALEN * NumBackendStatSlots);
+	localclienthostname = (char *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   NAMEDATALEN * NumBackendStatSlots);
+	localactivity = (char *)
+		MemoryContextAllocHuge(pgStatLocalContext,
+							   pgstat_track_activity_query_size * NumBackendStatSlots);
+#ifdef USE_SSL
+	localsslstatus = (PgBackendSSLStatus *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   sizeof(PgBackendSSLStatus) * NumBackendStatSlots);
+#endif
+#ifdef ENABLE_GSS
+	localgssstatus = (PgBackendGSSStatus *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   sizeof(PgBackendGSSStatus) * NumBackendStatSlots);
+#endif
+
+	localNumBackends = 0;
+
+	beentry = BackendStatusArray;
+	localentry = localtable;
+	for (i = 1; i <= NumBackendStatSlots; i++)
+	{
+		/*
+		 * Follow the protocol of retrying if st_changecount changes while we
+		 * copy the entry, or if it's odd.  (The check for odd is needed to
+		 * cover the case where we are able to completely copy the entry while
+		 * the source backend is between increment steps.)  We use a volatile
+		 * pointer here to ensure the compiler doesn't try to get cute.
+		 */
+		for (;;)
+		{
+			int			before_changecount;
+			int			after_changecount;
+
+			pgstat_begin_read_activity(beentry, before_changecount);
+
+			localentry->backendStatus.st_procpid = beentry->st_procpid;
+			/* Skip all the data-copying work if entry is not in use (pid <= 0) */
+			if (localentry->backendStatus.st_procpid > 0)
+			{
+				memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus));
+
+				/*
+				 * For each PgBackendStatus field that is a pointer, copy the
+				 * pointed-to data, then adjust the local copy of the pointer
+				 * field to point at the local copy of the data.
+				 *
+				 * strcpy is safe even if the string is modified concurrently,
+				 * because there's always a \0 at the end of the buffer.
+				 */
+				strcpy(localappname, (char *) beentry->st_appname);
+				localentry->backendStatus.st_appname = localappname;
+				strcpy(localclienthostname, (char *) beentry->st_clienthostname);
+				localentry->backendStatus.st_clienthostname = localclienthostname;
+				strcpy(localactivity, (char *) beentry->st_activity_raw);
+				localentry->backendStatus.st_activity_raw = localactivity;
+#ifdef USE_SSL
+				if (beentry->st_ssl)
+				{
+					memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
+					localentry->backendStatus.st_sslstatus = localsslstatus;
+				}
+#endif
+#ifdef ENABLE_GSS
+				if (beentry->st_gss)
+				{
+					memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus));
+					localentry->backendStatus.st_gssstatus = localgssstatus;
+				}
+#endif
+			}
+
+			pgstat_end_read_activity(beentry, after_changecount);
+
+			if (pgstat_read_activity_complete(before_changecount,
+											  after_changecount))
+				break;
+
+			/* Make sure we can break out of loop if stuck... */
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		beentry++;
+		/* Only valid entries are kept; otherwise the local slot is reused */
+		if (localentry->backendStatus.st_procpid > 0)
+		{
+			BackendIdGetTransactionIds(i,
+									   &localentry->backend_xid,
+									   &localentry->backend_xmin);
+
+			localentry++;
+			localappname += NAMEDATALEN;
+			localclienthostname += NAMEDATALEN;
+			localactivity += pgstat_track_activity_query_size;
+#ifdef USE_SSL
+			localsslstatus++;
+#endif
+#ifdef ENABLE_GSS
+			localgssstatus++;
+#endif
+			localNumBackends++;
+		}
+	}
+
+	/* Set the pointer only after completion of a valid table */
+	localBackendStatusTable = localtable;
+}
+
+/* ----------
+ * pgstat_get_backend_current_activity() -
+ *
+ *	Return a string representing the current activity of the backend with
+ *	the specified PID.  This looks directly at the BackendStatusArray,
+ *	and so will provide current information regardless of the age of our
+ *	transaction's snapshot of the status array.
+ *
+ *	It is the caller's responsibility to invoke this only for backends whose
+ *	state is expected to remain stable while the result is in use.  The
+ *	only current use is in deadlock reporting, where we can expect that
+ *	the target backend is blocked on a lock.  (There are corner cases
+ *	where the target's wait could get aborted while we are looking at it,
+ *	but the very worst consequence is to return a pointer to a string
+ *	that's been changed, so we won't worry too much.)
+ *	With checkUser, only superusers and the entry's own user get the string.
+ *	Note: return strings for special cases match pg_stat_get_backend_activity.
+ * ----------
+ */
+const char *
+pgstat_get_backend_current_activity(int pid, bool checkUser)
+{
+	PgBackendStatus *beentry;
+	int			i;
+
+	beentry = BackendStatusArray;
+	for (i = 1; i <= MaxBackends; i++)	/* skips auxiliary slots */
+	{
+		/*
+		 * Although we expect the target backend's entry to be stable, that
+		 * doesn't imply that anyone else's is.  To avoid identifying the
+		 * wrong backend, while we check for a match to the desired PID we
+		 * must follow the protocol of retrying if st_changecount changes
+		 * while we examine the entry, or if it's odd.  (This might be
+		 * unnecessary, since fetching or storing an int is almost certainly
+		 * atomic, but let's play it safe.)  We use a volatile pointer here to
+		 * ensure the compiler doesn't try to get cute.
+		 */
+		volatile PgBackendStatus *vbeentry = beentry;
+		bool		found;
+
+		for (;;)
+		{
+			int			before_changecount;
+			int			after_changecount;
+
+			pgstat_begin_read_activity(vbeentry, before_changecount);
+
+			found = (vbeentry->st_procpid == pid);
+
+			pgstat_end_read_activity(vbeentry, after_changecount);
+
+			if (pgstat_read_activity_complete(before_changecount,
+											  after_changecount))
+				break;
+
+			/* Make sure we can break out of loop if stuck... */
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		if (found)
+		{
+			/* Now it is safe to use the non-volatile pointer */
+			if (checkUser && !superuser() && beentry->st_userid != GetUserId())
+				return "<insufficient privilege>";
+			else if (*(beentry->st_activity_raw) == '\0')
+				return "<command string not enabled>";
+			else
+			{
+				/* the clipped copy is never freed; that leak seems acceptable */
+				return pgstat_clip_activity(beentry->st_activity_raw);
+			}
+		}
+
+		beentry++;
+	}
+
+	/* No entry matched pid.  If we get here, caller is in error ... */
+	return "<backend information not available>";
+}
+
+/* ----------
+ * pgstat_get_crashed_backend_activity() -
+ *
+ *	Return a string representing the current activity of the backend with
+ *	the specified PID.  Like the function above, but reads shared memory with
+ *	the expectation that it may be corrupt.  On success, copy the string
+ *	into the "buffer" argument (of size "buflen") and return that pointer.
+ *	On failure, return NULL.
+ *
+ *	This function is only intended to be used by the postmaster to report the
+ *	query that crashed a backend.  In particular, no attempt is made to
+ *	follow the correct concurrency protocol when accessing the
+ *	BackendStatusArray.  But that's OK, in the worst case we'll return a
+ *	corrupted message.  We also must take care not to trip on ereport(ERROR).
+ * ----------
+ */
+const char *
+pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
+{
+	volatile PgBackendStatus *beentry;
+	int			i;
+
+	beentry = BackendStatusArray;
+
+	/*
+	 * We probably shouldn't get here before shared memory has been set up,
+	 * but be safe.
+	 */
+	if (beentry == NULL || BackendActivityBuffer == NULL)
+		return NULL;
+
+	for (i = 1; i <= MaxBackends; i++)	/* skips auxiliary slots */
+	{
+		if (beentry->st_procpid == pid)
+		{
+			/* Read pointer just once, so it can't change after validation */
+			const char *activity = beentry->st_activity_raw;
+			const char *activity_last;
+
+			/*
+			 * We mustn't access activity string before we verify that it
+			 * falls within the BackendActivityBuffer. To make sure that the
+			 * entire string including its ending is contained within the
+			 * buffer, subtract one activity length from the buffer size.
+			 */
+			activity_last = BackendActivityBuffer + BackendActivityBufferSize
+				- pgstat_track_activity_query_size;
+
+			if (activity < BackendActivityBuffer ||
+				activity > activity_last)
+				return NULL;
+
+			/* If no string available, no point in a report */
+			if (activity[0] == '\0')
+				return NULL;
+
+			/*
+			 * Copy only ASCII-safe characters so we don't run into encoding
+			 * problems when reporting the message; and be sure not to run off
+			 * the end of memory (hence the Min with one activity length).  As
+			 * only ASCII is reported, multibyte aware clipping isn't needed.
+			 */
+			ascii_safe_strlcpy(buffer, activity,
+							   Min(buflen, pgstat_track_activity_query_size));
+
+			return buffer;
+		}
+
+		beentry++;
+	}
+
+	/* PID not found */
+	return NULL;
 }
 
 /* ------------------------------------------------------------
@@ -4612,15 +5656,10 @@ pgstat_clear_snapshot(void)
 	/* Reset variables */
 	pgStatLocalContext = NULL;
 	pgStatDBHash = NULL;
+	localBackendStatusTable = NULL;
+	localNumBackends = 0;
 	replSlotStats = NULL;
 	nReplSlotStats = 0;
-
-	/*
-	 * Historically the backend_status.c facilities lived in this file, and
-	 * were reset with the same function. For now keep it that way, and
-	 * forward the reset request.
-	 */
-	pgstat_clear_backend_activity_snapshot();
 }
 
 
@@ -5555,6 +6594,50 @@ pgstat_db_requested(Oid databaseid)
 	return false;
 }
 
+/*
+ * Convert a potentially unsafely truncated activity string (see
+ * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated
+ * one.
+ *
+ * The returned string (at most pgstat_track_activity_query_size - 1 bytes) is
+ * allocated in the caller's memory context and may be freed.
+ */
+char *
+pgstat_clip_activity(const char *raw_activity)
+{
+	char	   *activity;
+	int			rawlen;
+	int			cliplen;
+
+	/*
+	 * Some callers, like pgstat_get_backend_current_activity(), do not
+	 * guarantee that the buffer isn't concurrently modified. We try to take
+	 * care that the buffer is always terminated by a NUL byte regardless, but
+	 * let's still be paranoid about the string's length. In those cases the
+	 * underlying buffer is guaranteed to be pgstat_track_activity_query_size
+	 * large, so bound the copy by that size rather than trusting the NUL.
+	 */
+	activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1);
+
+	/* now double-guaranteed to be NUL terminated */
+	rawlen = strlen(activity);
+
+	/*
+	 * All supported server-encodings make it possible to determine the length
+	 * of a multi-byte character from its first byte (this is not the case for
+	 * client encodings, see GB18030). As st_activity is always stored using
+	 * server encoding, this allows us to perform multi-byte aware truncation,
+	 * even if the string earlier was truncated in the middle of a multi-byte
+	 * character.
+	 */
+	cliplen = pg_mbcliplen(activity, rawlen,
+						   pgstat_track_activity_query_size - 1);
+
+	activity[cliplen] = '\0';
+
+	return activity;
+}
+
 /* ----------
  * pgstat_replslot_index
  *
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..7dffd187fa 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 692f21ef6a..897045ee27 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -448,9 +448,6 @@ InitProcess(void)
 	OwnLatch(&MyProc->procLatch);
 	SwitchToSharedLatch();
 
-	/* now that we have a proc, report wait events to shared memory */
-	pgstat_set_wait_event_storage(&MyProc->wait_event_info);
-
 	/*
 	 * We might be reusing a semaphore that belonged to a failed process. So
 	 * be careful and reinitialize its value here.  (This is not strictly
@@ -604,9 +601,6 @@ InitAuxiliaryProcess(void)
 	OwnLatch(&MyProc->procLatch);
 	SwitchToSharedLatch();
 
-	/* now that we have a proc, report wait events to shared memory */
-	pgstat_set_wait_event_storage(&MyProc->wait_event_info);
-
 	/* Check that group locking fields are in a proper initial state. */
 	Assert(MyProc->lockGroupLeader == NULL);
 	Assert(dlist_is_empty(&MyProc->lockGroupMembers));
@@ -890,15 +884,10 @@ ProcKill(int code, Datum arg)
 	/*
 	 * Reset MyLatch to the process local one.  This is so that signal
 	 * handlers et al can continue using the latch after the shared latch
-	 * isn't ours anymore.
-	 *
-	 * Similarly, stop reporting wait events to MyProc->wait_event_info.
-	 *
-	 * After that clear MyProc and disown the shared latch.
+	 * isn't ours anymore. After that clear MyProc and disown the shared
+	 * latch.
 	 */
 	SwitchBackToLocalLatch();
-	pgstat_reset_wait_event_storage();
-
 	proc = MyProc;
 	MyProc = NULL;
 	DisownLatch(&proc->procLatch);
@@ -963,10 +952,13 @@ AuxiliaryProcKill(int code, Datum arg)
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
 
-	/* look at the equivalent ProcKill() code for comments */
+	/*
+	 * Reset MyLatch to the process local one.  This is so that signal
+	 * handlers et al can continue using the latch after the shared latch
+	 * isn't ours anymore. After that clear MyProc and disown the shared
+	 * latch.
+	 */
 	SwitchBackToLocalLatch();
-	pgstat_reset_wait_event_storage();
-
 	proc = MyProc;
 	MyProc = NULL;
 	DisownLatch(&proc->procLatch);
diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile
index 59196f278d..7cbe0cdbe8 100644
--- a/src/backend/utils/activity/Makefile
+++ b/src/backend/utils/activity/Makefile
@@ -14,8 +14,6 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	backend_progress.o \
-	backend_status.o \
 	wait_event.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c
deleted file mode 100644
index 293254993c..0000000000
--- a/src/backend/utils/activity/backend_progress.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/* ----------
- * progress.c
- *
- *	Command progress reporting infrastructure.
- *
- *	Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- *	src/backend/postmaster/progress.c
- * ----------
- */
-#include "postgres.h"
-
-#include "port/atomics.h" /* for memory barriers */
-#include "utils/backend_progress.h"
-#include "utils/backend_status.h"
-
-
-/*-----------
- * pgstat_progress_start_command() -
- *
- * Set st_progress_command (and st_progress_command_target) in own backend
- * entry.  Also, zero-initialize st_progress_param array.
- *-----------
- */
-void
-pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	if (!beentry || !pgstat_track_activities)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-	beentry->st_progress_command = cmdtype;
-	beentry->st_progress_command_target = relid;
-	MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*-----------
- * pgstat_progress_update_param() -
- *
- * Update index'th member in st_progress_param[] of own backend entry.
- *-----------
- */
-void
-pgstat_progress_update_param(int index, int64 val)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
-
-	if (!beentry || !pgstat_track_activities)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-	beentry->st_progress_param[index] = val;
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*-----------
- * pgstat_progress_update_multi_param() -
- *
- * Update multiple members in st_progress_param[] of own backend entry.
- * This is atomic; readers won't see intermediate states.
- *-----------
- */
-void
-pgstat_progress_update_multi_param(int nparam, const int *index,
-								   const int64 *val)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-	int			i;
-
-	if (!beentry || !pgstat_track_activities || nparam == 0)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	for (i = 0; i < nparam; ++i)
-	{
-		Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
-
-		beentry->st_progress_param[index[i]] = val[i];
-	}
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*-----------
- * pgstat_progress_end_command() -
- *
- * Reset st_progress_command (and st_progress_command_target) in own backend
- * entry.  This signals the end of the command.
- *-----------
- */
-void
-pgstat_progress_end_command(void)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	if (!beentry || !pgstat_track_activities)
-		return;
-
-	if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-	beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
-	beentry->st_progress_command_target = InvalidOid;
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
deleted file mode 100644
index a25ec0ee3c..0000000000
--- a/src/backend/utils/activity/backend_status.c
+++ /dev/null
@@ -1,1077 +0,0 @@
-/* ----------
- * backend_status.c
- *	  Backend status reporting infrastructure.
- *
- * Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- *
- * IDENTIFICATION
- *	  src/backend/postmaster/backend_status.c
- * ----------
- */
-#include "postgres.h"
-
-#include "access/xact.h"
-#include "libpq/libpq.h"
-#include "miscadmin.h"
-#include "pg_trace.h"
-#include "pgstat.h"
-#include "port/atomics.h" /* for memory barriers */
-#include "storage/ipc.h"
-#include "storage/proc.h" /* for MyProc */
-#include "storage/sinvaladt.h"
-#include "utils/ascii.h"
-#include "utils/backend_status.h"
-#include "utils/guc.h" /* for application_name */
-#include "utils/memutils.h"
-
-
-/* ----------
- * Total number of backends including auxiliary
- *
- * We reserve a slot for each possible BackendId, plus one for each
- * possible auxiliary process type.  (This scheme assumes there is not
- * more than one of any auxiliary process type at a time.) MaxBackends
- * includes autovacuum workers and background workers as well.
- * ----------
- */
-#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES)
-
-
-/* ----------
- * GUC parameters
- * ----------
- */
-bool		pgstat_track_activities = false;
-int			pgstat_track_activity_query_size = 1024;
-
-
-/* exposed so that progress.c can access it */
-PgBackendStatus *MyBEEntry = NULL;
-
-
-static PgBackendStatus *BackendStatusArray = NULL;
-static char *BackendAppnameBuffer = NULL;
-static char *BackendClientHostnameBuffer = NULL;
-static char *BackendActivityBuffer = NULL;
-static Size BackendActivityBufferSize = 0;
-#ifdef USE_SSL
-static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
-#endif
-#ifdef ENABLE_GSS
-static PgBackendGSSStatus *BackendGssStatusBuffer = NULL;
-#endif
-
-
-/* Status for backends including auxiliary */
-static LocalPgBackendStatus *localBackendStatusTable = NULL;
-
-/* Total number of backends including auxiliary */
-static int	localNumBackends = 0;
-
-static MemoryContext backendStatusSnapContext;
-
-
-static void pgstat_beshutdown_hook(int code, Datum arg);
-static void pgstat_read_current_status(void);
-static void pgstat_setup_backend_status_context(void);
-
-
-/*
- * Report shared-memory space needed by CreateSharedBackendStatus.
- */
-Size
-BackendStatusShmemSize(void)
-{
-	Size		size;
-
-	/* BackendStatusArray: */
-	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
-	/* BackendAppnameBuffer: */
-	size = add_size(size,
-					mul_size(NAMEDATALEN, NumBackendStatSlots));
-	/* BackendClientHostnameBuffer: */
-	size = add_size(size,
-					mul_size(NAMEDATALEN, NumBackendStatSlots));
-	/* BackendActivityBuffer: */
-	size = add_size(size,
-					mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
-#ifdef USE_SSL
-	/* BackendSslStatusBuffer: */
-	size = add_size(size,
-					mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
-#endif
-#ifdef ENABLE_GSS
-	/* BackendGssStatusBuffer: */
-	size = add_size(size,
-					mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots));
-#endif
-	return size;
-}
-
-/*
- * Initialize the shared status array and several string buffers
- * during postmaster startup.
- */
-void
-CreateSharedBackendStatus(void)
-{
-	Size		size;
-	bool		found;
-	int			i;
-	char	   *buffer;
-
-	/* Create or attach to the shared array */
-	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
-	BackendStatusArray = (PgBackendStatus *)
-		ShmemInitStruct("Backend Status Array", size, &found);
-
-	if (!found)
-	{
-		/*
-		 * We're the first - initialize.
-		 */
-		MemSet(BackendStatusArray, 0, size);
-	}
-
-	/* Create or attach to the shared appname buffer */
-	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
-	BackendAppnameBuffer = (char *)
-		ShmemInitStruct("Backend Application Name Buffer", size, &found);
-
-	if (!found)
-	{
-		MemSet(BackendAppnameBuffer, 0, size);
-
-		/* Initialize st_appname pointers. */
-		buffer = BackendAppnameBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_appname = buffer;
-			buffer += NAMEDATALEN;
-		}
-	}
-
-	/* Create or attach to the shared client hostname buffer */
-	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
-	BackendClientHostnameBuffer = (char *)
-		ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
-
-	if (!found)
-	{
-		MemSet(BackendClientHostnameBuffer, 0, size);
-
-		/* Initialize st_clienthostname pointers. */
-		buffer = BackendClientHostnameBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_clienthostname = buffer;
-			buffer += NAMEDATALEN;
-		}
-	}
-
-	/* Create or attach to the shared activity buffer */
-	BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
-										 NumBackendStatSlots);
-	BackendActivityBuffer = (char *)
-		ShmemInitStruct("Backend Activity Buffer",
-						BackendActivityBufferSize,
-						&found);
-
-	if (!found)
-	{
-		MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
-
-		/* Initialize st_activity pointers. */
-		buffer = BackendActivityBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_activity_raw = buffer;
-			buffer += pgstat_track_activity_query_size;
-		}
-	}
-
-#ifdef USE_SSL
-	/* Create or attach to the shared SSL status buffer */
-	size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots);
-	BackendSslStatusBuffer = (PgBackendSSLStatus *)
-		ShmemInitStruct("Backend SSL Status Buffer", size, &found);
-
-	if (!found)
-	{
-		PgBackendSSLStatus *ptr;
-
-		MemSet(BackendSslStatusBuffer, 0, size);
-
-		/* Initialize st_sslstatus pointers. */
-		ptr = BackendSslStatusBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_sslstatus = ptr;
-			ptr++;
-		}
-	}
-#endif
-
-#ifdef ENABLE_GSS
-	/* Create or attach to the shared GSSAPI status buffer */
-	size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots);
-	BackendGssStatusBuffer = (PgBackendGSSStatus *)
-		ShmemInitStruct("Backend GSS Status Buffer", size, &found);
-
-	if (!found)
-	{
-		PgBackendGSSStatus *ptr;
-
-		MemSet(BackendGssStatusBuffer, 0, size);
-
-		/* Initialize st_gssstatus pointers. */
-		ptr = BackendGssStatusBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_gssstatus = ptr;
-			ptr++;
-		}
-	}
-#endif
-}
-
-/*
- * Initialize pgstats backend activity state, and set up our on-proc-exit
- * hook.  Called from InitPostgres and AuxiliaryProcessMain. For auxiliary
- * process, MyBackendId is invalid. Otherwise, MyBackendId must be set, but we
- * must not have started any transaction yet (since the exit hook must run
- * after the last transaction exit).
- *
- * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
- */
-void
-pgstat_beinit(void)
-{
-	/* Initialize MyBEEntry */
-	if (MyBackendId != InvalidBackendId)
-	{
-		Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
-		MyBEEntry = &BackendStatusArray[MyBackendId - 1];
-	}
-	else
-	{
-		/* Must be an auxiliary process */
-		Assert(MyAuxProcType != NotAnAuxProcess);
-
-		/*
-		 * Assign the MyBEEntry for an auxiliary process.  Since it doesn't
-		 * have a BackendId, the slot is statically allocated based on the
-		 * auxiliary process type (MyAuxProcType).  Backends use slots indexed
-		 * in the range from 1 to MaxBackends (inclusive), so we use
-		 * MaxBackends + AuxBackendType + 1 as the index of the slot for an
-		 * auxiliary process.
-		 */
-		MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
-	}
-
-	/* Set up a process-exit hook to clean up */
-	on_shmem_exit(pgstat_beshutdown_hook, 0);
-}
-
-
-/* ----------
- * pgstat_bestart() -
- *
- *	Initialize this backend's entry in the PgBackendStatus array.
- *	Called from InitPostgres.
- *
- *	Apart from auxiliary processes, MyBackendId, MyDatabaseId,
- *	session userid, and application_name must be set for a
- *	backend (hence, this cannot be combined with pgbestat_beinit).
- *	Note also that we must be inside a transaction if this isn't an aux
- *	process, as we may need to do encoding conversion on some strings.
- * ----------
- */
-void
-pgstat_bestart(void)
-{
-	volatile PgBackendStatus *vbeentry = MyBEEntry;
-	PgBackendStatus lbeentry;
-#ifdef USE_SSL
-	PgBackendSSLStatus lsslstatus;
-#endif
-#ifdef ENABLE_GSS
-	PgBackendGSSStatus lgssstatus;
-#endif
-
-	/* pgstats state must be initialized from pgstat_beinit() */
-	Assert(vbeentry != NULL);
-
-	/*
-	 * To minimize the time spent modifying the PgBackendStatus entry, and
-	 * avoid risk of errors inside the critical section, we first copy the
-	 * shared-memory struct to a local variable, then modify the data in the
-	 * local variable, then copy the local variable back to shared memory.
-	 * Only the last step has to be inside the critical section.
-	 *
-	 * Most of the data we copy from shared memory is just going to be
-	 * overwritten, but the struct's not so large that it's worth the
-	 * maintenance hassle to copy only the needful fields.
-	 */
-	memcpy(&lbeentry,
-		   unvolatize(PgBackendStatus *, vbeentry),
-		   sizeof(PgBackendStatus));
-
-	/* These structs can just start from zeroes each time, though */
-#ifdef USE_SSL
-	memset(&lsslstatus, 0, sizeof(lsslstatus));
-#endif
-#ifdef ENABLE_GSS
-	memset(&lgssstatus, 0, sizeof(lgssstatus));
-#endif
-
-	/*
-	 * Now fill in all the fields of lbeentry, except for strings that are
-	 * out-of-line data.  Those have to be handled separately, below.
-	 */
-	lbeentry.st_procpid = MyProcPid;
-	lbeentry.st_backendType = MyBackendType;
-	lbeentry.st_proc_start_timestamp = MyStartTimestamp;
-	lbeentry.st_activity_start_timestamp = 0;
-	lbeentry.st_state_start_timestamp = 0;
-	lbeentry.st_xact_start_timestamp = 0;
-	lbeentry.st_databaseid = MyDatabaseId;
-
-	/* We have userid for client-backends, wal-sender and bgworker processes */
-	if (lbeentry.st_backendType == B_BACKEND
-		|| lbeentry.st_backendType == B_WAL_SENDER
-		|| lbeentry.st_backendType == B_BG_WORKER)
-		lbeentry.st_userid = GetSessionUserId();
-	else
-		lbeentry.st_userid = InvalidOid;
-
-	/*
-	 * We may not have a MyProcPort (eg, if this is the autovacuum process).
-	 * If so, use all-zeroes client address, which is dealt with specially in
-	 * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
-	 */
-	if (MyProcPort)
-		memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr,
-			   sizeof(lbeentry.st_clientaddr));
-	else
-		MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr));
-
-#ifdef USE_SSL
-	if (MyProcPort && MyProcPort->ssl_in_use)
-	{
-		lbeentry.st_ssl = true;
-		lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort);
-		strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN);
-		strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN);
-		be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN);
-		be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN);
-		be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN);
-	}
-	else
-	{
-		lbeentry.st_ssl = false;
-	}
-#else
-	lbeentry.st_ssl = false;
-#endif
-
-#ifdef ENABLE_GSS
-	if (MyProcPort && MyProcPort->gss != NULL)
-	{
-		const char *princ = be_gssapi_get_princ(MyProcPort);
-
-		lbeentry.st_gss = true;
-		lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort);
-		lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort);
-		if (princ)
-			strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN);
-	}
-	else
-	{
-		lbeentry.st_gss = false;
-	}
-#else
-	lbeentry.st_gss = false;
-#endif
-
-	lbeentry.st_state = STATE_UNDEFINED;
-	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
-	lbeentry.st_progress_command_target = InvalidOid;
-
-	/*
-	 * we don't zero st_progress_param here to save cycles; nobody should
-	 * examine it until st_progress_command has been set to something other
-	 * than PROGRESS_COMMAND_INVALID
-	 */
-
-	/*
-	 * We're ready to enter the critical section that fills the shared-memory
-	 * status entry.  We follow the protocol of bumping st_changecount before
-	 * and after; and make sure it's even afterwards.  We use a volatile
-	 * pointer here to ensure the compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry);
-
-	/* make sure we'll memcpy the same st_changecount back */
-	lbeentry.st_changecount = vbeentry->st_changecount;
-
-	memcpy(unvolatize(PgBackendStatus *, vbeentry),
-		   &lbeentry,
-		   sizeof(PgBackendStatus));
-
-	/*
-	 * We can write the out-of-line strings and structs using the pointers
-	 * that are in lbeentry; this saves some de-volatilizing messiness.
-	 */
-	lbeentry.st_appname[0] = '\0';
-	if (MyProcPort && MyProcPort->remote_hostname)
-		strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname,
-				NAMEDATALEN);
-	else
-		lbeentry.st_clienthostname[0] = '\0';
-	lbeentry.st_activity_raw[0] = '\0';
-	/* Also make sure the last byte in each string area is always 0 */
-	lbeentry.st_appname[NAMEDATALEN - 1] = '\0';
-	lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0';
-	lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0';
-
-#ifdef USE_SSL
-	memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus));
-#endif
-#ifdef ENABLE_GSS
-	memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus));
-#endif
-
-	PGSTAT_END_WRITE_ACTIVITY(vbeentry);
-
-	/* Update app name to current GUC setting */
-	if (application_name)
-		pgstat_report_appname(application_name);
-}
-
-/*
- * Clear out our entry in the PgBackendStatus array.
- */
-static void
-pgstat_beshutdown_hook(int code, Datum arg)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	/*
-	 * Clear my status entry, following the protocol of bumping st_changecount
-	 * before and after.  We use a volatile pointer here to ensure the
-	 * compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	beentry->st_procpid = 0;	/* mark invalid */
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*
- * Discard any data collected in the current transaction.  Any subsequent
- * request will cause new snapshots to be read.
- *
- * This is also invoked during transaction commit or abort to discard the
- * no-longer-wanted snapshot.
- */
-void
-pgstat_clear_backend_activity_snapshot(void)
-{
-	/* Release memory, if any was allocated */
-	if (backendStatusSnapContext)
-	{
-		MemoryContextDelete(backendStatusSnapContext);
-		backendStatusSnapContext = NULL;
-	}
-
-	/* Reset variables */
-	localBackendStatusTable = NULL;
-	localNumBackends = 0;
-}
-
-static void
-pgstat_setup_backend_status_context(void)
-{
-	if (!backendStatusSnapContext)
-		backendStatusSnapContext = AllocSetContextCreate(TopMemoryContext,
-													 "Backend Status Snapshot",
-													 ALLOCSET_SMALL_SIZES);
-}
-
-
-/* ----------
- * pgstat_report_activity() -
- *
- *	Called from tcop/postgres.c to report what the backend is actually doing
- *	(but note cmd_str can be NULL for certain cases).
- *
- * All updates of the status entry follow the protocol of bumping
- * st_changecount before and after.  We use a volatile pointer here to
- * ensure the compiler doesn't try to get cute.
- * ----------
- */
-void
-pgstat_report_activity(BackendState state, const char *cmd_str)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-	TimestampTz start_timestamp;
-	TimestampTz current_timestamp;
-	int			len = 0;
-
-	TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
-
-	if (!beentry)
-		return;
-
-	if (!pgstat_track_activities)
-	{
-		if (beentry->st_state != STATE_DISABLED)
-		{
-			volatile PGPROC *proc = MyProc;
-
-			/*
-			 * track_activities is disabled, but we last reported a
-			 * non-disabled state.  As our final update, change the state and
-			 * clear fields we will not be updating anymore.
-			 */
-			PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-			beentry->st_state = STATE_DISABLED;
-			beentry->st_state_start_timestamp = 0;
-			beentry->st_activity_raw[0] = '\0';
-			beentry->st_activity_start_timestamp = 0;
-			/* st_xact_start_timestamp and wait_event_info are also disabled */
-			beentry->st_xact_start_timestamp = 0;
-			proc->wait_event_info = 0;
-			PGSTAT_END_WRITE_ACTIVITY(beentry);
-		}
-		return;
-	}
-
-	/*
-	 * To minimize the time spent modifying the entry, and avoid risk of
-	 * errors inside the critical section, fetch all the needed data first.
-	 */
-	start_timestamp = GetCurrentStatementStartTimestamp();
-	if (cmd_str != NULL)
-	{
-		/*
-		 * Compute length of to-be-stored string unaware of multi-byte
-		 * characters. For speed reasons that'll get corrected on read, rather
-		 * than computed every write.
-		 */
-		len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1);
-	}
-	current_timestamp = GetCurrentTimestamp();
-
-	/*
-	 * If the state has changed from "active" or "idle in transaction",
-	 * calculate the duration.
-	 */
-	if ((beentry->st_state == STATE_RUNNING ||
-		 beentry->st_state == STATE_FASTPATH ||
-		 beentry->st_state == STATE_IDLEINTRANSACTION ||
-		 beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
-		state != beentry->st_state)
-	{
-		long		secs;
-		int			usecs;
-
-		TimestampDifference(beentry->st_state_start_timestamp,
-							current_timestamp,
-							&secs, &usecs);
-
-		if (beentry->st_state == STATE_RUNNING ||
-			beentry->st_state == STATE_FASTPATH)
-			pgstat_count_conn_active_time(secs * 1000000 + usecs);
-		else
-			pgstat_count_conn_txn_idle_time(secs * 1000000 + usecs);
-	}
-
-	/*
-	 * Now update the status entry
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	beentry->st_state = state;
-	beentry->st_state_start_timestamp = current_timestamp;
-
-	if (cmd_str != NULL)
-	{
-		memcpy((char *) beentry->st_activity_raw, cmd_str, len);
-		beentry->st_activity_raw[len] = '\0';
-		beentry->st_activity_start_timestamp = start_timestamp;
-	}
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/* ----------
- * pgstat_report_appname() -
- *
- *	Called to update our application name.
- * ----------
- */
-void
-pgstat_report_appname(const char *appname)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-	int			len;
-
-	if (!beentry)
-		return;
-
-	/* This should be unnecessary if GUC did its job, but be safe */
-	len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
-
-	/*
-	 * Update my status entry, following the protocol of bumping
-	 * st_changecount before and after.  We use a volatile pointer here to
-	 * ensure the compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	memcpy((char *) beentry->st_appname, appname, len);
-	beentry->st_appname[len] = '\0';
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*
- * Report current transaction start timestamp as the specified value.
- * Zero means there is no active transaction.
- */
-void
-pgstat_report_xact_timestamp(TimestampTz tstamp)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	if (!pgstat_track_activities || !beentry)
-		return;
-
-	/*
-	 * Update my status entry, following the protocol of bumping
-	 * st_changecount before and after.  We use a volatile pointer here to
-	 * ensure the compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	beentry->st_xact_start_timestamp = tstamp;
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/* ----------
- * pgstat_read_current_status() -
- *
- *	Copy the current contents of the PgBackendStatus array to local memory,
- *	if not already done in this transaction.
- * ----------
- */
-static void
-pgstat_read_current_status(void)
-{
-	volatile PgBackendStatus *beentry;
-	LocalPgBackendStatus *localtable;
-	LocalPgBackendStatus *localentry;
-	char	   *localappname,
-			   *localclienthostname,
-			   *localactivity;
-#ifdef USE_SSL
-	PgBackendSSLStatus *localsslstatus;
-#endif
-#ifdef ENABLE_GSS
-	PgBackendGSSStatus *localgssstatus;
-#endif
-	int			i;
-
-	if (localBackendStatusTable)
-		return;					/* already done */
-
-	pgstat_setup_backend_status_context();
-
-	/*
-	 * Allocate storage for local copy of state data.  We can presume that
-	 * none of these requests overflow size_t, because we already calculated
-	 * the same values using mul_size during shmem setup.  However, with
-	 * probably-silly values of pgstat_track_activity_query_size and
-	 * max_connections, the localactivity buffer could exceed 1GB, so use
-	 * "huge" allocation for that one.
-	 */
-	localtable = (LocalPgBackendStatus *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   sizeof(LocalPgBackendStatus) * NumBackendStatSlots);
-	localappname = (char *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   NAMEDATALEN * NumBackendStatSlots);
-	localclienthostname = (char *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   NAMEDATALEN * NumBackendStatSlots);
-	localactivity = (char *)
-		MemoryContextAllocHuge(backendStatusSnapContext,
-							   pgstat_track_activity_query_size * NumBackendStatSlots);
-#ifdef USE_SSL
-	localsslstatus = (PgBackendSSLStatus *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   sizeof(PgBackendSSLStatus) * NumBackendStatSlots);
-#endif
-#ifdef ENABLE_GSS
-	localgssstatus = (PgBackendGSSStatus *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   sizeof(PgBackendGSSStatus) * NumBackendStatSlots);
-#endif
-
-	localNumBackends = 0;
-
-	beentry = BackendStatusArray;
-	localentry = localtable;
-	for (i = 1; i <= NumBackendStatSlots; i++)
-	{
-		/*
-		 * Follow the protocol of retrying if st_changecount changes while we
-		 * copy the entry, or if it's odd.  (The check for odd is needed to
-		 * cover the case where we are able to completely copy the entry while
-		 * the source backend is between increment steps.)	We use a volatile
-		 * pointer here to ensure the compiler doesn't try to get cute.
-		 */
-		for (;;)
-		{
-			int			before_changecount;
-			int			after_changecount;
-
-			pgstat_begin_read_activity(beentry, before_changecount);
-
-			localentry->backendStatus.st_procpid = beentry->st_procpid;
-			/* Skip all the data-copying work if entry is not in use */
-			if (localentry->backendStatus.st_procpid > 0)
-			{
-				memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus));
-
-				/*
-				 * For each PgBackendStatus field that is a pointer, copy the
-				 * pointed-to data, then adjust the local copy of the pointer
-				 * field to point at the local copy of the data.
-				 *
-				 * strcpy is safe even if the string is modified concurrently,
-				 * because there's always a \0 at the end of the buffer.
-				 */
-				strcpy(localappname, (char *) beentry->st_appname);
-				localentry->backendStatus.st_appname = localappname;
-				strcpy(localclienthostname, (char *) beentry->st_clienthostname);
-				localentry->backendStatus.st_clienthostname = localclienthostname;
-				strcpy(localactivity, (char *) beentry->st_activity_raw);
-				localentry->backendStatus.st_activity_raw = localactivity;
-#ifdef USE_SSL
-				if (beentry->st_ssl)
-				{
-					memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
-					localentry->backendStatus.st_sslstatus = localsslstatus;
-				}
-#endif
-#ifdef ENABLE_GSS
-				if (beentry->st_gss)
-				{
-					memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus));
-					localentry->backendStatus.st_gssstatus = localgssstatus;
-				}
-#endif
-			}
-
-			pgstat_end_read_activity(beentry, after_changecount);
-
-			if (pgstat_read_activity_complete(before_changecount,
-											  after_changecount))
-				break;
-
-			/* Make sure we can break out of loop if stuck... */
-			CHECK_FOR_INTERRUPTS();
-		}
-
-		beentry++;
-		/* Only valid entries get included into the local array */
-		if (localentry->backendStatus.st_procpid > 0)
-		{
-			BackendIdGetTransactionIds(i,
-									   &localentry->backend_xid,
-									   &localentry->backend_xmin);
-
-			localentry++;
-			localappname += NAMEDATALEN;
-			localclienthostname += NAMEDATALEN;
-			localactivity += pgstat_track_activity_query_size;
-#ifdef USE_SSL
-			localsslstatus++;
-#endif
-#ifdef ENABLE_GSS
-			localgssstatus++;
-#endif
-			localNumBackends++;
-		}
-	}
-
-	/* Set the pointer only after completion of a valid table */
-	localBackendStatusTable = localtable;
-}
-
-
-/* ----------
- * pgstat_get_backend_current_activity() -
- *
- *	Return a string representing the current activity of the backend with
- *	the specified PID.  This looks directly at the BackendStatusArray,
- *	and so will provide current information regardless of the age of our
- *	transaction's snapshot of the status array.
- *
- *	It is the caller's responsibility to invoke this only for backends whose
- *	state is expected to remain stable while the result is in use.  The
- *	only current use is in deadlock reporting, where we can expect that
- *	the target backend is blocked on a lock.  (There are corner cases
- *	where the target's wait could get aborted while we are looking at it,
- *	but the very worst consequence is to return a pointer to a string
- *	that's been changed, so we won't worry too much.)
- *
- *	Note: return strings for special cases match pg_stat_get_backend_activity.
- * ----------
- */
-const char *
-pgstat_get_backend_current_activity(int pid, bool checkUser)
-{
-	PgBackendStatus *beentry;
-	int			i;
-
-	beentry = BackendStatusArray;
-	for (i = 1; i <= MaxBackends; i++)
-	{
-		/*
-		 * Although we expect the target backend's entry to be stable, that
-		 * doesn't imply that anyone else's is.  To avoid identifying the
-		 * wrong backend, while we check for a match to the desired PID we
-		 * must follow the protocol of retrying if st_changecount changes
-		 * while we examine the entry, or if it's odd.  (This might be
-		 * unnecessary, since fetching or storing an int is almost certainly
-		 * atomic, but let's play it safe.)  We use a volatile pointer here to
-		 * ensure the compiler doesn't try to get cute.
-		 */
-		volatile PgBackendStatus *vbeentry = beentry;
-		bool		found;
-
-		for (;;)
-		{
-			int			before_changecount;
-			int			after_changecount;
-
-			pgstat_begin_read_activity(vbeentry, before_changecount);
-
-			found = (vbeentry->st_procpid == pid);
-
-			pgstat_end_read_activity(vbeentry, after_changecount);
-
-			if (pgstat_read_activity_complete(before_changecount,
-											  after_changecount))
-				break;
-
-			/* Make sure we can break out of loop if stuck... */
-			CHECK_FOR_INTERRUPTS();
-		}
-
-		if (found)
-		{
-			/* Now it is safe to use the non-volatile pointer */
-			if (checkUser && !superuser() && beentry->st_userid != GetUserId())
-				return "<insufficient privilege>";
-			else if (*(beentry->st_activity_raw) == '\0')
-				return "<command string not enabled>";
-			else
-			{
-				/* this'll leak a bit of memory, but that seems acceptable */
-				return pgstat_clip_activity(beentry->st_activity_raw);
-			}
-		}
-
-		beentry++;
-	}
-
-	/* If we get here, caller is in error ... */
-	return "<backend information not available>";
-}
-
-/* ----------
- * pgstat_get_crashed_backend_activity() -
- *
- *	Return a string representing the current activity of the backend with
- *	the specified PID.  Like the function above, but reads shared memory with
- *	the expectation that it may be corrupt.  On success, copy the string
- *	into the "buffer" argument and return that pointer.  On failure,
- *	return NULL.
- *
- *	This function is only intended to be used by the postmaster to report the
- *	query that crashed a backend.  In particular, no attempt is made to
- *	follow the correct concurrency protocol when accessing the
- *	BackendStatusArray.  But that's OK, in the worst case we'll return a
- *	corrupted message.  We also must take care not to trip on ereport(ERROR).
- * ----------
- */
-const char *
-pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
-{
-	volatile PgBackendStatus *beentry;
-	int			i;
-
-	beentry = BackendStatusArray;
-
-	/*
-	 * We probably shouldn't get here before shared memory has been set up,
-	 * but be safe.
-	 */
-	if (beentry == NULL || BackendActivityBuffer == NULL)
-		return NULL;
-
-	for (i = 1; i <= MaxBackends; i++)
-	{
-		if (beentry->st_procpid == pid)
-		{
-			/* Read pointer just once, so it can't change after validation */
-			const char *activity = beentry->st_activity_raw;
-			const char *activity_last;
-
-			/*
-			 * We mustn't access activity string before we verify that it
-			 * falls within the BackendActivityBuffer. To make sure that the
-			 * entire string including its ending is contained within the
-			 * buffer, subtract one activity length from the buffer size.
-			 */
-			activity_last = BackendActivityBuffer + BackendActivityBufferSize
-				- pgstat_track_activity_query_size;
-
-			if (activity < BackendActivityBuffer ||
-				activity > activity_last)
-				return NULL;
-
-			/* If no string available, no point in a report */
-			if (activity[0] == '\0')
-				return NULL;
-
-			/*
-			 * Copy only ASCII-safe characters so we don't run into encoding
-			 * problems when reporting the message; and be sure not to run off
-			 * the end of memory.  As only ASCII characters are reported, it
-			 * doesn't seem necessary to perform multibyte aware clipping.
-			 */
-			ascii_safe_strlcpy(buffer, activity,
-							   Min(buflen, pgstat_track_activity_query_size));
-
-			return buffer;
-		}
-
-		beentry++;
-	}
-
-	/* PID not found */
-	return NULL;
-}
-
-
-/* ----------
- * pgstat_fetch_stat_beentry() -
- *
- *	Support function for the SQL-callable pgstat* functions. Returns
- *	our local copy of the current-activity entry for one backend.
- *
- *	NB: caller is responsible for a check if the user is permitted to see
- *	this info (especially the querystring).
- * ----------
- */
-PgBackendStatus *
-pgstat_fetch_stat_beentry(int beid)
-{
-	pgstat_read_current_status();
-
-	if (beid < 1 || beid > localNumBackends)
-		return NULL;
-
-	return &localBackendStatusTable[beid - 1].backendStatus;
-}
-
-
-/* ----------
- * pgstat_fetch_stat_local_beentry() -
- *
- *	Like pgstat_fetch_stat_beentry() but with locally computed additions (like
- *	xid and xmin values of the backend)
- *
- *	NB: caller is responsible for a check if the user is permitted to see
- *	this info (especially the querystring).
- * ----------
- */
-LocalPgBackendStatus *
-pgstat_fetch_stat_local_beentry(int beid)
-{
-	pgstat_read_current_status();
-
-	if (beid < 1 || beid > localNumBackends)
-		return NULL;
-
-	return &localBackendStatusTable[beid - 1];
-}
-
-
-/* ----------
- * pgstat_fetch_stat_numbackends() -
- *
- *	Support function for the SQL-callable pgstat* functions. Returns
- *	the maximum current backend id.
- * ----------
- */
-int
-pgstat_fetch_stat_numbackends(void)
-{
-	pgstat_read_current_status();
-
-	return localNumBackends;
-}
-
-/*
- * Convert a potentially unsafely truncated activity string (see
- * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated
- * one.
- *
- * The returned string is allocated in the caller's memory context and may be
- * freed.
- */
-char *
-pgstat_clip_activity(const char *raw_activity)
-{
-	char	   *activity;
-	int			rawlen;
-	int			cliplen;
-
-	/*
-	 * Some callers, like pgstat_get_backend_current_activity(), do not
-	 * guarantee that the buffer isn't concurrently modified. We try to take
-	 * care that the buffer is always terminated by a NUL byte regardless, but
-	 * let's still be paranoid about the string's length. In those cases the
-	 * underlying buffer is guaranteed to be pgstat_track_activity_query_size
-	 * large.
-	 */
-	activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1);
-
-	/* now double-guaranteed to be NUL terminated */
-	rawlen = strlen(activity);
-
-	/*
-	 * All supported server-encodings make it possible to determine the length
-	 * of a multi-byte character from its first byte (this is not the case for
-	 * client encodings, see GB18030). As st_activity is always stored using
-	 * server encoding, this allows us to perform multi-byte aware truncation,
-	 * even if the string earlier was truncated in the middle of a multi-byte
-	 * character.
-	 */
-	cliplen = pg_mbcliplen(activity, rawlen,
-						   pgstat_track_activity_query_size - 1);
-
-	activity[cliplen] = '\0';
-
-	return activity;
-}
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index accc1eb577..840ebef92a 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -7,17 +7,6 @@
  *
  * IDENTIFICATION
  *	  src/backend/postmaster/wait_event.c
- *
- * NOTES
- *
- * To make pgstat_report_wait_start() and pgstat_report_wait_end() as
- * lightweight as possible, they do not check if shared memory (MyProc
- * specifically, where the wait event is stored) is already available. Instead
- * we initially set my_wait_event_info to a process local variable, which then
- * is redirected to shared memory using pgstat_set_wait_event_storage(). For
- * the same reason pgstat_track_activities is not checked - the check adds
- * more work than it saves.
- *
  * ----------
  */
 #include "postgres.h"
@@ -34,36 +23,6 @@ static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
 static const char *pgstat_get_wait_io(WaitEventIO w);
 
 
-static uint32 local_my_wait_event_info;
-uint32	   *my_wait_event_info = &local_my_wait_event_info;
-
-
-/*
- * Configure wait event reporting to report wait events to *wait_event_info.
- * *wait_event_info needs to be valid until pgstat_reset_wait_event_storage()
- * is called.
- *
- * Expected to be called during backend startup, to point my_wait_event_info
- * into shared memory.
- */
-void
-pgstat_set_wait_event_storage(uint32 *wait_event_info)
-{
-	my_wait_event_info = wait_event_info;
-}
-
-/*
- * Reset wait event storage location.
- *
- * Expected to be called during backend shutdown, before the location set up
- * pgstat_set_wait_event_storage() becomes invalid.
- */
-void
-pgstat_reset_wait_event_storage(void)
-{
-	my_wait_event_info = &local_my_wait_event_info;
-}
-
 /* ----------
  * pgstat_get_wait_event_type() -
  *
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 51d1bbef30..a3ec358538 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -681,10 +681,6 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 	if (!bootstrap)
 		pgstat_initialize();
 
-	/* Initialize status reporting */
-	if (!bootstrap)
-		pgstat_beinit();
-
 	/*
 	 * Load relcache entries for the shared system catalogs.  This must create
 	 * at least entries for pg_database and catalogs used for authentication.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c9c9da85f3..60a9c7a2a0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -90,7 +90,6 @@
 #include "tcop/tcopprot.h"
 #include "tsearch/ts_cache.h"
 #include "utils/acl.h"
-#include "utils/backend_status.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
 #include "utils/float.h"
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index e04ccc5b62..e42be914d2 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -148,11 +148,11 @@ static void save_query_text_state(PsqlScanState scan_state, ConditionalStack cst
 								  PQExpBuffer query_buf);
 static void discard_query_text(PsqlScanState scan_state, ConditionalStack cstack,
 							   PQExpBuffer query_buf);
-static bool copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf);
+static void copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf);
 static bool do_connect(enum trivalue reuse_previous_specification,
 					   char *dbname, char *user, char *host, char *port);
 static bool do_edit(const char *filename_arg, PQExpBuffer query_buf,
-					int lineno, bool discard_on_quit, bool *edited);
+					int lineno, bool *edited);
 static bool do_shell(const char *command);
 static bool do_watch(PQExpBuffer query_buf, double sleep);
 static bool lookup_object_oid(EditableObjectType obj_type, const char *desc,
@@ -418,7 +418,7 @@ exec_command(const char *cmd,
 	 * the individual command subroutines.
 	 */
 	if (status == PSQL_CMD_SEND)
-		(void) copy_previous_query(query_buf, previous_buf);
+		copy_previous_query(query_buf, previous_buf);
 
 	return status;
 }
@@ -1004,27 +1004,14 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch,
 			}
 			if (status != PSQL_CMD_ERROR)
 			{
-				bool		discard_on_quit;
-
 				expand_tilde(&fname);
 				if (fname)
-				{
 					canonicalize_path(fname);
-					/* Always clear buffer if the file isn't modified */
-					discard_on_quit = true;
-				}
-				else
-				{
-					/*
-					 * If query_buf is empty, recall previous query for
-					 * editing.  But in that case, the query buffer should be
-					 * emptied if editing doesn't modify the file.
-					 */
-					discard_on_quit = copy_previous_query(query_buf,
-														  previous_buf);
-				}
 
-				if (do_edit(fname, query_buf, lineno, discard_on_quit, NULL))
+				/* If query_buf is empty, recall previous query for editing */
+				copy_previous_query(query_buf, previous_buf);
+
+				if (do_edit(fname, query_buf, lineno, NULL))
 					status = PSQL_CMD_NEWEDIT;
 				else
 					status = PSQL_CMD_ERROR;
@@ -1147,7 +1134,7 @@ exec_command_ef_ev(PsqlScanState scan_state, bool active_branch,
 		{
 			bool		edited = false;
 
-			if (!do_edit(NULL, query_buf, lineno, true, &edited))
+			if (!do_edit(NULL, query_buf, lineno, &edited))
 				status = PSQL_CMD_ERROR;
 			else if (!edited)
 				puts(_("No changes"));
@@ -2650,7 +2637,7 @@ exec_command_watch(PsqlScanState scan_state, bool active_branch,
 		}
 
 		/* If query_buf is empty, recall and execute previous query */
-		(void) copy_previous_query(query_buf, previous_buf);
+		copy_previous_query(query_buf, previous_buf);
 
 		success = do_watch(query_buf, sleep);
 
@@ -2974,19 +2961,12 @@ discard_query_text(PsqlScanState scan_state, ConditionalStack cstack,
  * This is used by various slash commands for which re-execution of a
  * previous query is a common usage.  For convenience, we allow the
  * case of query_buf == NULL (and do nothing).
- *
- * Returns "true" if the previous query was copied into the query
- * buffer, else "false".
  */
-static bool
+static void
 copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf)
 {
 	if (query_buf && query_buf->len == 0)
-	{
 		appendPQExpBufferStr(query_buf, previous_buf->data);
-		return true;
-	}
-	return false;
 }
 
 /*
@@ -3667,11 +3647,10 @@ UnsyncVariables(void)
 
 
 /*
- * helper for do_edit(): actually invoke the editor
+ * do_edit -- handler for \e
  *
- * Returns true on success, false if we failed to invoke the editor or
- * it returned nonzero status.  (An error message is printed for failed-
- * to-invoke cases, but not if the editor returns nonzero status.)
+ * If you do not specify a filename, the current query buffer will be copied
+ * into a temporary one.
  */
 static bool
 editFile(const char *fname, int lineno)
@@ -3740,23 +3719,10 @@ editFile(const char *fname, int lineno)
 }
 
 
-/*
- * do_edit -- handler for \e
- *
- * If you do not specify a filename, the current query buffer will be copied
- * into a temporary file.
- *
- * After this function is done, the resulting file will be copied back into the
- * query buffer.  As an exception to this, the query buffer will be emptied
- * if the file was not modified (or the editor failed) and the caller passes
- * "discard_on_quit" = true.
- *
- * If "edited" isn't NULL, *edited will be set to true if the query buffer
- * is successfully replaced.
- */
+/* call this one */
 static bool
 do_edit(const char *filename_arg, PQExpBuffer query_buf,
-		int lineno, bool discard_on_quit, bool *edited)
+		int lineno, bool *edited)
 {
 	char		fnametmp[MAXPGPATH];
 	FILE	   *stream = NULL;
@@ -3904,7 +3870,6 @@ do_edit(const char *filename_arg, PQExpBuffer query_buf,
 			{
 				pg_log_error("%s: %m", fname);
 				error = true;
-				resetPQExpBuffer(query_buf);
 			}
 			else if (edited)
 			{
@@ -3914,15 +3879,6 @@ do_edit(const char *filename_arg, PQExpBuffer query_buf,
 			fclose(stream);
 		}
 	}
-	else
-	{
-		/*
-		 * If the file was not modified, and the caller requested it, discard
-		 * the query buffer.
-		 */
-		if (discard_on_quit)
-			resetPQExpBuffer(query_buf);
-	}
 
 	/* remove temp file */
 	if (!filename_arg)
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index d7bf16368b..c6b139d57d 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -2,7 +2,7 @@
  *
  * progress.h
  *	  Constants used with the progress reporting facilities defined in
- *	  backend_status.h.  These are possibly interesting to extensions, so we
+ *	  pgstat.h.  These are possibly interesting to extensions, so we
  *	  expose them via this header file.  Note that if you update these
  *	  constants, you probably also need to update the views based on them
  *	  in system_views.sql.
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..d66760e4bf 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 7cd137506e..3247c7b8ad 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -12,10 +12,11 @@
 #define PGSTAT_H
 
 #include "datatype/timestamp.h"
+#include "libpq/pqcomm.h"
+#include "miscadmin.h"
+#include "port/atomics.h"
 #include "portability/instr_time.h"
-#include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */
-#include "utils/backend_progress.h" /* for backward compatibility */
-#include "utils/backend_status.h" /* for backward compatibility */
+#include "postmaster/pgarch.h"
 #include "utils/hsearch.h"
 #include "utils/relcache.h"
 #include "utils/wait_event.h" /* for backward compatibility */
@@ -881,6 +882,262 @@ typedef struct PgStat_ReplSlotStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_ReplSlotStats;
 
+/* ----------
+ * Backend states
+ * ----------
+ */
+typedef enum BackendState
+{
+	STATE_UNDEFINED,
+	STATE_IDLE,
+	STATE_RUNNING,
+	STATE_IDLEINTRANSACTION,
+	STATE_FASTPATH,
+	STATE_IDLEINTRANSACTION_ABORTED,
+	STATE_DISABLED
+} BackendState;
+
+/* ----------
+ * Command type for progress reporting purposes
+ * ----------
+ */
+typedef enum ProgressCommandType
+{
+	PROGRESS_COMMAND_INVALID,
+	PROGRESS_COMMAND_VACUUM,
+	PROGRESS_COMMAND_ANALYZE,
+	PROGRESS_COMMAND_CLUSTER,
+	PROGRESS_COMMAND_CREATE_INDEX,
+	PROGRESS_COMMAND_BASEBACKUP,
+	PROGRESS_COMMAND_COPY
+} ProgressCommandType;
+
+#define PGSTAT_NUM_PROGRESS_PARAM	20
+
+/* ----------
+ * Shared-memory data structures
+ * ----------
+ */
+
+
+/*
+ * PgBackendSSLStatus
+ *
+ * For each backend, we keep the SSL status in a separate struct, that
+ * is only filled in if SSL is enabled.
+ *
+ * All char arrays must be null-terminated.
+ */
+typedef struct PgBackendSSLStatus
+{
+	/* Information about SSL connection */
+	int			ssl_bits;
+	char		ssl_version[NAMEDATALEN];
+	char		ssl_cipher[NAMEDATALEN];
+	char		ssl_client_dn[NAMEDATALEN];
+
+	/*
+	 * serial number is max "20 octets" per RFC 5280, so this size should be
+	 * fine
+	 */
+	char		ssl_client_serial[NAMEDATALEN];
+
+	char		ssl_issuer_dn[NAMEDATALEN];
+} PgBackendSSLStatus;
+
+/*
+ * PgBackendGSSStatus
+ *
+ * For each backend, we keep the GSS status in a separate struct, that
+ * is only filled in if GSS is enabled.
+ *
+ * All char arrays must be null-terminated.
+ */
+typedef struct PgBackendGSSStatus
+{
+	/* Information about GSSAPI connection */
+	char		gss_princ[NAMEDATALEN]; /* GSSAPI Principal used to auth */
+	bool		gss_auth;		/* If GSSAPI authentication was used */
+	bool		gss_enc;		/* If encryption is being used */
+
+} PgBackendGSSStatus;
+
+
+/* ----------
+ * PgBackendStatus
+ *
+ * Each live backend maintains a PgBackendStatus struct in shared memory
+ * showing its current activity.  (The structs are allocated according to
+ * BackendId, but that is not critical.)  Note that the collector process
+ * has no involvement in, or even access to, these structs.
+ *
+ * Each auxiliary process also maintains a PgBackendStatus struct in shared
+ * memory.
+ * ----------
+ */
+typedef struct PgBackendStatus
+{
+	/*
+	 * To avoid locking overhead, we use the following protocol: a backend
+	 * increments st_changecount before modifying its entry, and again after
+	 * finishing a modification.  A would-be reader should note the value of
+	 * st_changecount, copy the entry into private memory, then check
+	 * st_changecount again.  If the value hasn't changed, and if it's even,
+	 * the copy is valid; otherwise start over.  This makes updates cheap
+	 * while reads are potentially expensive, but that's the tradeoff we want.
+	 *
+	 * The above protocol needs memory barriers to ensure that the apparent
+	 * order of execution is as it desires.  Otherwise, for example, the CPU
+	 * might rearrange the code so that st_changecount is incremented twice
+	 * before the modification on a machine with weak memory ordering.  Hence,
+	 * use the macros defined below for manipulating st_changecount, rather
+	 * than touching it directly.
+	 */
+	int			st_changecount;
+
+	/* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
+	int			st_procpid;
+
+	/* Type of backends */
+	BackendType st_backendType;
+
+	/* Times when current backend, transaction, and activity started */
+	TimestampTz st_proc_start_timestamp;
+	TimestampTz st_xact_start_timestamp;
+	TimestampTz st_activity_start_timestamp;
+	TimestampTz st_state_start_timestamp;
+
+	/* Database OID, owning user's OID, connection client address */
+	Oid			st_databaseid;
+	Oid			st_userid;
+	SockAddr	st_clientaddr;
+	char	   *st_clienthostname;	/* MUST be null-terminated */
+
+	/* Information about SSL connection */
+	bool		st_ssl;
+	PgBackendSSLStatus *st_sslstatus;
+
+	/* Information about GSSAPI connection */
+	bool		st_gss;
+	PgBackendGSSStatus *st_gssstatus;
+
+	/* current state */
+	BackendState st_state;
+
+	/* application name; MUST be null-terminated */
+	char	   *st_appname;
+
+	/*
+	 * Current command string; MUST be null-terminated. Note that this string
+	 * possibly is truncated in the middle of a multi-byte character. As
+	 * activity strings are stored more frequently than read, that allows to
+	 * move the cost of correct truncation to the display side. Use
+	 * pgstat_clip_activity() to truncate correctly.
+	 */
+	char	   *st_activity_raw;
+
+	/*
+	 * Command progress reporting.  Any command which wishes can advertise
+	 * that it is running by setting st_progress_command,
+	 * st_progress_command_target, and st_progress_param[].
+	 * st_progress_command_target should be the OID of the relation which the
+	 * command targets (we assume there's just one, as this is meant for
+	 * utility commands), but the meaning of each element in the
+	 * st_progress_param array is command-specific.
+	 */
+	ProgressCommandType st_progress_command;
+	Oid			st_progress_command_target;
+	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
+} PgBackendStatus;
+
+/*
+ * Macros to load and store st_changecount with appropriate memory barriers.
+ *
+ * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY()
+ * after, modifying the current process's PgBackendStatus data.  Note that,
+ * since there is no mechanism for cleaning up st_changecount after an error,
+ * THESE MACROS FORM A CRITICAL SECTION.  Any error between them will be
+ * promoted to PANIC, causing a database restart to clean up shared memory!
+ * Hence, keep the critical section as short and straight-line as possible.
+ * Aside from being safer, that minimizes the window in which readers will
+ * have to loop.
+ *
+ * Reader logic should follow this sketch:
+ *
+ *		for (;;)
+ *		{
+ *			int before_ct, after_ct;
+ *
+ *			pgstat_begin_read_activity(beentry, before_ct);
+ *			... copy beentry data to local memory ...
+ *			pgstat_end_read_activity(beentry, after_ct);
+ *			if (pgstat_read_activity_complete(before_ct, after_ct))
+ *				break;
+ *			CHECK_FOR_INTERRUPTS();
+ *		}
+ *
+ * For extra safety, we generally use volatile beentry pointers, although
+ * the memory barriers should theoretically be sufficient.
+ */
+#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \
+	do { \
+		START_CRIT_SECTION(); \
+		(beentry)->st_changecount++; \
+		pg_write_barrier(); \
+	} while (0)
+
+#define PGSTAT_END_WRITE_ACTIVITY(beentry) \
+	do { \
+		pg_write_barrier(); \
+		(beentry)->st_changecount++; \
+		Assert(((beentry)->st_changecount & 1) == 0); \
+		END_CRIT_SECTION(); \
+	} while (0)
+
+#define pgstat_begin_read_activity(beentry, before_changecount) \
+	do { \
+		(before_changecount) = (beentry)->st_changecount; \
+		pg_read_barrier(); \
+	} while (0)
+
+#define pgstat_end_read_activity(beentry, after_changecount) \
+	do { \
+		pg_read_barrier(); \
+		(after_changecount) = (beentry)->st_changecount; \
+	} while (0)
+
+#define pgstat_read_activity_complete(before_changecount, after_changecount) \
+	((before_changecount) == (after_changecount) && \
+	 ((before_changecount) & 1) == 0)
+
+
+/* ----------
+ * LocalPgBackendStatus
+ *
+ * When we build the backend status array, we use LocalPgBackendStatus to be
+ * able to add new values to the struct when needed without adding new fields
+ * to the shared memory. It contains the backend status as a first member.
+ * ----------
+ */
+typedef struct LocalPgBackendStatus
+{
+	/*
+	 * Local version of the backend status entry.
+	 */
+	PgBackendStatus backendStatus;
+
+	/*
+	 * The xid of the current transaction if available, InvalidTransactionId
+	 * if not.
+	 */
+	TransactionId backend_xid;
+
+	/*
+	 * The xmin of the current session if available, InvalidTransactionId if
+	 * not.
+	 */
+	TransactionId backend_xmin;
+} LocalPgBackendStatus;
 
 /*
  * Working state needed to accumulate per-function-call timing statistics.
@@ -903,8 +1160,10 @@ typedef struct PgStat_FunctionCallUsage
  * GUC parameters
  * ----------
  */
+extern PGDLLIMPORT bool pgstat_track_activities;
 extern PGDLLIMPORT bool pgstat_track_counts;
 extern PGDLLIMPORT int pgstat_track_functions;
+extern PGDLLIMPORT int pgstat_track_activity_query_size;
 extern char *pgstat_stat_directory;
 extern char *pgstat_stat_tmpname;
 extern char *pgstat_stat_filename;
@@ -925,14 +1184,6 @@ extern PgStat_MsgWal WalStats;
 extern PgStat_Counter pgStatBlockReadTime;
 extern PgStat_Counter pgStatBlockWriteTime;
 
-/*
- * Updated by pgstat_count_conn_*_time macros, called by
- * pgstat_report_activity().
- */
-extern PgStat_Counter pgStatActiveTime;
-extern PgStat_Counter pgStatTransactionIdleTime;
-
-
 /*
  * Updated by the traffic cop and in errfinish()
  */
@@ -942,6 +1193,9 @@ extern SessionEndType pgStatSessionEndCause;
  * Functions called from postmaster
  * ----------
  */
+extern Size BackendStatusShmemSize(void);
+extern void CreateSharedBackendStatus(void);
+
 extern void pgstat_init(void);
 extern int	pgstat_start(void);
 extern void pgstat_reset_all(void);
@@ -987,13 +1241,30 @@ extern void pgstat_report_replslot(const char *slotname, PgStat_Counter spilltxn
 extern void pgstat_report_replslot_drop(const char *slotname);
 
 extern void pgstat_initialize(void);
-
+extern void pgstat_bestart(void);
+
+extern void pgstat_report_activity(BackendState state, const char *cmd_str);
+extern void pgstat_report_tempfile(size_t filesize);
+extern void pgstat_report_appname(const char *appname);
+extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
+extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
+extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
+													   int buflen);
+
+extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
+										  Oid relid);
+extern void pgstat_progress_update_param(int index, int64 val);
+extern void pgstat_progress_update_multi_param(int nparam, const int *index,
+											   const int64 *val);
+extern void pgstat_progress_end_command(void);
 
 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
 extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
 
 extern void pgstat_initstats(Relation rel);
 
+extern char *pgstat_clip_activity(const char *raw_activity);
+
 /* nontransactional event counts are simple enough to inline */
 
 #define pgstat_count_heap_scan(rel)									\
@@ -1035,10 +1306,6 @@ extern void pgstat_initstats(Relation rel);
 	(pgStatBlockReadTime += (n))
 #define pgstat_count_buffer_write_time(n)							\
 	(pgStatBlockWriteTime += (n))
-#define pgstat_count_conn_active_time(n)							\
-	(pgStatActiveTime += (n))
-#define pgstat_count_conn_txn_idle_time(n)							\
-	(pgStatTransactionIdleTime += (n))
 
 extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n);
 extern void pgstat_count_heap_update(Relation rel, bool hot);
@@ -1075,7 +1342,10 @@ extern bool pgstat_send_wal(bool force);
  */
 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
+extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
+extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
 extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
+extern int	pgstat_fetch_stat_numbackends(void);
 extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
 extern PgStat_GlobalStats *pgstat_fetch_global(void);
 extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
diff --git a/src/include/utils/backend_progress.h b/src/include/utils/backend_progress.h
deleted file mode 100644
index 1714fa09c1..0000000000
--- a/src/include/utils/backend_progress.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* ----------
- * backend_progress.h
- *	  Command progress reporting definition.
- *
- * Note that this file provides the infrastructure for storing a single
- * backend's command progress counters, without ascribing meaning to the
- * individual fields. See commands/progress.h and system_views.sql for that.
- *
- * Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- * src/include/utils/backend_progress.h
- * ----------
- */
-#ifndef BACKEND_PROGRESS_H
-#define BACKEND_PROGRESS_H
-
-
-/* ----------
- * Command type for progress reporting purposes
- * ----------
- */
-typedef enum ProgressCommandType
-{
-	PROGRESS_COMMAND_INVALID,
-	PROGRESS_COMMAND_VACUUM,
-	PROGRESS_COMMAND_ANALYZE,
-	PROGRESS_COMMAND_CLUSTER,
-	PROGRESS_COMMAND_CREATE_INDEX,
-	PROGRESS_COMMAND_BASEBACKUP,
-	PROGRESS_COMMAND_COPY
-} ProgressCommandType;
-
-#define PGSTAT_NUM_PROGRESS_PARAM	20
-
-
-extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
-										  Oid relid);
-extern void pgstat_progress_update_param(int index, int64 val);
-extern void pgstat_progress_update_multi_param(int nparam, const int *index,
-											   const int64 *val);
-extern void pgstat_progress_end_command(void);
-
-
-#endif /* BACKEND_PROGRESS_H */
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
deleted file mode 100644
index 3fd7370d41..0000000000
--- a/src/include/utils/backend_status.h
+++ /dev/null
@@ -1,316 +0,0 @@
-/* ----------
- * backend_status.h
- *	  Definitions related to backend status reporting
- *
- * Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- * src/include/utils/backend_status.h
- * ----------
- */
-#ifndef BACKEND_STATUS_H
-#define BACKEND_STATUS_H
-
-#include "datatype/timestamp.h"
-#include "libpq/pqcomm.h"
-#include "miscadmin.h" /* for BackendType */
-#include "utils/backend_progress.h"
-
-
-/* ----------
- * Backend states
- * ----------
- */
-typedef enum BackendState
-{
-	STATE_UNDEFINED,
-	STATE_IDLE,
-	STATE_RUNNING,
-	STATE_IDLEINTRANSACTION,
-	STATE_FASTPATH,
-	STATE_IDLEINTRANSACTION_ABORTED,
-	STATE_DISABLED
-} BackendState;
-
-
-/* ----------
- * Shared-memory data structures
- * ----------
- */
-
-/*
- * PgBackendSSLStatus
- *
- * For each backend, we keep the SSL status in a separate struct, that
- * is only filled in if SSL is enabled.
- *
- * All char arrays must be null-terminated.
- */
-typedef struct PgBackendSSLStatus
-{
-	/* Information about SSL connection */
-	int			ssl_bits;
-	char		ssl_version[NAMEDATALEN];
-	char		ssl_cipher[NAMEDATALEN];
-	char		ssl_client_dn[NAMEDATALEN];
-
-	/*
-	 * serial number is max "20 octets" per RFC 5280, so this size should be
-	 * fine
-	 */
-	char		ssl_client_serial[NAMEDATALEN];
-
-	char		ssl_issuer_dn[NAMEDATALEN];
-} PgBackendSSLStatus;
-
-/*
- * PgBackendGSSStatus
- *
- * For each backend, we keep the GSS status in a separate struct, that
- * is only filled in if GSS is enabled.
- *
- * All char arrays must be null-terminated.
- */
-typedef struct PgBackendGSSStatus
-{
-	/* Information about GSSAPI connection */
-	char		gss_princ[NAMEDATALEN]; /* GSSAPI Principal used to auth */
-	bool		gss_auth;		/* If GSSAPI authentication was used */
-	bool		gss_enc;		/* If encryption is being used */
-
-} PgBackendGSSStatus;
-
-
-/* ----------
- * PgBackendStatus
- *
- * Each live backend maintains a PgBackendStatus struct in shared memory
- * showing its current activity.  (The structs are allocated according to
- * BackendId, but that is not critical.)  Note that the collector process
- * has no involvement in, or even access to, these structs.
- *
- * Each auxiliary process also maintains a PgBackendStatus struct in shared
- * memory.
- * ----------
- */
-typedef struct PgBackendStatus
-{
-	/*
-	 * To avoid locking overhead, we use the following protocol: a backend
-	 * increments st_changecount before modifying its entry, and again after
-	 * finishing a modification.  A would-be reader should note the value of
-	 * st_changecount, copy the entry into private memory, then check
-	 * st_changecount again.  If the value hasn't changed, and if it's even,
-	 * the copy is valid; otherwise start over.  This makes updates cheap
-	 * while reads are potentially expensive, but that's the tradeoff we want.
-	 *
-	 * The above protocol needs memory barriers to ensure that the apparent
-	 * order of execution is as it desires.  Otherwise, for example, the CPU
-	 * might rearrange the code so that st_changecount is incremented twice
-	 * before the modification on a machine with weak memory ordering.  Hence,
-	 * use the macros defined below for manipulating st_changecount, rather
-	 * than touching it directly.
-	 */
-	int			st_changecount;
-
-	/* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
-	int			st_procpid;
-
-	/* Type of backends */
-	BackendType st_backendType;
-
-	/* Times when current backend, transaction, and activity started */
-	TimestampTz st_proc_start_timestamp;
-	TimestampTz st_xact_start_timestamp;
-	TimestampTz st_activity_start_timestamp;
-	TimestampTz st_state_start_timestamp;
-
-	/* Database OID, owning user's OID, connection client address */
-	Oid			st_databaseid;
-	Oid			st_userid;
-	SockAddr	st_clientaddr;
-	char	   *st_clienthostname;	/* MUST be null-terminated */
-
-	/* Information about SSL connection */
-	bool		st_ssl;
-	PgBackendSSLStatus *st_sslstatus;
-
-	/* Information about GSSAPI connection */
-	bool		st_gss;
-	PgBackendGSSStatus *st_gssstatus;
-
-	/* current state */
-	BackendState st_state;
-
-	/* application name; MUST be null-terminated */
-	char	   *st_appname;
-
-	/*
-	 * Current command string; MUST be null-terminated. Note that this string
-	 * possibly is truncated in the middle of a multi-byte character. As
-	 * activity strings are stored more frequently than read, that allows to
-	 * move the cost of correct truncation to the display side. Use
-	 * pgstat_clip_activity() to truncate correctly.
-	 */
-	char	   *st_activity_raw;
-
-	/*
-	 * Command progress reporting.  Any command which wishes can advertise
-	 * that it is running by setting st_progress_command,
-	 * st_progress_command_target, and st_progress_param[].
-	 * st_progress_command_target should be the OID of the relation which the
-	 * command targets (we assume there's just one, as this is meant for
-	 * utility commands), but the meaning of each element in the
-	 * st_progress_param array is command-specific.
-	 */
-	ProgressCommandType st_progress_command;
-	Oid			st_progress_command_target;
-	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
-} PgBackendStatus;
-
-
-/*
- * Macros to load and store st_changecount with appropriate memory barriers.
- *
- * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY()
- * after, modifying the current process's PgBackendStatus data.  Note that,
- * since there is no mechanism for cleaning up st_changecount after an error,
- * THESE MACROS FORM A CRITICAL SECTION.  Any error between them will be
- * promoted to PANIC, causing a database restart to clean up shared memory!
- * Hence, keep the critical section as short and straight-line as possible.
- * Aside from being safer, that minimizes the window in which readers will
- * have to loop.
- *
- * Reader logic should follow this sketch:
- *
- *		for (;;)
- *		{
- *			int before_ct, after_ct;
- *
- *			pgstat_begin_read_activity(beentry, before_ct);
- *			... copy beentry data to local memory ...
- *			pgstat_end_read_activity(beentry, after_ct);
- *			if (pgstat_read_activity_complete(before_ct, after_ct))
- *				break;
- *			CHECK_FOR_INTERRUPTS();
- *		}
- *
- * For extra safety, we generally use volatile beentry pointers, although
- * the memory barriers should theoretically be sufficient.
- */
-#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \
-	do { \
-		START_CRIT_SECTION(); \
-		(beentry)->st_changecount++; \
-		pg_write_barrier(); \
-	} while (0)
-
-#define PGSTAT_END_WRITE_ACTIVITY(beentry) \
-	do { \
-		pg_write_barrier(); \
-		(beentry)->st_changecount++; \
-		Assert(((beentry)->st_changecount & 1) == 0); \
-		END_CRIT_SECTION(); \
-	} while (0)
-
-#define pgstat_begin_read_activity(beentry, before_changecount) \
-	do { \
-		(before_changecount) = (beentry)->st_changecount; \
-		pg_read_barrier(); \
-	} while (0)
-
-#define pgstat_end_read_activity(beentry, after_changecount) \
-	do { \
-		pg_read_barrier(); \
-		(after_changecount) = (beentry)->st_changecount; \
-	} while (0)
-
-#define pgstat_read_activity_complete(before_changecount, after_changecount) \
-	((before_changecount) == (after_changecount) && \
-	 ((before_changecount) & 1) == 0)
-
-
-/* ----------
- * LocalPgBackendStatus
- *
- * When we build the backend status array, we use LocalPgBackendStatus to be
- * able to add new values to the struct when needed without adding new fields
- * to the shared memory. It contains the backend status as a first member.
- * ----------
- */
-typedef struct LocalPgBackendStatus
-{
-	/*
-	 * Local version of the backend status entry.
-	 */
-	PgBackendStatus backendStatus;
-
-	/*
-	 * The xid of the current transaction if available, InvalidTransactionId
-	 * if not.
-	 */
-	TransactionId backend_xid;
-
-	/*
-	 * The xmin of the current session if available, InvalidTransactionId if
-	 * not.
-	 */
-	TransactionId backend_xmin;
-} LocalPgBackendStatus;
-
-
-/* ----------
- * GUC parameters
- * ----------
- */
-extern PGDLLIMPORT bool pgstat_track_activities;
-extern PGDLLIMPORT int pgstat_track_activity_query_size;
-
-
-/* ----------
- * Other global variables
- * ----------
- */
-extern PGDLLIMPORT  PgBackendStatus *MyBEEntry;
-
-
-/* ----------
- * Functions called from postmaster
- * ----------
- */
-extern Size BackendStatusShmemSize(void);
-extern void CreateSharedBackendStatus(void);
-
-
-/* ----------
- * Functions called from backends
- * ----------
- */
-
-/* Initialization functions */
-extern void pgstat_beinit(void);
-extern void pgstat_bestart(void);
-
-extern void pgstat_clear_backend_activity_snapshot(void);
-
-/* Activity reporting functions */
-extern void pgstat_report_activity(BackendState state, const char *cmd_str);
-extern void pgstat_report_tempfile(size_t filesize);
-extern void pgstat_report_appname(const char *appname);
-extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
-extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
-extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
-													   int buflen);
-
-
-/* ----------
- * Support functions for the SQL-callable functions to
- * generate the pgstat* views.
- * ----------
- */
-extern int	pgstat_fetch_stat_numbackends(void);
-extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
-extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
-extern char *pgstat_clip_activity(const char *raw_activity);
-
-
-#endif /* BACKEND_STATUS_H */
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 44448b48ec..2c883467f3 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -11,6 +11,9 @@
 #define WAIT_EVENT_H
 
 
+#include "storage/proc.h" /* for MyProc */
+
+
 /* ----------
  * Wait Classes
  * ----------
@@ -231,10 +234,13 @@ extern const char *pgstat_get_wait_event(uint32 wait_event_info);
 extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
 static inline void pgstat_report_wait_start(uint32 wait_event_info);
 static inline void pgstat_report_wait_end(void);
-extern void pgstat_set_wait_event_storage(uint32 *wait_event_info);
-extern void pgstat_reset_wait_event_storage(void);
 
-extern PGDLLIMPORT uint32 *my_wait_event_info;
+
+/*
+ * Repeated here for the inline functions because it is declared in pgstat.h,
+ * which includes this header.
+ */
+extern PGDLLIMPORT bool pgstat_track_activities;
 
 
 /* ----------
@@ -248,35 +254,47 @@ extern PGDLLIMPORT uint32 *my_wait_event_info;
  *	for wait event which is sufficient for current usage, 1-byte is
  *	reserved for future usage.
  *
- *	Historically we used to make this reporting conditional on
- *	pgstat_track_activities, but the check for that seems to add more cost
- *	than it saves.
- *
- *	my_wait_event_info initially points to local memory, making it safe to
- *	call this before MyProc has been initialized.
+ * NB: this *must* be able to survive being called before MyProc has been
+ * initialized.
  * ----------
  */
 static inline void
 pgstat_report_wait_start(uint32 wait_event_info)
 {
+	volatile PGPROC *proc = MyProc;
+
+	if (!pgstat_track_activities || !proc)
+		return;
+
 	/*
 	 * Since this is a four-byte field which is always read and written as
 	 * four-bytes, updates are atomic.
 	 */
-	*(volatile uint32 *) my_wait_event_info = wait_event_info;
+	proc->wait_event_info = wait_event_info;
 }
 
 /* ----------
  * pgstat_report_wait_end() -
  *
  *	Called to report end of a wait.
+ *
+ * NB: this *must* be able to survive being called before MyProc has been
+ * initialized.
  * ----------
  */
 static inline void
 pgstat_report_wait_end(void)
 {
-	/* see pgstat_report_wait_start() */
-	*(volatile uint32 *) my_wait_event_info = 0;
+	volatile PGPROC *proc = MyProc;
+
+	if (!pgstat_track_activities || !proc)
+		return;
+
+	/*
+	 * Since this is a four-byte field which is always read and written as
+	 * four-bytes, updates are atomic.
+	 */
+	proc->wait_event_info = 0;
 }
 
 
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 6a98064b2b..a173d604d6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#40Kazutaka Onishi
onishi@heterodb.com
In reply to: Kazutaka Onishi (#39)
1 attachment(s)
Re: TRUNCATE on foreign table

v9 also has a typo because I hadn't checked the docs... sorry.

2021年4月4日(日) 15:30 Kazutaka Onishi <onishi@heterodb.com>:

Show quoted text

Hi

For now, I've fixed the v8 according to your comments, excluding
anything related with 'hash table' and 'do_sql_commands'.

1) We usually have the struct name after "+typedef struct
ForeignTruncateInfo", please refer to other struct defs in the code
base.

I've modified the definition.
By the way, there are many "typedef struct{ ... }NameOfStruct;" definitions in
the code base, about 40% of all struct defs (checked with find & grep),
so I felt this convention is not a "MUST".

2) We should add ORDER BY clause(probably ORDER BY id?) for data
generating select queries in added tests, otherwise tests might become
unstable.

I've added "ORDER BY" to the postgres_fdw test.

3) How about dropping the tables, foreign tables that got created for
testing in postgres_fdw.sql?

I've added "cleanup" commands.

4) I think it's not "foreign-tables"/"foreign-table", it can be
"foreign tables"/"foreign table", other places in the docs use this
convention.
+ the context where the foreign-tables are truncated. It is a list
of integers and same length with

I've replaced "foreign-table" with "foreign table".

5) Can't we use do_sql_command function after making it non static? We
could go extra mile, that is we could make do_sql_command little more
generic by passing some enum for each of PQsendQuery,
PQsendQueryParams, PQsendQueryPrepared and PQsendPrepare and replace
the respective code chunks with do_sql_command in postgres_fdw.c.

I've skipped this for now.
It sounds good, but it is not easy.
It should be done in a separate patch because it is not related only to TRUNCATE.

6) A white space error when the patch is applied.
contrib/postgres_fdw/postgres_fdw.c:2913: trailing whitespace.

I've checked the patch and cleaned up the whitespace.
But I couldn't reproduce this message when applying (patch -p1 < ...) my v8 patch.
If this still occurs, please tell me how you apply the patch.

7) I may be missing something here. Why do we need a hash table at
all? We could just do it with a linked list right? Is there a specific
reason to use a hash table? IIUC, the hash table entries will be lying
around until the local session exits since we are not doing
hash_destroy.

I've skipped this for now.

8) How about having something like this?
+ <command>TRUNCATE</command> can be used for foreign tables if the
foreign data wrapper supports, for instance, see <xref
linkend="postgres-fdw"/>.

Sounds good. I've added.

9)

+ <command>TRUNCATE</command> for each foreign server being involved

+ in one <command>TRUNCATE</command> command (note that invocations
The 'being' in above sentence can be omitted.

I've fixed this.

10)

+ the context where the foreign-tables are truncated. It is a list of integers and same length with
There should be a verb between 'and' and same :
It is a list of integers and has same length with

I've fixed this.

11)

+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table that uses the server OID as lookup key,
The 'uses' is for 'This' (the struct), so 'that' should be 'and':
the elements in a hash table and uses
Alternatively:
the elements in a hash table. It uses

I've fixed this.

12)

+ relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT__NORMAL : TRUNCATE_REL_CONTEXT__ONLY));
I am curious: isn't one underscore enough in the identifier (before NORMAL and ONLY) ?
I suggest naming them TRUNCATE_REL_CONTEXT_NORMAL and TRUNCATE_REL_CONTEXT_ONLY

+ relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT__CASCADED);
I wonder if TRUNCATE_REL_CONTEXT_CASCADING is better than TRUNCATE_REL_CONTEXT__CASCADED. Note the removal of the extra underscore.
In English, we say: truncation cascading to foreign table.
w.r.t. Bharath's question on using hash table, I think the reason is that the search would be more efficient:

I've changed these labels shown below:
TRUNCATE_REL_CONTEXT__NORMAL -> TRUNCATE_REL_CONTEXT_NORMAL
TRUNCATE_REL_CONTEXT__ONLY -> TRUNCATE_REL_CONTEXT_ONLY
TRUNCATE_REL_CONTEXT__CASCADED -> TRUNCATE_REL_CONTEXT_CASCADING

13)

+           ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
and
+       while ((ft_info = hash_seq_search(&seq)) != NULL)
+    * Now go through the hash table, and process each entry associated to the
+    * servers involved in the TRUNCATE.
associated to -> associated with

I've fixed this.

14) Should the hash table be released at the end of ExecuteTruncateGuts() ?

I've skipped this for now.

2021年4月4日(日) 14:13 Kohei KaiGai <kaigai@heterodb.com>:

2021年4月4日(日) 13:07 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

On Sat, Apr 3, 2021 at 8:31 PM Zhihong Yu <zyu@yugabyte.com> wrote:

w.r.t. Bharath's question on using hash table, I think the reason is that the search would be more efficient:

Generally, sequential search would be slower if there are many entries
in a list. Here, the use case is to store all the foreign table ids
associated with each foreign server and I'm not sure how many foreign
tables will be provided in a single truncate command that belong to
different foreign servers. I strongly feel the count will be less and
using a list would be easier than to have a hash table. Others may
have better opinions.

/messages/by-id/20200115081126.GK2243@paquier.xyz

It was originally implemented using a simple list, then modified according to
the comment by Michael.
I think it is just a matter of preference.

Should the hash table be released at the end of ExecuteTruncateGuts() ?

If we go with a hash table and think that the frequency of "TRUNCATE"
commands on foreign tables is heavy in a local session, then it does
make sense to not destroy the hash, otherwise destroy the hash.

In most cases, TRUNCATE is not a frequently executed command.
So, indeed, it is just a matter of preference.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

Attachments:

pgsql14-truncate-on-foreign-table.v10.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v10.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bebac2f2e0 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..45bb03f56d 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,243 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable ORDER BY id;
+ id |                y                 
+----+----------------------------------
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable ORDER BY id;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;    -- empty
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;  -- empty
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent ORDER BY id;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent; -- empty
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;    -- empty
+ id | x 
+----+---
+(0 rows)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, 
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9150,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..6fa20834a4 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false,NULL);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..4e620f67ff 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,101 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable ORDER BY id;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable ORDER BY id;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable ORDER BY id;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;    -- empty
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;  -- empty
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent ORDER BY id;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent; -- empty
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable ORDER BY id;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable ORDER BY id;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, 
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..97eea600cc 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers and has the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table is specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     that it is specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table is not specified in the <command>TRUNCATE</command> command,
+     but is truncated due to a dependency (for example, as a leaf partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps a remote table
+       that has inherited children or partition leaves.
+       If <command>TRUNCATE</command> specifies the foreign tables with the
+       <literal>ONLY</literal> clause, remote queries issued by
+       <filename>postgres_fdw</filename> also specify the remote tables with the
+       <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it looks like the
+       <command>TRUNCATE</command> command only partially eliminated the contents
+       of the foreign tables.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 0ba1e2d44d..01ec9b8b0a 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1970,9 +1970,7 @@ testdb=&gt;
         </para>
 
         <para>
-        If you edit a file or the previous query, and you quit the editor without
-        modifying the file, the query buffer is cleared.
-        Otherwise, the new contents of the query buffer are re-parsed according to
+        The new contents of the query buffer are then re-parsed according to
         the normal rules of <application>psql</application>, treating the
         whole buffer as a single line.  Any complete queries are immediately
         executed; that is, if the query buffer contains or ends with a
@@ -2041,8 +2039,7 @@ Tue Oct 26 21:40:57 CEST 1999
          in the form of a <command>CREATE OR REPLACE FUNCTION</command> or
          <command>CREATE OR REPLACE PROCEDURE</command> command.
          Editing is done in the same way as for <literal>\edit</literal>.
-         If you quit the editor without saving, the statement is discarded.
-         If you save and exit the editor, the updated command is executed immediately
+         After the editor exits, the updated command is executed immediately
          if you added a semicolon to it.  Otherwise it is redisplayed;
          type semicolon or <literal>\g</literal> to send it, or <literal>\r</literal>
          to cancel.
@@ -2118,8 +2115,7 @@ Tue Oct 26 21:40:57 CEST 1999
          This command fetches and edits the definition of the named view,
          in the form of a <command>CREATE OR REPLACE VIEW</command> command.
          Editing is done in the same way as for <literal>\edit</literal>.
-         If you quit the editor without saving, the statement is discarded.
-         If you save and exit the editor, the updated command is executed immediately
+         After the editor exits, the updated command is executed immediately
          if you added a semicolon to it.  Otherwise it is redisplayed;
          type semicolon or <literal>\g</literal> to send it, or <literal>\r</literal>
          to cancel.
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..d9f97561a5 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 174727b501..dce7088e6b 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -407,11 +407,8 @@ AuxiliaryProcessMain(int argc, char *argv[])
 		 */
 		CreateAuxProcessResourceOwner();
 
-		/* Initialize statistics reporting */
-		pgstat_initialize();
-
 		/* Initialize backend status information */
-		pgstat_beinit();
+		pgstat_initialize();
 		pgstat_bestart();
 
 		/* register a before-shutdown callback for LWLock cleanup */
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..05bcb12d1d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed.  If any foreign tables are involved, their FDW
+ * callbacks are invoked once per server, after regular tables are truncated.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated with the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2129,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 4c4b072068..498d6ee123 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -46,6 +46,7 @@
 #include "libpq/pqsignal.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/fork_process.h"
@@ -62,6 +63,8 @@
 #include "storage/pg_shmem.h"
 #include "storage/proc.h"
 #include "storage/procsignal.h"
+#include "storage/sinvaladt.h"
+#include "utils/ascii.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
@@ -105,12 +108,26 @@
 #define PGSTAT_FUNCTION_HASH_SIZE	512
 
 
+/* ----------
+ * Total number of backends including auxiliary
+ *
+ * We reserve a slot for each possible BackendId, plus one for each
+ * possible auxiliary process type.  (This scheme assumes there is not
+ * more than one of any auxiliary process type at a time.) MaxBackends
+ * includes autovacuum workers and background workers as well.
+ * ----------
+ */
+#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES)
+
+
 /* ----------
  * GUC parameters
  * ----------
  */
+bool		pgstat_track_activities = false;
 bool		pgstat_track_counts = false;
 int			pgstat_track_functions = TRACK_FUNC_OFF;
+int			pgstat_track_activity_query_size = 1024;
 
 /* ----------
  * Built from GUC parameter
@@ -242,8 +259,8 @@ static int	pgStatXactCommit = 0;
 static int	pgStatXactRollback = 0;
 PgStat_Counter pgStatBlockReadTime = 0;
 PgStat_Counter pgStatBlockWriteTime = 0;
-PgStat_Counter pgStatActiveTime = 0;
-PgStat_Counter pgStatTransactionIdleTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
 SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
 
 /* Record that's written to 2PC state file when pgstat state is persisted */
@@ -266,6 +283,12 @@ typedef struct TwoPhasePgStatRecord
 static MemoryContext pgStatLocalContext = NULL;
 static HTAB *pgStatDBHash = NULL;
 
+/* Status for backends including auxiliary */
+static LocalPgBackendStatus *localBackendStatusTable = NULL;
+
+/* Total number of backends including auxiliary */
+static int	localNumBackends = 0;
+
 /*
  * Cluster wide statistics, kept in the stats collector.
  * Contains statistics that are not collected per database
@@ -302,6 +325,7 @@ static pid_t pgstat_forkexec(void);
 #endif
 
 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
+static void pgstat_beshutdown_hook(int code, Datum arg);
 
 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
 static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
@@ -311,6 +335,7 @@ static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanen
 static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
 static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
 static void backend_read_statsfile(void);
+static void pgstat_read_current_status(void);
 
 static bool pgstat_write_statsfile_needed(void);
 static bool pgstat_db_requested(Oid databaseid);
@@ -2732,6 +2757,65 @@ pgstat_fetch_stat_funcentry(Oid func_id)
 }
 
 
+/* ----------
+ * pgstat_fetch_stat_beentry() -
+ *
+ *	Support function for the SQL-callable pgstat* functions. Returns
+ *	our local copy of the current-activity entry for one backend.
+ *
+ *	NB: caller is responsible for a check if the user is permitted to see
+ *	this info (especially the querystring).
+ * ----------
+ */
+PgBackendStatus *
+pgstat_fetch_stat_beentry(int beid)
+{
+	pgstat_read_current_status();
+
+	if (beid < 1 || beid > localNumBackends)
+		return NULL;
+
+	return &localBackendStatusTable[beid - 1].backendStatus;
+}
+
+
+/* ----------
+ * pgstat_fetch_stat_local_beentry() -
+ *
+ *	Like pgstat_fetch_stat_beentry() but with locally computed additions (like
+ *	xid and xmin values of the backend)
+ *
+ *	NB: caller is responsible for a check if the user is permitted to see
+ *	this info (especially the querystring).
+ * ----------
+ */
+LocalPgBackendStatus *
+pgstat_fetch_stat_local_beentry(int beid)
+{
+	pgstat_read_current_status();
+
+	if (beid < 1 || beid > localNumBackends)
+		return NULL;
+
+	return &localBackendStatusTable[beid - 1];
+}
+
+
+/* ----------
+ * pgstat_fetch_stat_numbackends() -
+ *
+ *	Support function for the SQL-callable pgstat* functions. Returns
+ *	the maximum current backend id.
+ * ----------
+ */
+int
+pgstat_fetch_stat_numbackends(void)
+{
+	pgstat_read_current_status();
+
+	return localNumBackends;
+}
+
 /*
  * ---------
  * pgstat_fetch_stat_archiver() -
@@ -2815,16 +2899,421 @@ pgstat_fetch_replslot(int *nslots_p)
 	return replSlotStats;
 }
 
+/* ------------------------------------------------------------
+ * Functions for management of the shared-memory PgBackendStatus array
+ * ------------------------------------------------------------
+ */
+
+static PgBackendStatus *BackendStatusArray = NULL;
+static PgBackendStatus *MyBEEntry = NULL;
+static char *BackendAppnameBuffer = NULL;
+static char *BackendClientHostnameBuffer = NULL;
+static char *BackendActivityBuffer = NULL;
+static Size BackendActivityBufferSize = 0;
+#ifdef USE_SSL
+static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
+#endif
+#ifdef ENABLE_GSS
+static PgBackendGSSStatus *BackendGssStatusBuffer = NULL;
+#endif
+
+
+/*
+ * Report shared-memory space needed by CreateSharedBackendStatus.
+ */
+Size
+BackendStatusShmemSize(void)
+{
+	Size		size;
+
+	/* BackendStatusArray: */
+	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
+	/* BackendAppnameBuffer: */
+	size = add_size(size,
+					mul_size(NAMEDATALEN, NumBackendStatSlots));
+	/* BackendClientHostnameBuffer: */
+	size = add_size(size,
+					mul_size(NAMEDATALEN, NumBackendStatSlots));
+	/* BackendActivityBuffer: */
+	size = add_size(size,
+					mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
+#ifdef USE_SSL
+	/* BackendSslStatusBuffer: */
+	size = add_size(size,
+					mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
+#endif
+#ifdef ENABLE_GSS
+	/* BackendGssStatusBuffer: */
+	size = add_size(size,
+					mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots));
+#endif
+	return size;
+}
+
+/*
+ * Initialize the shared status array and several string buffers
+ * during postmaster startup.
+ */
+void
+CreateSharedBackendStatus(void)
+{
+	Size		size;
+	bool		found;
+	int			i;
+	char	   *buffer;
+
+	/* Create or attach to the shared array */
+	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
+	BackendStatusArray = (PgBackendStatus *)
+		ShmemInitStruct("Backend Status Array", size, &found);
+
+	if (!found)
+	{
+		/*
+		 * We're the first - initialize.
+		 */
+		MemSet(BackendStatusArray, 0, size);
+	}
+
+	/* Create or attach to the shared appname buffer */
+	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
+	BackendAppnameBuffer = (char *)
+		ShmemInitStruct("Backend Application Name Buffer", size, &found);
+
+	if (!found)
+	{
+		MemSet(BackendAppnameBuffer, 0, size);
+
+		/* Initialize st_appname pointers. */
+		buffer = BackendAppnameBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_appname = buffer;
+			buffer += NAMEDATALEN;
+		}
+	}
+
+	/* Create or attach to the shared client hostname buffer */
+	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
+	BackendClientHostnameBuffer = (char *)
+		ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
+
+	if (!found)
+	{
+		MemSet(BackendClientHostnameBuffer, 0, size);
+
+		/* Initialize st_clienthostname pointers. */
+		buffer = BackendClientHostnameBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_clienthostname = buffer;
+			buffer += NAMEDATALEN;
+		}
+	}
+
+	/* Create or attach to the shared activity buffer */
+	BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
+										 NumBackendStatSlots);
+	BackendActivityBuffer = (char *)
+		ShmemInitStruct("Backend Activity Buffer",
+						BackendActivityBufferSize,
+						&found);
+
+	if (!found)
+	{
+		MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
+
+		/* Initialize st_activity pointers. */
+		buffer = BackendActivityBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_activity_raw = buffer;
+			buffer += pgstat_track_activity_query_size;
+		}
+	}
+
+#ifdef USE_SSL
+	/* Create or attach to the shared SSL status buffer */
+	size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots);
+	BackendSslStatusBuffer = (PgBackendSSLStatus *)
+		ShmemInitStruct("Backend SSL Status Buffer", size, &found);
+
+	if (!found)
+	{
+		PgBackendSSLStatus *ptr;
+
+		MemSet(BackendSslStatusBuffer, 0, size);
+
+		/* Initialize st_sslstatus pointers. */
+		ptr = BackendSslStatusBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_sslstatus = ptr;
+			ptr++;
+		}
+	}
+#endif
+
+#ifdef ENABLE_GSS
+	/* Create or attach to the shared GSSAPI status buffer */
+	size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots);
+	BackendGssStatusBuffer = (PgBackendGSSStatus *)
+		ShmemInitStruct("Backend GSS Status Buffer", size, &found);
+
+	if (!found)
+	{
+		PgBackendGSSStatus *ptr;
+
+		MemSet(BackendGssStatusBuffer, 0, size);
+
+		/* Initialize st_gssstatus pointers. */
+		ptr = BackendGssStatusBuffer;
+		for (i = 0; i < NumBackendStatSlots; i++)
+		{
+			BackendStatusArray[i].st_gssstatus = ptr;
+			ptr++;
+		}
+	}
+#endif
+}
+
+
+/* ----------
+ * pgstat_initialize() -
+ *
+ *	Initialize pgstats state, and set up our on-proc-exit hook.
+ *	Called from InitPostgres and AuxiliaryProcessMain. For auxiliary process,
+ *	MyBackendId is invalid. Otherwise, MyBackendId must be set,
+ *	but we must not have started any transaction yet (since the
+ *	exit hook must run after the last transaction exit).
+ *	NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
+ * ----------
+ */
+void
+pgstat_initialize(void)
+{
+	/* Initialize MyBEEntry */
+	if (MyBackendId != InvalidBackendId)
+	{
+		Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
+		MyBEEntry = &BackendStatusArray[MyBackendId - 1];
+	}
+	else
+	{
+		/* Must be an auxiliary process */
+		Assert(MyAuxProcType != NotAnAuxProcess);
+
+		/*
+		 * Assign the MyBEEntry for an auxiliary process.  Since it doesn't
+		 * have a BackendId, the slot is statically allocated based on the
+		 * auxiliary process type (MyAuxProcType).  Backends use slots indexed
+		 * in the range from 1 to MaxBackends (inclusive), so we use
+		 * MaxBackends + AuxBackendType + 1 as the index of the slot for an
+		 * auxiliary process.
+		 */
+		MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
+	}
+
+	/*
+	 * Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can
+	 * calculate how much pgWalUsage counters are increased by subtracting
+	 * prevWalUsage from pgWalUsage.
+	 */
+	prevWalUsage = pgWalUsage;
+
+	/* Set up a process-exit hook to clean up */
+	on_shmem_exit(pgstat_beshutdown_hook, 0);
+}
+
+/* ----------
+ * pgstat_bestart() -
+ *
+ *	Initialize this backend's entry in the PgBackendStatus array.
+ *	Called from InitPostgres.
+ *
+ *	Apart from auxiliary processes, MyBackendId, MyDatabaseId,
+ *	session userid, and application_name must be set for a
+ *	backend (hence, this cannot be combined with pgstat_initialize).
+ *	Note also that we must be inside a transaction if this isn't an aux
+ *	process, as we may need to do encoding conversion on some strings.
+ * ----------
+ */
+void
+pgstat_bestart(void)
+{
+	volatile PgBackendStatus *vbeentry = MyBEEntry;
+	PgBackendStatus lbeentry;
+#ifdef USE_SSL
+	PgBackendSSLStatus lsslstatus;
+#endif
+#ifdef ENABLE_GSS
+	PgBackendGSSStatus lgssstatus;
+#endif
+
+	/* pgstats state must be initialized from pgstat_initialize() */
+	Assert(vbeentry != NULL);
+
+	/*
+	 * To minimize the time spent modifying the PgBackendStatus entry, and
+	 * avoid risk of errors inside the critical section, we first copy the
+	 * shared-memory struct to a local variable, then modify the data in the
+	 * local variable, then copy the local variable back to shared memory.
+	 * Only the last step has to be inside the critical section.
+	 *
+	 * Most of the data we copy from shared memory is just going to be
+	 * overwritten, but the struct's not so large that it's worth the
+	 * maintenance hassle to copy only the needful fields.
+	 */
+	memcpy(&lbeentry,
+		   unvolatize(PgBackendStatus *, vbeentry),
+		   sizeof(PgBackendStatus));
+
+	/* These structs can just start from zeroes each time, though */
+#ifdef USE_SSL
+	memset(&lsslstatus, 0, sizeof(lsslstatus));
+#endif
+#ifdef ENABLE_GSS
+	memset(&lgssstatus, 0, sizeof(lgssstatus));
+#endif
+
+	/*
+	 * Now fill in all the fields of lbeentry, except for strings that are
+	 * out-of-line data.  Those have to be handled separately, below.
+	 */
+	lbeentry.st_procpid = MyProcPid;
+	lbeentry.st_backendType = MyBackendType;
+	lbeentry.st_proc_start_timestamp = MyStartTimestamp;
+	lbeentry.st_activity_start_timestamp = 0;
+	lbeentry.st_state_start_timestamp = 0;
+	lbeentry.st_xact_start_timestamp = 0;
+	lbeentry.st_databaseid = MyDatabaseId;
+
+	/* We have userid for client-backends, wal-sender and bgworker processes */
+	if (lbeentry.st_backendType == B_BACKEND
+		|| lbeentry.st_backendType == B_WAL_SENDER
+		|| lbeentry.st_backendType == B_BG_WORKER)
+		lbeentry.st_userid = GetSessionUserId();
+	else
+		lbeentry.st_userid = InvalidOid;
+
+	/*
+	 * We may not have a MyProcPort (eg, if this is the autovacuum process).
+	 * If so, use all-zeroes client address, which is dealt with specially in
+	 * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
+	 */
+	if (MyProcPort)
+		memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr,
+			   sizeof(lbeentry.st_clientaddr));
+	else
+		MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr));
+
+#ifdef USE_SSL
+	if (MyProcPort && MyProcPort->ssl_in_use)
+	{
+		lbeentry.st_ssl = true;
+		lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort);
+		strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN);
+		strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN);
+		be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN);
+		be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN);
+		be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN);
+	}
+	else
+	{
+		lbeentry.st_ssl = false;
+	}
+#else
+	lbeentry.st_ssl = false;
+#endif
+
+#ifdef ENABLE_GSS
+	if (MyProcPort && MyProcPort->gss != NULL)
+	{
+		const char *princ = be_gssapi_get_princ(MyProcPort);
+
+		lbeentry.st_gss = true;
+		lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort);
+		lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort);
+		if (princ)
+			strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN);
+	}
+	else
+	{
+		lbeentry.st_gss = false;
+	}
+#else
+	lbeentry.st_gss = false;
+#endif
+
+	lbeentry.st_state = STATE_UNDEFINED;
+	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
+	lbeentry.st_progress_command_target = InvalidOid;
+
+	/*
+	 * we don't zero st_progress_param here to save cycles; nobody should
+	 * examine it until st_progress_command has been set to something other
+	 * than PROGRESS_COMMAND_INVALID
+	 */
+
+	/*
+	 * We're ready to enter the critical section that fills the shared-memory
+	 * status entry.  We follow the protocol of bumping st_changecount before
+	 * and after; and make sure it's even afterwards.  We use a volatile
+	 * pointer here to ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry);
+
+	/* make sure we'll memcpy the same st_changecount back */
+	lbeentry.st_changecount = vbeentry->st_changecount;
+
+	memcpy(unvolatize(PgBackendStatus *, vbeentry),
+		   &lbeentry,
+		   sizeof(PgBackendStatus));
+
+	/*
+	 * We can write the out-of-line strings and structs using the pointers
+	 * that are in lbeentry; this saves some de-volatilizing messiness.
+	 */
+	lbeentry.st_appname[0] = '\0';
+	if (MyProcPort && MyProcPort->remote_hostname)
+		strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname,
+				NAMEDATALEN);
+	else
+		lbeentry.st_clienthostname[0] = '\0';
+	lbeentry.st_activity_raw[0] = '\0';
+	/* Also make sure the last byte in each string area is always 0 */
+	lbeentry.st_appname[NAMEDATALEN - 1] = '\0';
+	lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0';
+	lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0';
+
+#ifdef USE_SSL
+	memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus));
+#endif
+#ifdef ENABLE_GSS
+	memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus));
+#endif
+
+	PGSTAT_END_WRITE_ACTIVITY(vbeentry);
+
+	/* Update app name to current GUC setting */
+	if (application_name)
+		pgstat_report_appname(application_name);
+}
+
 /*
  * Shut down a single backend's statistics reporting at process exit.
  *
  * Flush any remaining statistics counts out to the collector.
  * Without this, operations triggered during backend exit (such as
  * temp table deletions) won't be counted.
+ *
+ * Lastly, clear out our entry in the PgBackendStatus array.
  */
 static void
-pgstat_shutdown_hook(int code, Datum arg)
+pgstat_beshutdown_hook(int code, Datum arg)
 {
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
 	/*
 	 * If we got as far as discovering our own database ID, we can report what
 	 * we did to the collector.  Otherwise, we'd be sending an invalid
@@ -2833,29 +3322,584 @@ pgstat_shutdown_hook(int code, Datum arg)
 	 */
 	if (OidIsValid(MyDatabaseId))
 		pgstat_report_stat(true);
+
+	/*
+	 * Clear my status entry, following the protocol of bumping st_changecount
+	 * before and after.  We use a volatile pointer here to ensure the
+	 * compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	beentry->st_procpid = 0;	/* mark invalid */
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+
 /* ----------
- * pgstat_initialize() -
+ * pgstat_report_activity() -
  *
- *	Initialize pgstats state, and set up our on-proc-exit hook.
- *	Called from InitPostgres and AuxiliaryProcessMain.
+ *	Called from tcop/postgres.c to report what the backend is actually doing.
+ *	With tracking enabled, a NULL cmd_str leaves the stored query text alone.
  *
- *	NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
+ * All updates of the status entry follow the protocol of bumping
+ * st_changecount before and after.  We use a volatile pointer here to
+ * ensure the compiler doesn't try to get cute.
  * ----------
  */
 void
-pgstat_initialize(void)
+pgstat_report_activity(BackendState state, const char *cmd_str)
 {
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	TimestampTz start_timestamp;
+	TimestampTz current_timestamp;
+	int			len = 0;
+
+	TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
+
+	if (!beentry)
+		return;					/* no status entry to update */
+
+	if (!pgstat_track_activities)
+	{
+		if (beentry->st_state != STATE_DISABLED)
+		{
+			volatile PGPROC *proc = MyProc;
+
+			/*
+			 * track_activities is disabled, but we last reported a
+			 * non-disabled state.  As our final update, change the state and
+			 * clear fields we will not be updating anymore.
+			 */
+			PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+			beentry->st_state = STATE_DISABLED;
+			beentry->st_state_start_timestamp = 0;
+			beentry->st_activity_raw[0] = '\0';
+			beentry->st_activity_start_timestamp = 0;
+			/* st_xact_start_timestamp and wait_event_info are also disabled */
+			beentry->st_xact_start_timestamp = 0;
+			proc->wait_event_info = 0;
+			PGSTAT_END_WRITE_ACTIVITY(beentry);
+		}
+		return;
+	}
+
 	/*
-	 * Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can
-	 * calculate how much pgWalUsage counters are increased by substracting
-	 * prevWalUsage from pgWalUsage.
+	 * To minimize the time spent modifying the entry, and avoid risk of
+	 * errors inside the critical section, fetch all the needed data first.
 	 */
-	prevWalUsage = pgWalUsage;
+	start_timestamp = GetCurrentStatementStartTimestamp();
+	if (cmd_str != NULL)
+	{
+		/*
+		 * Compute length of to-be-stored string unaware of multi-byte
+		 * characters. For speed reasons that'll get corrected on read, rather
+		 * than computed every write.
+		 */
+		len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1);
+	}
+	current_timestamp = GetCurrentTimestamp();
 
-	/* Set up a process-exit hook to clean up */
-	on_shmem_exit(pgstat_shutdown_hook, 0);
+	/*
+	 * If we are leaving an "active" or "idle in transaction" state, add the
+	 * time spent in it (in microseconds) to the matching counter.
+	 */
+	if ((beentry->st_state == STATE_RUNNING ||
+		 beentry->st_state == STATE_FASTPATH ||
+		 beentry->st_state == STATE_IDLEINTRANSACTION ||
+		 beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+		state != beentry->st_state)
+	{
+		long		secs;
+		int			usecs;
+
+		TimestampDifference(beentry->st_state_start_timestamp,
+							current_timestamp,
+							&secs, &usecs);
+
+		if (beentry->st_state == STATE_RUNNING ||
+			beentry->st_state == STATE_FASTPATH)
+			pgStatActiveTime += secs * 1000000 + usecs;
+		else
+			pgStatTransactionIdleTime += secs * 1000000 + usecs;
+	}
+
+	/*
+	 * Now update the status entry
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	beentry->st_state = state;
+	beentry->st_state_start_timestamp = current_timestamp;
+
+	if (cmd_str != NULL)
+	{
+		memcpy((char *) beentry->st_activity_raw, cmd_str, len);
+		beentry->st_activity_raw[len] = '\0';
+		beentry->st_activity_start_timestamp = start_timestamp;
+	}
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_start_command() -
+ *
+ * Set st_progress_command = cmdtype and st_progress_command_target = relid in
+ * own backend entry.  Also, zero-initialize st_progress_param array.
+ *-----------
+ */
+void
+pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!beentry || !pgstat_track_activities)
+		return;					/* no entry, or activity tracking is off */
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_progress_command = cmdtype;
+	beentry->st_progress_command_target = relid;
+	MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_update_param() -
+ *
+ * Store val as the index'th member of st_progress_param[] in own backend entry.
+ *-----------
+ */
+void
+pgstat_progress_update_param(int index, int64 val)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
+
+	if (!beentry || !pgstat_track_activities)
+		return;					/* no entry, or activity tracking is off */
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_progress_param[index] = val;
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_update_multi_param() -
+ *
+ * Set st_progress_param[index[i]] = val[i] for each i < nparam, all inside one
+ * st_changecount write section, so readers won't see intermediate states.
+ *-----------
+ */
+void
+pgstat_progress_update_multi_param(int nparam, const int *index,
+								   const int64 *val)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	int			i;
+
+	if (!beentry || !pgstat_track_activities || nparam == 0)
+		return;					/* nothing to write, or nowhere to write it */
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	for (i = 0; i < nparam; ++i)
+	{
+		Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
+
+		beentry->st_progress_param[index[i]] = val[i];
+	}
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*-----------
+ * pgstat_progress_end_command() -
+ *
+ * Reset st_progress_command to PROGRESS_COMMAND_INVALID and
+ * st_progress_command_target to InvalidOid.  This signals the end of the command.
+ *-----------
+ */
+void
+pgstat_progress_end_command(void)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!beentry || !pgstat_track_activities)
+		return;					/* no entry, or activity tracking is off */
+
+	if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
+		return;					/* already reset; skip the write */
+
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
+	beentry->st_progress_command_target = InvalidOid;
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/* ----------
+ * pgstat_report_appname() -
+ *
+ *	Copy appname (clipped to NAMEDATALEN - 1 bytes) into our st_appname.
+ * ----------
+ */
+void
+pgstat_report_appname(const char *appname)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	int			len;
+
+	if (!beentry)
+		return;					/* no status entry to update */
+
+	/* This should be unnecessary if GUC did its job, but be safe */
+	len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
+
+	/*
+	 * Update my status entry, following the protocol of bumping
+	 * st_changecount before and after.  We use a volatile pointer here to
+	 * ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	memcpy((char *) beentry->st_appname, appname, len);
+	beentry->st_appname[len] = '\0';
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/*
+ * Report current transaction start timestamp (st_xact_start_timestamp) as
+ * tstamp.  Zero means there is no active transaction.
+ */
+void
+pgstat_report_xact_timestamp(TimestampTz tstamp)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!pgstat_track_activities || !beentry)
+		return;					/* activity tracking is off, or no entry */
+
+	/*
+	 * Update my status entry, following the protocol of bumping
+	 * st_changecount before and after.  We use a volatile pointer here to
+	 * ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+
+	beentry->st_xact_start_timestamp = tstamp;
+
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
+/* ----------
+ * pgstat_read_current_status() -
+ *
+ *	Copy the in-use entries (st_procpid > 0) of the PgBackendStatus array to
+ *	local memory, if not already done in this transaction.
+ * ----------
+ */
+static void
+pgstat_read_current_status(void)
+{
+	volatile PgBackendStatus *beentry;
+	LocalPgBackendStatus *localtable;
+	LocalPgBackendStatus *localentry;
+	char	   *localappname,
+			   *localclienthostname,
+			   *localactivity;
+#ifdef USE_SSL
+	PgBackendSSLStatus *localsslstatus;
+#endif
+#ifdef ENABLE_GSS
+	PgBackendGSSStatus *localgssstatus;
+#endif
+	int			i;
+
+	Assert(!pgStatRunningInCollector);
+	if (localBackendStatusTable)
+		return;					/* already done */
+
+	pgstat_setup_memcxt();
+
+	/*
+	 * Allocate storage for local copy of state data.  We can presume that
+	 * none of these requests overflow size_t, because we already calculated
+	 * the same values using mul_size during shmem setup.  However, with
+	 * probably-silly values of pgstat_track_activity_query_size and
+	 * max_connections, the localactivity buffer could exceed 1GB, so use
+	 * "huge" allocation for that one.
+	 */
+	localtable = (LocalPgBackendStatus *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   sizeof(LocalPgBackendStatus) * NumBackendStatSlots);
+	localappname = (char *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   NAMEDATALEN * NumBackendStatSlots);
+	localclienthostname = (char *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   NAMEDATALEN * NumBackendStatSlots);
+	localactivity = (char *)
+		MemoryContextAllocHuge(pgStatLocalContext,
+							   pgstat_track_activity_query_size * NumBackendStatSlots);
+#ifdef USE_SSL
+	localsslstatus = (PgBackendSSLStatus *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   sizeof(PgBackendSSLStatus) * NumBackendStatSlots);
+#endif
+#ifdef ENABLE_GSS
+	localgssstatus = (PgBackendGSSStatus *)
+		MemoryContextAlloc(pgStatLocalContext,
+						   sizeof(PgBackendGSSStatus) * NumBackendStatSlots);
+#endif
+
+	localNumBackends = 0;
+
+	beentry = BackendStatusArray;
+	localentry = localtable;
+	for (i = 1; i <= NumBackendStatSlots; i++)
+	{
+		/*
+		 * Follow the protocol of retrying if st_changecount changes while we
+		 * copy the entry, or if it's odd.  (The check for odd is needed to
+		 * cover the case where we are able to completely copy the entry while
+		 * the source backend is between increment steps.)  We use a volatile
+		 * pointer here to ensure the compiler doesn't try to get cute.
+		 */
+		for (;;)
+		{
+			int			before_changecount;
+			int			after_changecount;
+
+			pgstat_begin_read_activity(beentry, before_changecount);
+
+			localentry->backendStatus.st_procpid = beentry->st_procpid;
+			/* Skip all the data-copying work if entry is not in use */
+			if (localentry->backendStatus.st_procpid > 0)
+			{
+				memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus));
+
+				/*
+				 * For each PgBackendStatus field that is a pointer, copy the
+				 * pointed-to data, then adjust the local copy of the pointer
+				 * field to point at the local copy of the data.
+				 *
+				 * strcpy is safe even if the string is modified concurrently,
+				 * because there's always a \0 at the end of the buffer.
+				 */
+				strcpy(localappname, (char *) beentry->st_appname);
+				localentry->backendStatus.st_appname = localappname;
+				strcpy(localclienthostname, (char *) beentry->st_clienthostname);
+				localentry->backendStatus.st_clienthostname = localclienthostname;
+				strcpy(localactivity, (char *) beentry->st_activity_raw);
+				localentry->backendStatus.st_activity_raw = localactivity;
+#ifdef USE_SSL
+				if (beentry->st_ssl)
+				{
+					memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
+					localentry->backendStatus.st_sslstatus = localsslstatus;
+				}
+#endif
+#ifdef ENABLE_GSS
+				if (beentry->st_gss)
+				{
+					memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus));
+					localentry->backendStatus.st_gssstatus = localgssstatus;
+				}
+#endif
+			}
+
+			pgstat_end_read_activity(beentry, after_changecount);
+
+			if (pgstat_read_activity_complete(before_changecount,
+											  after_changecount))
+				break;
+
+			/* Make sure we can break out of loop if stuck... */
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		beentry++;
+		/* Only valid entries get included into the local array */
+		if (localentry->backendStatus.st_procpid > 0)
+		{
+			BackendIdGetTransactionIds(i,
+									   &localentry->backend_xid,
+									   &localentry->backend_xmin);
+
+			localentry++;
+			localappname += NAMEDATALEN;
+			localclienthostname += NAMEDATALEN;
+			localactivity += pgstat_track_activity_query_size;
+#ifdef USE_SSL
+			localsslstatus++;
+#endif
+#ifdef ENABLE_GSS
+			localgssstatus++;
+#endif
+			localNumBackends++;
+		}
+	}
+
+	/* Set the pointer only after completion of a valid table */
+	localBackendStatusTable = localtable;
+}
+
+/* ----------
+ * pgstat_get_backend_current_activity() -
+ *
+ *	Return a string representing the current activity of the backend with
+ *	the specified PID, read directly from the BackendStatusArray (so it is
+ *	current regardless of the age of our snapshot of the status array).
+ *	If checkUser, non-superusers only see backends with their own user ID.
+ *
+ *	It is the caller's responsibility to invoke this only for backends whose
+ *	state is expected to remain stable while the result is in use.  The
+ *	only current use is in deadlock reporting, where we can expect that
+ *	the target backend is blocked on a lock.  (There are corner cases
+ *	where the target's wait could get aborted while we are looking at it,
+ *	but the very worst consequence is to return a pointer to a string
+ *	that's been changed, so we won't worry too much.)
+ *
+ *	Note: return strings for special cases match pg_stat_get_backend_activity.
+ * ----------
+ */
+const char *
+pgstat_get_backend_current_activity(int pid, bool checkUser)
+{
+	PgBackendStatus *beentry;
+	int			i;
+
+	beentry = BackendStatusArray;
+	for (i = 1; i <= MaxBackends; i++)	/* NOTE(review): not NumBackendStatSlots */
+	{
+		/*
+		 * Although we expect the target backend's entry to be stable, that
+		 * doesn't imply that anyone else's is.  To avoid identifying the
+		 * wrong backend, while we check for a match to the desired PID we
+		 * must follow the protocol of retrying if st_changecount changes
+		 * while we examine the entry, or if it's odd.  (This might be
+		 * unnecessary, since fetching or storing an int is almost certainly
+		 * atomic, but let's play it safe.)  We use a volatile pointer here to
+		 * ensure the compiler doesn't try to get cute.
+		 */
+		volatile PgBackendStatus *vbeentry = beentry;
+		bool		found;
+
+		for (;;)
+		{
+			int			before_changecount;
+			int			after_changecount;
+
+			pgstat_begin_read_activity(vbeentry, before_changecount);
+
+			found = (vbeentry->st_procpid == pid);
+
+			pgstat_end_read_activity(vbeentry, after_changecount);
+
+			if (pgstat_read_activity_complete(before_changecount,
+											  after_changecount))
+				break;
+
+			/* Make sure we can break out of loop if stuck... */
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		if (found)
+		{
+			/* Now it is safe to use the non-volatile pointer */
+			if (checkUser && !superuser() && beentry->st_userid != GetUserId())
+				return "<insufficient privilege>";
+			else if (*(beentry->st_activity_raw) == '\0')
+				return "<command string not enabled>";
+			else
+			{
+				/* this'll leak a bit of memory, but that seems acceptable */
+				return pgstat_clip_activity(beentry->st_activity_raw);
+			}
+		}
+
+		beentry++;
+	}
+
+	/* If we get here, caller is in error ... */
+	return "<backend information not available>";
+}
+
+/* ----------
+ * pgstat_get_crashed_backend_activity() -
+ *
+ *	Return a string representing the current activity of the backend with
+ *	the specified PID.  Like the function above, but reads shared memory with
+ *	the expectation that it may be corrupt.  On success, copy the string
+ *	into "buffer" (of size buflen) and return that pointer.  If the PID is
+ *	not found, or its string is out of bounds or empty, return NULL.
+ *
+ *	This function is only intended to be used by the postmaster to report the
+ *	query that crashed a backend.  In particular, no attempt is made to
+ *	follow the correct concurrency protocol when accessing the
+ *	BackendStatusArray.  But that's OK, in the worst case we'll return a
+ *	corrupted message.  We also must take care not to trip on ereport(ERROR).
+ * ----------
+ */
+const char *
+pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
+{
+	volatile PgBackendStatus *beentry;
+	int			i;
+
+	beentry = BackendStatusArray;
+
+	/*
+	 * We probably shouldn't get here before shared memory has been set up,
+	 * but be safe.
+	 */
+	if (beentry == NULL || BackendActivityBuffer == NULL)
+		return NULL;
+
+	for (i = 1; i <= MaxBackends; i++)	/* NOTE(review): not NumBackendStatSlots */
+	{
+		if (beentry->st_procpid == pid)
+		{
+			/* Read pointer just once, so it can't change after validation */
+			const char *activity = beentry->st_activity_raw;
+			const char *activity_last;
+
+			/*
+			 * We mustn't access activity string before we verify that it
+			 * falls within the BackendActivityBuffer. To make sure that the
+			 * entire string including its ending is contained within the
+			 * buffer, subtract one activity length from the buffer size.
+			 */
+			activity_last = BackendActivityBuffer + BackendActivityBufferSize
+				- pgstat_track_activity_query_size;
+
+			if (activity < BackendActivityBuffer ||
+				activity > activity_last)
+				return NULL;
+
+			/* If no string available, no point in a report */
+			if (activity[0] == '\0')
+				return NULL;
+
+			/*
+			 * Copy only ASCII-safe characters so we don't run into encoding
+			 * problems when reporting the message; and be sure not to run off
+			 * the end of memory.  As only ASCII characters are reported, it
+			 * doesn't seem necessary to perform multibyte aware clipping.
+			 */
+			ascii_safe_strlcpy(buffer, activity,
+							   Min(buflen, pgstat_track_activity_query_size));
+
+			return buffer;
+		}
+
+		beentry++;
+	}
+
+	/* PID not found */
+	return NULL;
 }
 
 /* ------------------------------------------------------------
@@ -4612,15 +5656,10 @@ pgstat_clear_snapshot(void)
 	/* Reset variables */
 	pgStatLocalContext = NULL;
 	pgStatDBHash = NULL;
+	localBackendStatusTable = NULL;
+	localNumBackends = 0;
 	replSlotStats = NULL;
 	nReplSlotStats = 0;
-
-	/*
-	 * Historically the backend_status.c facilities lived in this file, and
-	 * were reset with the same function. For now keep it that way, and
-	 * forward the reset request.
-	 */
-	pgstat_clear_backend_activity_snapshot();
 }
 
 
@@ -5555,6 +6594,50 @@ pgstat_db_requested(Oid databaseid)
 	return false;
 }
 
+/*
+ * Convert a potentially unsafely truncated activity string (see
+ * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated
+ * one of at most pgstat_track_activity_query_size - 1 bytes.
+ *
+ * The returned string is allocated in the caller's memory context and may be
+ * freed.
+ */
+char *
+pgstat_clip_activity(const char *raw_activity)
+{
+	char	   *activity;
+	int			rawlen;
+	int			cliplen;
+
+	/*
+	 * Some callers, like pgstat_get_backend_current_activity(), do not
+	 * guarantee that the buffer isn't concurrently modified. We try to take
+	 * care that the buffer is always terminated by a NUL byte regardless, but
+	 * let's still be paranoid about the string's length. In those cases the
+	 * underlying buffer is guaranteed to be pgstat_track_activity_query_size
+	 * large.
+	 */
+	activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1);
+
+	/* now double-guaranteed to be NUL terminated */
+	rawlen = strlen(activity);
+
+	/*
+	 * All supported server-encodings make it possible to determine the length
+	 * of a multi-byte character from its first byte (this is not the case for
+	 * client encodings, see GB18030). As st_activity_raw is always stored
+	 * using server encoding, this allows us to perform multi-byte aware
+	 * truncation, even if the string earlier was truncated in the middle of a
+	 * multi-byte character.
+	 */
+	cliplen = pg_mbcliplen(activity, rawlen,
+						   pgstat_track_activity_query_size - 1);
+
+	activity[cliplen] = '\0';
+
+	return activity;
+}
+
 /* ----------
  * pgstat_replslot_index
  *
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..7dffd187fa 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 692f21ef6a..897045ee27 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -448,9 +448,6 @@ InitProcess(void)
 	OwnLatch(&MyProc->procLatch);
 	SwitchToSharedLatch();
 
-	/* now that we have a proc, report wait events to shared memory */
-	pgstat_set_wait_event_storage(&MyProc->wait_event_info);
-
 	/*
 	 * We might be reusing a semaphore that belonged to a failed process. So
 	 * be careful and reinitialize its value here.  (This is not strictly
@@ -604,9 +601,6 @@ InitAuxiliaryProcess(void)
 	OwnLatch(&MyProc->procLatch);
 	SwitchToSharedLatch();
 
-	/* now that we have a proc, report wait events to shared memory */
-	pgstat_set_wait_event_storage(&MyProc->wait_event_info);
-
 	/* Check that group locking fields are in a proper initial state. */
 	Assert(MyProc->lockGroupLeader == NULL);
 	Assert(dlist_is_empty(&MyProc->lockGroupMembers));
@@ -890,15 +884,10 @@ ProcKill(int code, Datum arg)
 	/*
 	 * Reset MyLatch to the process local one.  This is so that signal
 	 * handlers et al can continue using the latch after the shared latch
-	 * isn't ours anymore.
-	 *
-	 * Similarly, stop reporting wait events to MyProc->wait_event_info.
-	 *
-	 * After that clear MyProc and disown the shared latch.
+	 * isn't ours anymore. After that clear MyProc and disown the shared
+	 * latch.
 	 */
 	SwitchBackToLocalLatch();
-	pgstat_reset_wait_event_storage();
-
 	proc = MyProc;
 	MyProc = NULL;
 	DisownLatch(&proc->procLatch);
@@ -963,10 +952,13 @@ AuxiliaryProcKill(int code, Datum arg)
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
 
-	/* look at the equivalent ProcKill() code for comments */
+	/*
+	 * Reset MyLatch to the process local one.  This is so that signal
+	 * handlers et al can continue using the latch after the shared latch
+	 * isn't ours anymore. After that clear MyProc and disown the shared
+	 * latch.
+	 */
 	SwitchBackToLocalLatch();
-	pgstat_reset_wait_event_storage();
-
 	proc = MyProc;
 	MyProc = NULL;
 	DisownLatch(&proc->procLatch);
diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile
index 59196f278d..7cbe0cdbe8 100644
--- a/src/backend/utils/activity/Makefile
+++ b/src/backend/utils/activity/Makefile
@@ -14,8 +14,6 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	backend_progress.o \
-	backend_status.o \
 	wait_event.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c
deleted file mode 100644
index 293254993c..0000000000
--- a/src/backend/utils/activity/backend_progress.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/* ----------
- * progress.c
- *
- *	Command progress reporting infrastructure.
- *
- *	Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- *	src/backend/postmaster/progress.c
- * ----------
- */
-#include "postgres.h"
-
-#include "port/atomics.h" /* for memory barriers */
-#include "utils/backend_progress.h"
-#include "utils/backend_status.h"
-
-
-/*-----------
- * pgstat_progress_start_command() -
- *
- * Set st_progress_command (and st_progress_command_target) in own backend
- * entry.  Also, zero-initialize st_progress_param array.
- *-----------
- */
-void
-pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	if (!beentry || !pgstat_track_activities)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-	beentry->st_progress_command = cmdtype;
-	beentry->st_progress_command_target = relid;
-	MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*-----------
- * pgstat_progress_update_param() -
- *
- * Update index'th member in st_progress_param[] of own backend entry.
- *-----------
- */
-void
-pgstat_progress_update_param(int index, int64 val)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
-
-	if (!beentry || !pgstat_track_activities)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-	beentry->st_progress_param[index] = val;
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*-----------
- * pgstat_progress_update_multi_param() -
- *
- * Update multiple members in st_progress_param[] of own backend entry.
- * This is atomic; readers won't see intermediate states.
- *-----------
- */
-void
-pgstat_progress_update_multi_param(int nparam, const int *index,
-								   const int64 *val)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-	int			i;
-
-	if (!beentry || !pgstat_track_activities || nparam == 0)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	for (i = 0; i < nparam; ++i)
-	{
-		Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
-
-		beentry->st_progress_param[index[i]] = val[i];
-	}
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*-----------
- * pgstat_progress_end_command() -
- *
- * Reset st_progress_command (and st_progress_command_target) in own backend
- * entry.  This signals the end of the command.
- *-----------
- */
-void
-pgstat_progress_end_command(void)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	if (!beentry || !pgstat_track_activities)
-		return;
-
-	if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
-		return;
-
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-	beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
-	beentry->st_progress_command_target = InvalidOid;
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
deleted file mode 100644
index a25ec0ee3c..0000000000
--- a/src/backend/utils/activity/backend_status.c
+++ /dev/null
@@ -1,1077 +0,0 @@
-/* ----------
- * backend_status.c
- *	  Backend status reporting infrastructure.
- *
- * Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- *
- * IDENTIFICATION
- *	  src/backend/postmaster/backend_status.c
- * ----------
- */
-#include "postgres.h"
-
-#include "access/xact.h"
-#include "libpq/libpq.h"
-#include "miscadmin.h"
-#include "pg_trace.h"
-#include "pgstat.h"
-#include "port/atomics.h" /* for memory barriers */
-#include "storage/ipc.h"
-#include "storage/proc.h" /* for MyProc */
-#include "storage/sinvaladt.h"
-#include "utils/ascii.h"
-#include "utils/backend_status.h"
-#include "utils/guc.h" /* for application_name */
-#include "utils/memutils.h"
-
-
-/* ----------
- * Total number of backends including auxiliary
- *
- * We reserve a slot for each possible BackendId, plus one for each
- * possible auxiliary process type.  (This scheme assumes there is not
- * more than one of any auxiliary process type at a time.) MaxBackends
- * includes autovacuum workers and background workers as well.
- * ----------
- */
-#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES)
-
-
-/* ----------
- * GUC parameters
- * ----------
- */
-bool		pgstat_track_activities = false;
-int			pgstat_track_activity_query_size = 1024;
-
-
-/* exposed so that progress.c can access it */
-PgBackendStatus *MyBEEntry = NULL;
-
-
-static PgBackendStatus *BackendStatusArray = NULL;
-static char *BackendAppnameBuffer = NULL;
-static char *BackendClientHostnameBuffer = NULL;
-static char *BackendActivityBuffer = NULL;
-static Size BackendActivityBufferSize = 0;
-#ifdef USE_SSL
-static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
-#endif
-#ifdef ENABLE_GSS
-static PgBackendGSSStatus *BackendGssStatusBuffer = NULL;
-#endif
-
-
-/* Status for backends including auxiliary */
-static LocalPgBackendStatus *localBackendStatusTable = NULL;
-
-/* Total number of backends including auxiliary */
-static int	localNumBackends = 0;
-
-static MemoryContext backendStatusSnapContext;
-
-
-static void pgstat_beshutdown_hook(int code, Datum arg);
-static void pgstat_read_current_status(void);
-static void pgstat_setup_backend_status_context(void);
-
-
-/*
- * Report shared-memory space needed by CreateSharedBackendStatus.
- */
-Size
-BackendStatusShmemSize(void)
-{
-	Size		size;
-
-	/* BackendStatusArray: */
-	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
-	/* BackendAppnameBuffer: */
-	size = add_size(size,
-					mul_size(NAMEDATALEN, NumBackendStatSlots));
-	/* BackendClientHostnameBuffer: */
-	size = add_size(size,
-					mul_size(NAMEDATALEN, NumBackendStatSlots));
-	/* BackendActivityBuffer: */
-	size = add_size(size,
-					mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
-#ifdef USE_SSL
-	/* BackendSslStatusBuffer: */
-	size = add_size(size,
-					mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
-#endif
-#ifdef ENABLE_GSS
-	/* BackendGssStatusBuffer: */
-	size = add_size(size,
-					mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots));
-#endif
-	return size;
-}
-
-/*
- * Initialize the shared status array and several string buffers
- * during postmaster startup.
- */
-void
-CreateSharedBackendStatus(void)
-{
-	Size		size;
-	bool		found;
-	int			i;
-	char	   *buffer;
-
-	/* Create or attach to the shared array */
-	size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
-	BackendStatusArray = (PgBackendStatus *)
-		ShmemInitStruct("Backend Status Array", size, &found);
-
-	if (!found)
-	{
-		/*
-		 * We're the first - initialize.
-		 */
-		MemSet(BackendStatusArray, 0, size);
-	}
-
-	/* Create or attach to the shared appname buffer */
-	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
-	BackendAppnameBuffer = (char *)
-		ShmemInitStruct("Backend Application Name Buffer", size, &found);
-
-	if (!found)
-	{
-		MemSet(BackendAppnameBuffer, 0, size);
-
-		/* Initialize st_appname pointers. */
-		buffer = BackendAppnameBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_appname = buffer;
-			buffer += NAMEDATALEN;
-		}
-	}
-
-	/* Create or attach to the shared client hostname buffer */
-	size = mul_size(NAMEDATALEN, NumBackendStatSlots);
-	BackendClientHostnameBuffer = (char *)
-		ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
-
-	if (!found)
-	{
-		MemSet(BackendClientHostnameBuffer, 0, size);
-
-		/* Initialize st_clienthostname pointers. */
-		buffer = BackendClientHostnameBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_clienthostname = buffer;
-			buffer += NAMEDATALEN;
-		}
-	}
-
-	/* Create or attach to the shared activity buffer */
-	BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
-										 NumBackendStatSlots);
-	BackendActivityBuffer = (char *)
-		ShmemInitStruct("Backend Activity Buffer",
-						BackendActivityBufferSize,
-						&found);
-
-	if (!found)
-	{
-		MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize);
-
-		/* Initialize st_activity pointers. */
-		buffer = BackendActivityBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_activity_raw = buffer;
-			buffer += pgstat_track_activity_query_size;
-		}
-	}
-
-#ifdef USE_SSL
-	/* Create or attach to the shared SSL status buffer */
-	size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots);
-	BackendSslStatusBuffer = (PgBackendSSLStatus *)
-		ShmemInitStruct("Backend SSL Status Buffer", size, &found);
-
-	if (!found)
-	{
-		PgBackendSSLStatus *ptr;
-
-		MemSet(BackendSslStatusBuffer, 0, size);
-
-		/* Initialize st_sslstatus pointers. */
-		ptr = BackendSslStatusBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_sslstatus = ptr;
-			ptr++;
-		}
-	}
-#endif
-
-#ifdef ENABLE_GSS
-	/* Create or attach to the shared GSSAPI status buffer */
-	size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots);
-	BackendGssStatusBuffer = (PgBackendGSSStatus *)
-		ShmemInitStruct("Backend GSS Status Buffer", size, &found);
-
-	if (!found)
-	{
-		PgBackendGSSStatus *ptr;
-
-		MemSet(BackendGssStatusBuffer, 0, size);
-
-		/* Initialize st_gssstatus pointers. */
-		ptr = BackendGssStatusBuffer;
-		for (i = 0; i < NumBackendStatSlots; i++)
-		{
-			BackendStatusArray[i].st_gssstatus = ptr;
-			ptr++;
-		}
-	}
-#endif
-}
-
-/*
- * Initialize pgstats backend activity state, and set up our on-proc-exit
- * hook.  Called from InitPostgres and AuxiliaryProcessMain. For auxiliary
- * process, MyBackendId is invalid. Otherwise, MyBackendId must be set, but we
- * must not have started any transaction yet (since the exit hook must run
- * after the last transaction exit).
- *
- * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
- */
-void
-pgstat_beinit(void)
-{
-	/* Initialize MyBEEntry */
-	if (MyBackendId != InvalidBackendId)
-	{
-		Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
-		MyBEEntry = &BackendStatusArray[MyBackendId - 1];
-	}
-	else
-	{
-		/* Must be an auxiliary process */
-		Assert(MyAuxProcType != NotAnAuxProcess);
-
-		/*
-		 * Assign the MyBEEntry for an auxiliary process.  Since it doesn't
-		 * have a BackendId, the slot is statically allocated based on the
-		 * auxiliary process type (MyAuxProcType).  Backends use slots indexed
-		 * in the range from 1 to MaxBackends (inclusive), so we use
-		 * MaxBackends + AuxBackendType + 1 as the index of the slot for an
-		 * auxiliary process.
-		 */
-		MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
-	}
-
-	/* Set up a process-exit hook to clean up */
-	on_shmem_exit(pgstat_beshutdown_hook, 0);
-}
-
-
-/* ----------
- * pgstat_bestart() -
- *
- *	Initialize this backend's entry in the PgBackendStatus array.
- *	Called from InitPostgres.
- *
- *	Apart from auxiliary processes, MyBackendId, MyDatabaseId,
- *	session userid, and application_name must be set for a
- *	backend (hence, this cannot be combined with pgbestat_beinit).
- *	Note also that we must be inside a transaction if this isn't an aux
- *	process, as we may need to do encoding conversion on some strings.
- * ----------
- */
-void
-pgstat_bestart(void)
-{
-	volatile PgBackendStatus *vbeentry = MyBEEntry;
-	PgBackendStatus lbeentry;
-#ifdef USE_SSL
-	PgBackendSSLStatus lsslstatus;
-#endif
-#ifdef ENABLE_GSS
-	PgBackendGSSStatus lgssstatus;
-#endif
-
-	/* pgstats state must be initialized from pgstat_beinit() */
-	Assert(vbeentry != NULL);
-
-	/*
-	 * To minimize the time spent modifying the PgBackendStatus entry, and
-	 * avoid risk of errors inside the critical section, we first copy the
-	 * shared-memory struct to a local variable, then modify the data in the
-	 * local variable, then copy the local variable back to shared memory.
-	 * Only the last step has to be inside the critical section.
-	 *
-	 * Most of the data we copy from shared memory is just going to be
-	 * overwritten, but the struct's not so large that it's worth the
-	 * maintenance hassle to copy only the needful fields.
-	 */
-	memcpy(&lbeentry,
-		   unvolatize(PgBackendStatus *, vbeentry),
-		   sizeof(PgBackendStatus));
-
-	/* These structs can just start from zeroes each time, though */
-#ifdef USE_SSL
-	memset(&lsslstatus, 0, sizeof(lsslstatus));
-#endif
-#ifdef ENABLE_GSS
-	memset(&lgssstatus, 0, sizeof(lgssstatus));
-#endif
-
-	/*
-	 * Now fill in all the fields of lbeentry, except for strings that are
-	 * out-of-line data.  Those have to be handled separately, below.
-	 */
-	lbeentry.st_procpid = MyProcPid;
-	lbeentry.st_backendType = MyBackendType;
-	lbeentry.st_proc_start_timestamp = MyStartTimestamp;
-	lbeentry.st_activity_start_timestamp = 0;
-	lbeentry.st_state_start_timestamp = 0;
-	lbeentry.st_xact_start_timestamp = 0;
-	lbeentry.st_databaseid = MyDatabaseId;
-
-	/* We have userid for client-backends, wal-sender and bgworker processes */
-	if (lbeentry.st_backendType == B_BACKEND
-		|| lbeentry.st_backendType == B_WAL_SENDER
-		|| lbeentry.st_backendType == B_BG_WORKER)
-		lbeentry.st_userid = GetSessionUserId();
-	else
-		lbeentry.st_userid = InvalidOid;
-
-	/*
-	 * We may not have a MyProcPort (eg, if this is the autovacuum process).
-	 * If so, use all-zeroes client address, which is dealt with specially in
-	 * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
-	 */
-	if (MyProcPort)
-		memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr,
-			   sizeof(lbeentry.st_clientaddr));
-	else
-		MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr));
-
-#ifdef USE_SSL
-	if (MyProcPort && MyProcPort->ssl_in_use)
-	{
-		lbeentry.st_ssl = true;
-		lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort);
-		strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN);
-		strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN);
-		be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN);
-		be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN);
-		be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN);
-	}
-	else
-	{
-		lbeentry.st_ssl = false;
-	}
-#else
-	lbeentry.st_ssl = false;
-#endif
-
-#ifdef ENABLE_GSS
-	if (MyProcPort && MyProcPort->gss != NULL)
-	{
-		const char *princ = be_gssapi_get_princ(MyProcPort);
-
-		lbeentry.st_gss = true;
-		lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort);
-		lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort);
-		if (princ)
-			strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN);
-	}
-	else
-	{
-		lbeentry.st_gss = false;
-	}
-#else
-	lbeentry.st_gss = false;
-#endif
-
-	lbeentry.st_state = STATE_UNDEFINED;
-	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
-	lbeentry.st_progress_command_target = InvalidOid;
-
-	/*
-	 * we don't zero st_progress_param here to save cycles; nobody should
-	 * examine it until st_progress_command has been set to something other
-	 * than PROGRESS_COMMAND_INVALID
-	 */
-
-	/*
-	 * We're ready to enter the critical section that fills the shared-memory
-	 * status entry.  We follow the protocol of bumping st_changecount before
-	 * and after; and make sure it's even afterwards.  We use a volatile
-	 * pointer here to ensure the compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry);
-
-	/* make sure we'll memcpy the same st_changecount back */
-	lbeentry.st_changecount = vbeentry->st_changecount;
-
-	memcpy(unvolatize(PgBackendStatus *, vbeentry),
-		   &lbeentry,
-		   sizeof(PgBackendStatus));
-
-	/*
-	 * We can write the out-of-line strings and structs using the pointers
-	 * that are in lbeentry; this saves some de-volatilizing messiness.
-	 */
-	lbeentry.st_appname[0] = '\0';
-	if (MyProcPort && MyProcPort->remote_hostname)
-		strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname,
-				NAMEDATALEN);
-	else
-		lbeentry.st_clienthostname[0] = '\0';
-	lbeentry.st_activity_raw[0] = '\0';
-	/* Also make sure the last byte in each string area is always 0 */
-	lbeentry.st_appname[NAMEDATALEN - 1] = '\0';
-	lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0';
-	lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0';
-
-#ifdef USE_SSL
-	memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus));
-#endif
-#ifdef ENABLE_GSS
-	memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus));
-#endif
-
-	PGSTAT_END_WRITE_ACTIVITY(vbeentry);
-
-	/* Update app name to current GUC setting */
-	if (application_name)
-		pgstat_report_appname(application_name);
-}
-
-/*
- * Clear out our entry in the PgBackendStatus array.
- */
-static void
-pgstat_beshutdown_hook(int code, Datum arg)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	/*
-	 * Clear my status entry, following the protocol of bumping st_changecount
-	 * before and after.  We use a volatile pointer here to ensure the
-	 * compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	beentry->st_procpid = 0;	/* mark invalid */
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*
- * Discard any data collected in the current transaction.  Any subsequent
- * request will cause new snapshots to be read.
- *
- * This is also invoked during transaction commit or abort to discard the
- * no-longer-wanted snapshot.
- */
-void
-pgstat_clear_backend_activity_snapshot(void)
-{
-	/* Release memory, if any was allocated */
-	if (backendStatusSnapContext)
-	{
-		MemoryContextDelete(backendStatusSnapContext);
-		backendStatusSnapContext = NULL;
-	}
-
-	/* Reset variables */
-	localBackendStatusTable = NULL;
-	localNumBackends = 0;
-}
-
-static void
-pgstat_setup_backend_status_context(void)
-{
-	if (!backendStatusSnapContext)
-		backendStatusSnapContext = AllocSetContextCreate(TopMemoryContext,
-													 "Backend Status Snapshot",
-													 ALLOCSET_SMALL_SIZES);
-}
-
-
-/* ----------
- * pgstat_report_activity() -
- *
- *	Called from tcop/postgres.c to report what the backend is actually doing
- *	(but note cmd_str can be NULL for certain cases).
- *
- * All updates of the status entry follow the protocol of bumping
- * st_changecount before and after.  We use a volatile pointer here to
- * ensure the compiler doesn't try to get cute.
- * ----------
- */
-void
-pgstat_report_activity(BackendState state, const char *cmd_str)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-	TimestampTz start_timestamp;
-	TimestampTz current_timestamp;
-	int			len = 0;
-
-	TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
-
-	if (!beentry)
-		return;
-
-	if (!pgstat_track_activities)
-	{
-		if (beentry->st_state != STATE_DISABLED)
-		{
-			volatile PGPROC *proc = MyProc;
-
-			/*
-			 * track_activities is disabled, but we last reported a
-			 * non-disabled state.  As our final update, change the state and
-			 * clear fields we will not be updating anymore.
-			 */
-			PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-			beentry->st_state = STATE_DISABLED;
-			beentry->st_state_start_timestamp = 0;
-			beentry->st_activity_raw[0] = '\0';
-			beentry->st_activity_start_timestamp = 0;
-			/* st_xact_start_timestamp and wait_event_info are also disabled */
-			beentry->st_xact_start_timestamp = 0;
-			proc->wait_event_info = 0;
-			PGSTAT_END_WRITE_ACTIVITY(beentry);
-		}
-		return;
-	}
-
-	/*
-	 * To minimize the time spent modifying the entry, and avoid risk of
-	 * errors inside the critical section, fetch all the needed data first.
-	 */
-	start_timestamp = GetCurrentStatementStartTimestamp();
-	if (cmd_str != NULL)
-	{
-		/*
-		 * Compute length of to-be-stored string unaware of multi-byte
-		 * characters. For speed reasons that'll get corrected on read, rather
-		 * than computed every write.
-		 */
-		len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1);
-	}
-	current_timestamp = GetCurrentTimestamp();
-
-	/*
-	 * If the state has changed from "active" or "idle in transaction",
-	 * calculate the duration.
-	 */
-	if ((beentry->st_state == STATE_RUNNING ||
-		 beentry->st_state == STATE_FASTPATH ||
-		 beentry->st_state == STATE_IDLEINTRANSACTION ||
-		 beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
-		state != beentry->st_state)
-	{
-		long		secs;
-		int			usecs;
-
-		TimestampDifference(beentry->st_state_start_timestamp,
-							current_timestamp,
-							&secs, &usecs);
-
-		if (beentry->st_state == STATE_RUNNING ||
-			beentry->st_state == STATE_FASTPATH)
-			pgstat_count_conn_active_time(secs * 1000000 + usecs);
-		else
-			pgstat_count_conn_txn_idle_time(secs * 1000000 + usecs);
-	}
-
-	/*
-	 * Now update the status entry
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	beentry->st_state = state;
-	beentry->st_state_start_timestamp = current_timestamp;
-
-	if (cmd_str != NULL)
-	{
-		memcpy((char *) beentry->st_activity_raw, cmd_str, len);
-		beentry->st_activity_raw[len] = '\0';
-		beentry->st_activity_start_timestamp = start_timestamp;
-	}
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/* ----------
- * pgstat_report_appname() -
- *
- *	Called to update our application name.
- * ----------
- */
-void
-pgstat_report_appname(const char *appname)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-	int			len;
-
-	if (!beentry)
-		return;
-
-	/* This should be unnecessary if GUC did its job, but be safe */
-	len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
-
-	/*
-	 * Update my status entry, following the protocol of bumping
-	 * st_changecount before and after.  We use a volatile pointer here to
-	 * ensure the compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	memcpy((char *) beentry->st_appname, appname, len);
-	beentry->st_appname[len] = '\0';
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/*
- * Report current transaction start timestamp as the specified value.
- * Zero means there is no active transaction.
- */
-void
-pgstat_report_xact_timestamp(TimestampTz tstamp)
-{
-	volatile PgBackendStatus *beentry = MyBEEntry;
-
-	if (!pgstat_track_activities || !beentry)
-		return;
-
-	/*
-	 * Update my status entry, following the protocol of bumping
-	 * st_changecount before and after.  We use a volatile pointer here to
-	 * ensure the compiler doesn't try to get cute.
-	 */
-	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
-
-	beentry->st_xact_start_timestamp = tstamp;
-
-	PGSTAT_END_WRITE_ACTIVITY(beentry);
-}
-
-/* ----------
- * pgstat_read_current_status() -
- *
- *	Copy the current contents of the PgBackendStatus array to local memory,
- *	if not already done in this transaction.
- * ----------
- */
-static void
-pgstat_read_current_status(void)
-{
-	volatile PgBackendStatus *beentry;
-	LocalPgBackendStatus *localtable;
-	LocalPgBackendStatus *localentry;
-	char	   *localappname,
-			   *localclienthostname,
-			   *localactivity;
-#ifdef USE_SSL
-	PgBackendSSLStatus *localsslstatus;
-#endif
-#ifdef ENABLE_GSS
-	PgBackendGSSStatus *localgssstatus;
-#endif
-	int			i;
-
-	if (localBackendStatusTable)
-		return;					/* already done */
-
-	pgstat_setup_backend_status_context();
-
-	/*
-	 * Allocate storage for local copy of state data.  We can presume that
-	 * none of these requests overflow size_t, because we already calculated
-	 * the same values using mul_size during shmem setup.  However, with
-	 * probably-silly values of pgstat_track_activity_query_size and
-	 * max_connections, the localactivity buffer could exceed 1GB, so use
-	 * "huge" allocation for that one.
-	 */
-	localtable = (LocalPgBackendStatus *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   sizeof(LocalPgBackendStatus) * NumBackendStatSlots);
-	localappname = (char *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   NAMEDATALEN * NumBackendStatSlots);
-	localclienthostname = (char *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   NAMEDATALEN * NumBackendStatSlots);
-	localactivity = (char *)
-		MemoryContextAllocHuge(backendStatusSnapContext,
-							   pgstat_track_activity_query_size * NumBackendStatSlots);
-#ifdef USE_SSL
-	localsslstatus = (PgBackendSSLStatus *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   sizeof(PgBackendSSLStatus) * NumBackendStatSlots);
-#endif
-#ifdef ENABLE_GSS
-	localgssstatus = (PgBackendGSSStatus *)
-		MemoryContextAlloc(backendStatusSnapContext,
-						   sizeof(PgBackendGSSStatus) * NumBackendStatSlots);
-#endif
-
-	localNumBackends = 0;
-
-	beentry = BackendStatusArray;
-	localentry = localtable;
-	for (i = 1; i <= NumBackendStatSlots; i++)
-	{
-		/*
-		 * Follow the protocol of retrying if st_changecount changes while we
-		 * copy the entry, or if it's odd.  (The check for odd is needed to
-		 * cover the case where we are able to completely copy the entry while
-		 * the source backend is between increment steps.)	We use a volatile
-		 * pointer here to ensure the compiler doesn't try to get cute.
-		 */
-		for (;;)
-		{
-			int			before_changecount;
-			int			after_changecount;
-
-			pgstat_begin_read_activity(beentry, before_changecount);
-
-			localentry->backendStatus.st_procpid = beentry->st_procpid;
-			/* Skip all the data-copying work if entry is not in use */
-			if (localentry->backendStatus.st_procpid > 0)
-			{
-				memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus));
-
-				/*
-				 * For each PgBackendStatus field that is a pointer, copy the
-				 * pointed-to data, then adjust the local copy of the pointer
-				 * field to point at the local copy of the data.
-				 *
-				 * strcpy is safe even if the string is modified concurrently,
-				 * because there's always a \0 at the end of the buffer.
-				 */
-				strcpy(localappname, (char *) beentry->st_appname);
-				localentry->backendStatus.st_appname = localappname;
-				strcpy(localclienthostname, (char *) beentry->st_clienthostname);
-				localentry->backendStatus.st_clienthostname = localclienthostname;
-				strcpy(localactivity, (char *) beentry->st_activity_raw);
-				localentry->backendStatus.st_activity_raw = localactivity;
-#ifdef USE_SSL
-				if (beentry->st_ssl)
-				{
-					memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
-					localentry->backendStatus.st_sslstatus = localsslstatus;
-				}
-#endif
-#ifdef ENABLE_GSS
-				if (beentry->st_gss)
-				{
-					memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus));
-					localentry->backendStatus.st_gssstatus = localgssstatus;
-				}
-#endif
-			}
-
-			pgstat_end_read_activity(beentry, after_changecount);
-
-			if (pgstat_read_activity_complete(before_changecount,
-											  after_changecount))
-				break;
-
-			/* Make sure we can break out of loop if stuck... */
-			CHECK_FOR_INTERRUPTS();
-		}
-
-		beentry++;
-		/* Only valid entries get included into the local array */
-		if (localentry->backendStatus.st_procpid > 0)
-		{
-			BackendIdGetTransactionIds(i,
-									   &localentry->backend_xid,
-									   &localentry->backend_xmin);
-
-			localentry++;
-			localappname += NAMEDATALEN;
-			localclienthostname += NAMEDATALEN;
-			localactivity += pgstat_track_activity_query_size;
-#ifdef USE_SSL
-			localsslstatus++;
-#endif
-#ifdef ENABLE_GSS
-			localgssstatus++;
-#endif
-			localNumBackends++;
-		}
-	}
-
-	/* Set the pointer only after completion of a valid table */
-	localBackendStatusTable = localtable;
-}
-
-
-/* ----------
- * pgstat_get_backend_current_activity() -
- *
- *	Return a string representing the current activity of the backend with
- *	the specified PID.  This looks directly at the BackendStatusArray,
- *	and so will provide current information regardless of the age of our
- *	transaction's snapshot of the status array.
- *
- *	It is the caller's responsibility to invoke this only for backends whose
- *	state is expected to remain stable while the result is in use.  The
- *	only current use is in deadlock reporting, where we can expect that
- *	the target backend is blocked on a lock.  (There are corner cases
- *	where the target's wait could get aborted while we are looking at it,
- *	but the very worst consequence is to return a pointer to a string
- *	that's been changed, so we won't worry too much.)
- *
- *	Note: return strings for special cases match pg_stat_get_backend_activity.
- * ----------
- */
-const char *
-pgstat_get_backend_current_activity(int pid, bool checkUser)
-{
-	PgBackendStatus *beentry;
-	int			i;
-
-	beentry = BackendStatusArray;
-	for (i = 1; i <= MaxBackends; i++)
-	{
-		/*
-		 * Although we expect the target backend's entry to be stable, that
-		 * doesn't imply that anyone else's is.  To avoid identifying the
-		 * wrong backend, while we check for a match to the desired PID we
-		 * must follow the protocol of retrying if st_changecount changes
-		 * while we examine the entry, or if it's odd.  (This might be
-		 * unnecessary, since fetching or storing an int is almost certainly
-		 * atomic, but let's play it safe.)  We use a volatile pointer here to
-		 * ensure the compiler doesn't try to get cute.
-		 */
-		volatile PgBackendStatus *vbeentry = beentry;
-		bool		found;
-
-		for (;;)
-		{
-			int			before_changecount;
-			int			after_changecount;
-
-			pgstat_begin_read_activity(vbeentry, before_changecount);
-
-			found = (vbeentry->st_procpid == pid);
-
-			pgstat_end_read_activity(vbeentry, after_changecount);
-
-			if (pgstat_read_activity_complete(before_changecount,
-											  after_changecount))
-				break;
-
-			/* Make sure we can break out of loop if stuck... */
-			CHECK_FOR_INTERRUPTS();
-		}
-
-		if (found)
-		{
-			/* Now it is safe to use the non-volatile pointer */
-			if (checkUser && !superuser() && beentry->st_userid != GetUserId())
-				return "<insufficient privilege>";
-			else if (*(beentry->st_activity_raw) == '\0')
-				return "<command string not enabled>";
-			else
-			{
-				/* this'll leak a bit of memory, but that seems acceptable */
-				return pgstat_clip_activity(beentry->st_activity_raw);
-			}
-		}
-
-		beentry++;
-	}
-
-	/* If we get here, caller is in error ... */
-	return "<backend information not available>";
-}
-
-/* ----------
- * pgstat_get_crashed_backend_activity() -
- *
- *	Return a string representing the current activity of the backend with
- *	the specified PID.  Like the function above, but reads shared memory with
- *	the expectation that it may be corrupt.  On success, copy the string
- *	into the "buffer" argument and return that pointer.  On failure,
- *	return NULL.
- *
- *	This function is only intended to be used by the postmaster to report the
- *	query that crashed a backend.  In particular, no attempt is made to
- *	follow the correct concurrency protocol when accessing the
- *	BackendStatusArray.  But that's OK, in the worst case we'll return a
- *	corrupted message.  We also must take care not to trip on ereport(ERROR).
- * ----------
- */
-const char *
-pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
-{
-	volatile PgBackendStatus *beentry;
-	int			i;
-
-	beentry = BackendStatusArray;
-
-	/*
-	 * We probably shouldn't get here before shared memory has been set up,
-	 * but be safe.
-	 */
-	if (beentry == NULL || BackendActivityBuffer == NULL)
-		return NULL;
-
-	for (i = 1; i <= MaxBackends; i++)
-	{
-		if (beentry->st_procpid == pid)
-		{
-			/* Read pointer just once, so it can't change after validation */
-			const char *activity = beentry->st_activity_raw;
-			const char *activity_last;
-
-			/*
-			 * We mustn't access activity string before we verify that it
-			 * falls within the BackendActivityBuffer. To make sure that the
-			 * entire string including its ending is contained within the
-			 * buffer, subtract one activity length from the buffer size.
-			 */
-			activity_last = BackendActivityBuffer + BackendActivityBufferSize
-				- pgstat_track_activity_query_size;
-
-			if (activity < BackendActivityBuffer ||
-				activity > activity_last)
-				return NULL;
-
-			/* If no string available, no point in a report */
-			if (activity[0] == '\0')
-				return NULL;
-
-			/*
-			 * Copy only ASCII-safe characters so we don't run into encoding
-			 * problems when reporting the message; and be sure not to run off
-			 * the end of memory.  As only ASCII characters are reported, it
-			 * doesn't seem necessary to perform multibyte aware clipping.
-			 */
-			ascii_safe_strlcpy(buffer, activity,
-							   Min(buflen, pgstat_track_activity_query_size));
-
-			return buffer;
-		}
-
-		beentry++;
-	}
-
-	/* PID not found */
-	return NULL;
-}
-
-
-/* ----------
- * pgstat_fetch_stat_beentry() -
- *
- *	Support function for the SQL-callable pgstat* functions. Returns
- *	our local copy of the current-activity entry for one backend.
- *
- *	NB: caller is responsible for a check if the user is permitted to see
- *	this info (especially the querystring).
- * ----------
- */
-PgBackendStatus *
-pgstat_fetch_stat_beentry(int beid)
-{
-	pgstat_read_current_status();
-
-	if (beid < 1 || beid > localNumBackends)
-		return NULL;
-
-	return &localBackendStatusTable[beid - 1].backendStatus;
-}
-
-
-/* ----------
- * pgstat_fetch_stat_local_beentry() -
- *
- *	Like pgstat_fetch_stat_beentry() but with locally computed additions (like
- *	xid and xmin values of the backend)
- *
- *	NB: caller is responsible for a check if the user is permitted to see
- *	this info (especially the querystring).
- * ----------
- */
-LocalPgBackendStatus *
-pgstat_fetch_stat_local_beentry(int beid)
-{
-	pgstat_read_current_status();
-
-	if (beid < 1 || beid > localNumBackends)
-		return NULL;
-
-	return &localBackendStatusTable[beid - 1];
-}
-
-
-/* ----------
- * pgstat_fetch_stat_numbackends() -
- *
- *	Support function for the SQL-callable pgstat* functions. Returns
- *	the maximum current backend id.
- * ----------
- */
-int
-pgstat_fetch_stat_numbackends(void)
-{
-	pgstat_read_current_status();
-
-	return localNumBackends;
-}
-
-/*
- * Convert a potentially unsafely truncated activity string (see
- * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated
- * one.
- *
- * The returned string is allocated in the caller's memory context and may be
- * freed.
- */
-char *
-pgstat_clip_activity(const char *raw_activity)
-{
-	char	   *activity;
-	int			rawlen;
-	int			cliplen;
-
-	/*
-	 * Some callers, like pgstat_get_backend_current_activity(), do not
-	 * guarantee that the buffer isn't concurrently modified. We try to take
-	 * care that the buffer is always terminated by a NUL byte regardless, but
-	 * let's still be paranoid about the string's length. In those cases the
-	 * underlying buffer is guaranteed to be pgstat_track_activity_query_size
-	 * large.
-	 */
-	activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1);
-
-	/* now double-guaranteed to be NUL terminated */
-	rawlen = strlen(activity);
-
-	/*
-	 * All supported server-encodings make it possible to determine the length
-	 * of a multi-byte character from its first byte (this is not the case for
-	 * client encodings, see GB18030). As st_activity is always stored using
-	 * server encoding, this allows us to perform multi-byte aware truncation,
-	 * even if the string earlier was truncated in the middle of a multi-byte
-	 * character.
-	 */
-	cliplen = pg_mbcliplen(activity, rawlen,
-						   pgstat_track_activity_query_size - 1);
-
-	activity[cliplen] = '\0';
-
-	return activity;
-}
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index accc1eb577..840ebef92a 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -7,17 +7,6 @@
  *
  * IDENTIFICATION
  *	  src/backend/postmaster/wait_event.c
- *
- * NOTES
- *
- * To make pgstat_report_wait_start() and pgstat_report_wait_end() as
- * lightweight as possible, they do not check if shared memory (MyProc
- * specifically, where the wait event is stored) is already available. Instead
- * we initially set my_wait_event_info to a process local variable, which then
- * is redirected to shared memory using pgstat_set_wait_event_storage(). For
- * the same reason pgstat_track_activities is not checked - the check adds
- * more work than it saves.
- *
  * ----------
  */
 #include "postgres.h"
@@ -34,36 +23,6 @@ static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
 static const char *pgstat_get_wait_io(WaitEventIO w);
 
 
-static uint32 local_my_wait_event_info;
-uint32	   *my_wait_event_info = &local_my_wait_event_info;
-
-
-/*
- * Configure wait event reporting to report wait events to *wait_event_info.
- * *wait_event_info needs to be valid until pgstat_reset_wait_event_storage()
- * is called.
- *
- * Expected to be called during backend startup, to point my_wait_event_info
- * into shared memory.
- */
-void
-pgstat_set_wait_event_storage(uint32 *wait_event_info)
-{
-	my_wait_event_info = wait_event_info;
-}
-
-/*
- * Reset wait event storage location.
- *
- * Expected to be called during backend shutdown, before the location set up
- * pgstat_set_wait_event_storage() becomes invalid.
- */
-void
-pgstat_reset_wait_event_storage(void)
-{
-	my_wait_event_info = &local_my_wait_event_info;
-}
-
 /* ----------
  * pgstat_get_wait_event_type() -
  *
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 51d1bbef30..a3ec358538 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -681,10 +681,6 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 	if (!bootstrap)
 		pgstat_initialize();
 
-	/* Initialize status reporting */
-	if (!bootstrap)
-		pgstat_beinit();
-
 	/*
 	 * Load relcache entries for the shared system catalogs.  This must create
 	 * at least entries for pg_database and catalogs used for authentication.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c9c9da85f3..60a9c7a2a0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -90,7 +90,6 @@
 #include "tcop/tcopprot.h"
 #include "tsearch/ts_cache.h"
 #include "utils/acl.h"
-#include "utils/backend_status.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
 #include "utils/float.h"
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index e04ccc5b62..e42be914d2 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -148,11 +148,11 @@ static void save_query_text_state(PsqlScanState scan_state, ConditionalStack cst
 								  PQExpBuffer query_buf);
 static void discard_query_text(PsqlScanState scan_state, ConditionalStack cstack,
 							   PQExpBuffer query_buf);
-static bool copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf);
+static void copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf);
 static bool do_connect(enum trivalue reuse_previous_specification,
 					   char *dbname, char *user, char *host, char *port);
 static bool do_edit(const char *filename_arg, PQExpBuffer query_buf,
-					int lineno, bool discard_on_quit, bool *edited);
+					int lineno, bool *edited);
 static bool do_shell(const char *command);
 static bool do_watch(PQExpBuffer query_buf, double sleep);
 static bool lookup_object_oid(EditableObjectType obj_type, const char *desc,
@@ -418,7 +418,7 @@ exec_command(const char *cmd,
 	 * the individual command subroutines.
 	 */
 	if (status == PSQL_CMD_SEND)
-		(void) copy_previous_query(query_buf, previous_buf);
+		copy_previous_query(query_buf, previous_buf);
 
 	return status;
 }
@@ -1004,27 +1004,14 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch,
 			}
 			if (status != PSQL_CMD_ERROR)
 			{
-				bool		discard_on_quit;
-
 				expand_tilde(&fname);
 				if (fname)
-				{
 					canonicalize_path(fname);
-					/* Always clear buffer if the file isn't modified */
-					discard_on_quit = true;
-				}
-				else
-				{
-					/*
-					 * If query_buf is empty, recall previous query for
-					 * editing.  But in that case, the query buffer should be
-					 * emptied if editing doesn't modify the file.
-					 */
-					discard_on_quit = copy_previous_query(query_buf,
-														  previous_buf);
-				}
 
-				if (do_edit(fname, query_buf, lineno, discard_on_quit, NULL))
+				/* If query_buf is empty, recall previous query for editing */
+				copy_previous_query(query_buf, previous_buf);
+
+				if (do_edit(fname, query_buf, lineno, NULL))
 					status = PSQL_CMD_NEWEDIT;
 				else
 					status = PSQL_CMD_ERROR;
@@ -1147,7 +1134,7 @@ exec_command_ef_ev(PsqlScanState scan_state, bool active_branch,
 		{
 			bool		edited = false;
 
-			if (!do_edit(NULL, query_buf, lineno, true, &edited))
+			if (!do_edit(NULL, query_buf, lineno, &edited))
 				status = PSQL_CMD_ERROR;
 			else if (!edited)
 				puts(_("No changes"));
@@ -2650,7 +2637,7 @@ exec_command_watch(PsqlScanState scan_state, bool active_branch,
 		}
 
 		/* If query_buf is empty, recall and execute previous query */
-		(void) copy_previous_query(query_buf, previous_buf);
+		copy_previous_query(query_buf, previous_buf);
 
 		success = do_watch(query_buf, sleep);
 
@@ -2974,19 +2961,12 @@ discard_query_text(PsqlScanState scan_state, ConditionalStack cstack,
  * This is used by various slash commands for which re-execution of a
  * previous query is a common usage.  For convenience, we allow the
  * case of query_buf == NULL (and do nothing).
- *
- * Returns "true" if the previous query was copied into the query
- * buffer, else "false".
  */
-static bool
+static void
 copy_previous_query(PQExpBuffer query_buf, PQExpBuffer previous_buf)
 {
 	if (query_buf && query_buf->len == 0)
-	{
 		appendPQExpBufferStr(query_buf, previous_buf->data);
-		return true;
-	}
-	return false;
 }
 
 /*
@@ -3667,11 +3647,10 @@ UnsyncVariables(void)
 
 
 /*
- * helper for do_edit(): actually invoke the editor
+ * do_edit -- handler for \e
  *
- * Returns true on success, false if we failed to invoke the editor or
- * it returned nonzero status.  (An error message is printed for failed-
- * to-invoke cases, but not if the editor returns nonzero status.)
+ * If you do not specify a filename, the current query buffer will be copied
+ * into a temporary file.
  */
 static bool
 editFile(const char *fname, int lineno)
@@ -3740,23 +3719,10 @@ editFile(const char *fname, int lineno)
 }
 
 
-/*
- * do_edit -- handler for \e
- *
- * If you do not specify a filename, the current query buffer will be copied
- * into a temporary file.
- *
- * After this function is done, the resulting file will be copied back into the
- * query buffer.  As an exception to this, the query buffer will be emptied
- * if the file was not modified (or the editor failed) and the caller passes
- * "discard_on_quit" = true.
- *
- * If "edited" isn't NULL, *edited will be set to true if the query buffer
- * is successfully replaced.
- */
+/* do_edit -- handler for \e; this is the entry point callers should use */
 static bool
 do_edit(const char *filename_arg, PQExpBuffer query_buf,
-		int lineno, bool discard_on_quit, bool *edited)
+		int lineno, bool *edited)
 {
 	char		fnametmp[MAXPGPATH];
 	FILE	   *stream = NULL;
@@ -3904,7 +3870,6 @@ do_edit(const char *filename_arg, PQExpBuffer query_buf,
 			{
 				pg_log_error("%s: %m", fname);
 				error = true;
-				resetPQExpBuffer(query_buf);
 			}
 			else if (edited)
 			{
@@ -3914,15 +3879,6 @@ do_edit(const char *filename_arg, PQExpBuffer query_buf,
 			fclose(stream);
 		}
 	}
-	else
-	{
-		/*
-		 * If the file was not modified, and the caller requested it, discard
-		 * the query buffer.
-		 */
-		if (discard_on_quit)
-			resetPQExpBuffer(query_buf);
-	}
 
 	/* remove temp file */
 	if (!filename_arg)
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index d7bf16368b..c6b139d57d 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -2,7 +2,7 @@
  *
  * progress.h
  *	  Constants used with the progress reporting facilities defined in
- *	  backend_status.h.  These are possibly interesting to extensions, so we
+ *	  pgstat.h.  These are possibly interesting to extensions, so we
  *	  expose them via this header file.  Note that if you update these
  *	  constants, you probably also need to update the views based on them
  *	  in system_views.sql.
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..d66760e4bf 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 7cd137506e..3247c7b8ad 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -12,10 +12,11 @@
 #define PGSTAT_H
 
 #include "datatype/timestamp.h"
+#include "libpq/pqcomm.h"
+#include "miscadmin.h"
+#include "port/atomics.h"
 #include "portability/instr_time.h"
-#include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */
-#include "utils/backend_progress.h" /* for backward compatibility */
-#include "utils/backend_status.h" /* for backward compatibility */
+#include "postmaster/pgarch.h"
 #include "utils/hsearch.h"
 #include "utils/relcache.h"
 #include "utils/wait_event.h" /* for backward compatibility */
@@ -881,6 +882,262 @@ typedef struct PgStat_ReplSlotStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_ReplSlotStats;
 
+/* ----------
+ * Backend states
+ * ----------
+ */
+typedef enum BackendState
+{
+	STATE_UNDEFINED,
+	STATE_IDLE,
+	STATE_RUNNING,
+	STATE_IDLEINTRANSACTION,
+	STATE_FASTPATH,
+	STATE_IDLEINTRANSACTION_ABORTED,
+	STATE_DISABLED
+} BackendState;
+
+/* ----------
+ * Command type for progress reporting purposes
+ * ----------
+ */
+typedef enum ProgressCommandType
+{
+	PROGRESS_COMMAND_INVALID,
+	PROGRESS_COMMAND_VACUUM,
+	PROGRESS_COMMAND_ANALYZE,
+	PROGRESS_COMMAND_CLUSTER,
+	PROGRESS_COMMAND_CREATE_INDEX,
+	PROGRESS_COMMAND_BASEBACKUP,
+	PROGRESS_COMMAND_COPY
+} ProgressCommandType;
+
+#define PGSTAT_NUM_PROGRESS_PARAM	20
+
+/* ----------
+ * Shared-memory data structures
+ * ----------
+ */
+
+
+/*
+ * PgBackendSSLStatus
+ *
+ * For each backend, we keep the SSL status in a separate struct, that
+ * is only filled in if SSL is enabled.
+ *
+ * All char arrays must be null-terminated.
+ */
+typedef struct PgBackendSSLStatus
+{
+	/* Information about SSL connection */
+	int			ssl_bits;
+	char		ssl_version[NAMEDATALEN];
+	char		ssl_cipher[NAMEDATALEN];
+	char		ssl_client_dn[NAMEDATALEN];
+
+	/*
+	 * serial number is max "20 octets" per RFC 5280, so this size should be
+	 * fine
+	 */
+	char		ssl_client_serial[NAMEDATALEN];
+
+	char		ssl_issuer_dn[NAMEDATALEN];
+} PgBackendSSLStatus;
+
+/*
+ * PgBackendGSSStatus
+ *
+ * For each backend, we keep the GSS status in a separate struct, that
+ * is only filled in if GSS is enabled.
+ *
+ * All char arrays must be null-terminated.
+ */
+typedef struct PgBackendGSSStatus
+{
+	/* Information about GSSAPI connection */
+	char		gss_princ[NAMEDATALEN]; /* GSSAPI Principal used to auth */
+	bool		gss_auth;		/* If GSSAPI authentication was used */
+	bool		gss_enc;		/* If encryption is being used */
+
+} PgBackendGSSStatus;
+
+
+/* ----------
+ * PgBackendStatus
+ *
+ * Each live backend maintains a PgBackendStatus struct in shared memory
+ * showing its current activity.  (The structs are allocated according to
+ * BackendId, but that is not critical.)  Note that the collector process
+ * has no involvement in, or even access to, these structs.
+ *
+ * Each auxiliary process also maintains a PgBackendStatus struct in shared
+ * memory.
+ * ----------
+ */
+typedef struct PgBackendStatus
+{
+	/*
+	 * To avoid locking overhead, we use the following protocol: a backend
+	 * increments st_changecount before modifying its entry, and again after
+	 * finishing a modification.  A would-be reader should note the value of
+	 * st_changecount, copy the entry into private memory, then check
+	 * st_changecount again.  If the value hasn't changed, and if it's even,
+	 * the copy is valid; otherwise start over.  This makes updates cheap
+	 * while reads are potentially expensive, but that's the tradeoff we want.
+	 *
+	 * The above protocol needs memory barriers to ensure that the apparent
+	 * order of execution is as it desires.  Otherwise, for example, the CPU
+	 * might rearrange the code so that st_changecount is incremented twice
+	 * before the modification on a machine with weak memory ordering.  Hence,
+	 * use the macros defined below for manipulating st_changecount, rather
+	 * than touching it directly.
+	 */
+	int			st_changecount;
+
+	/* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
+	int			st_procpid;
+
+	/* Type of backends */
+	BackendType st_backendType;
+
+	/* Times when current backend, transaction, activity, and state started */
+	TimestampTz st_proc_start_timestamp;
+	TimestampTz st_xact_start_timestamp;
+	TimestampTz st_activity_start_timestamp;
+	TimestampTz st_state_start_timestamp;
+
+	/* Database OID, owning user's OID, connection client address */
+	Oid			st_databaseid;
+	Oid			st_userid;
+	SockAddr	st_clientaddr;
+	char	   *st_clienthostname;	/* MUST be null-terminated */
+
+	/* Information about SSL connection */
+	bool		st_ssl;
+	PgBackendSSLStatus *st_sslstatus;
+
+	/* Information about GSSAPI connection */
+	bool		st_gss;
+	PgBackendGSSStatus *st_gssstatus;
+
+	/* current state */
+	BackendState st_state;
+
+	/* application name; MUST be null-terminated */
+	char	   *st_appname;
+
+	/*
+	 * Current command string; MUST be null-terminated. Note that this string
+	 * possibly is truncated in the middle of a multi-byte character. As
+	 * activity strings are stored more frequently than read, that allows us
+	 * to move the cost of correct truncation to the display side. Use
+	 * pgstat_clip_activity() to truncate correctly.
+	 */
+	char	   *st_activity_raw;
+
+	/*
+	 * Command progress reporting.  Any command which wishes can advertise
+	 * that it is running by setting st_progress_command,
+	 * st_progress_command_target, and st_progress_param[].
+	 * st_progress_command_target should be the OID of the relation which the
+	 * command targets (we assume there's just one, as this is meant for
+	 * utility commands), but the meaning of each element in the
+	 * st_progress_param array is command-specific.
+	 */
+	ProgressCommandType st_progress_command;
+	Oid			st_progress_command_target;
+	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
+} PgBackendStatus;
+
+/*
+ * Macros to load and store st_changecount with appropriate memory barriers.
+ *
+ * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY()
+ * after, modifying the current process's PgBackendStatus data.  Note that,
+ * since there is no mechanism for cleaning up st_changecount after an error,
+ * THESE MACROS FORM A CRITICAL SECTION.  Any error between them will be
+ * promoted to PANIC, causing a database restart to clean up shared memory!
+ * Hence, keep the critical section as short and straight-line as possible.
+ * Aside from being safer, that minimizes the window in which readers will
+ * have to loop.
+ *
+ * Reader logic should follow this sketch:
+ *
+ *		for (;;)
+ *		{
+ *			int before_ct, after_ct;
+ *
+ *			pgstat_begin_read_activity(beentry, before_ct);
+ *			... copy beentry data to local memory ...
+ *			pgstat_end_read_activity(beentry, after_ct);
+ *			if (pgstat_read_activity_complete(before_ct, after_ct))
+ *				break;
+ *			CHECK_FOR_INTERRUPTS();
+ *		}
+ *
+ * For extra safety, we generally use volatile beentry pointers, although
+ * the memory barriers should theoretically be sufficient.
+ */
+#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \
+	do { \
+		START_CRIT_SECTION(); \
+		(beentry)->st_changecount++; \
+		pg_write_barrier(); \
+	} while (0)
+
+#define PGSTAT_END_WRITE_ACTIVITY(beentry) \
+	do { \
+		pg_write_barrier(); \
+		(beentry)->st_changecount++; \
+		Assert(((beentry)->st_changecount & 1) == 0); \
+		END_CRIT_SECTION(); \
+	} while (0)
+
+#define pgstat_begin_read_activity(beentry, before_changecount) \
+	do { \
+		(before_changecount) = (beentry)->st_changecount; \
+		pg_read_barrier(); \
+	} while (0)
+
+#define pgstat_end_read_activity(beentry, after_changecount) \
+	do { \
+		pg_read_barrier(); \
+		(after_changecount) = (beentry)->st_changecount; \
+	} while (0)
+
+#define pgstat_read_activity_complete(before_changecount, after_changecount) \
+	((before_changecount) == (after_changecount) && \
+	 ((before_changecount) & 1) == 0)
+
+
+/* ----------
+ * LocalPgBackendStatus
+ *
+ * When we build the backend status array, we use LocalPgBackendStatus to be
+ * able to add new values to the struct when needed without adding new fields
+ * to the shared memory. It contains the backend status as a first member.
+ * ----------
+ */
+typedef struct LocalPgBackendStatus
+{
+	/*
+	 * Local version of the backend status entry.
+	 */
+	PgBackendStatus backendStatus;
+
+	/*
+	 * The xid of the current transaction if available, InvalidTransactionId
+	 * if not.
+	 */
+	TransactionId backend_xid;
+
+	/*
+	 * The xmin of the current session if available, InvalidTransactionId if
+	 * not.
+	 */
+	TransactionId backend_xmin;
+} LocalPgBackendStatus;
 
 /*
  * Working state needed to accumulate per-function-call timing statistics.
@@ -903,8 +1160,10 @@ typedef struct PgStat_FunctionCallUsage
  * GUC parameters
  * ----------
  */
+extern PGDLLIMPORT bool pgstat_track_activities;
 extern PGDLLIMPORT bool pgstat_track_counts;
 extern PGDLLIMPORT int pgstat_track_functions;
+extern PGDLLIMPORT int pgstat_track_activity_query_size;
 extern char *pgstat_stat_directory;
 extern char *pgstat_stat_tmpname;
 extern char *pgstat_stat_filename;
@@ -925,14 +1184,6 @@ extern PgStat_MsgWal WalStats;
 extern PgStat_Counter pgStatBlockReadTime;
 extern PgStat_Counter pgStatBlockWriteTime;
 
-/*
- * Updated by pgstat_count_conn_*_time macros, called by
- * pgstat_report_activity().
- */
-extern PgStat_Counter pgStatActiveTime;
-extern PgStat_Counter pgStatTransactionIdleTime;
-
-
 /*
  * Updated by the traffic cop and in errfinish()
  */
@@ -942,6 +1193,9 @@ extern SessionEndType pgStatSessionEndCause;
  * Functions called from postmaster
  * ----------
  */
+extern Size BackendStatusShmemSize(void);
+extern void CreateSharedBackendStatus(void);
+
 extern void pgstat_init(void);
 extern int	pgstat_start(void);
 extern void pgstat_reset_all(void);
@@ -987,13 +1241,30 @@ extern void pgstat_report_replslot(const char *slotname, PgStat_Counter spilltxn
 extern void pgstat_report_replslot_drop(const char *slotname);
 
 extern void pgstat_initialize(void);
-
+extern void pgstat_bestart(void);
+
+extern void pgstat_report_activity(BackendState state, const char *cmd_str);
+extern void pgstat_report_tempfile(size_t filesize);
+extern void pgstat_report_appname(const char *appname);
+extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
+extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
+extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
+													   int buflen);
+
+extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
+										  Oid relid);
+extern void pgstat_progress_update_param(int index, int64 val);
+extern void pgstat_progress_update_multi_param(int nparam, const int *index,
+											   const int64 *val);
+extern void pgstat_progress_end_command(void);
 
 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
 extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
 
 extern void pgstat_initstats(Relation rel);
 
+extern char *pgstat_clip_activity(const char *raw_activity);
+
 /* nontransactional event counts are simple enough to inline */
 
 #define pgstat_count_heap_scan(rel)									\
@@ -1035,10 +1306,6 @@ extern void pgstat_initstats(Relation rel);
 	(pgStatBlockReadTime += (n))
 #define pgstat_count_buffer_write_time(n)							\
 	(pgStatBlockWriteTime += (n))
-#define pgstat_count_conn_active_time(n)							\
-	(pgStatActiveTime += (n))
-#define pgstat_count_conn_txn_idle_time(n)							\
-	(pgStatTransactionIdleTime += (n))
 
 extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n);
 extern void pgstat_count_heap_update(Relation rel, bool hot);
@@ -1075,7 +1342,10 @@ extern bool pgstat_send_wal(bool force);
  */
 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
+extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
+extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
 extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
+extern int	pgstat_fetch_stat_numbackends(void);
 extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
 extern PgStat_GlobalStats *pgstat_fetch_global(void);
 extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
diff --git a/src/include/utils/backend_progress.h b/src/include/utils/backend_progress.h
deleted file mode 100644
index 1714fa09c1..0000000000
--- a/src/include/utils/backend_progress.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* ----------
- * backend_progress.h
- *	  Command progress reporting definition.
- *
- * Note that this file provides the infrastructure for storing a single
- * backend's command progress counters, without ascribing meaning to the
- * individual fields. See commands/progress.h and system_views.sql for that.
- *
- * Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- * src/include/utils/backend_progress.h
- * ----------
- */
-#ifndef BACKEND_PROGRESS_H
-#define BACKEND_PROGRESS_H
-
-
-/* ----------
- * Command type for progress reporting purposes
- * ----------
- */
-typedef enum ProgressCommandType
-{
-	PROGRESS_COMMAND_INVALID,
-	PROGRESS_COMMAND_VACUUM,
-	PROGRESS_COMMAND_ANALYZE,
-	PROGRESS_COMMAND_CLUSTER,
-	PROGRESS_COMMAND_CREATE_INDEX,
-	PROGRESS_COMMAND_BASEBACKUP,
-	PROGRESS_COMMAND_COPY
-} ProgressCommandType;
-
-#define PGSTAT_NUM_PROGRESS_PARAM	20
-
-
-extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
-										  Oid relid);
-extern void pgstat_progress_update_param(int index, int64 val);
-extern void pgstat_progress_update_multi_param(int nparam, const int *index,
-											   const int64 *val);
-extern void pgstat_progress_end_command(void);
-
-
-#endif /* BACKEND_PROGRESS_H */
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
deleted file mode 100644
index 3fd7370d41..0000000000
--- a/src/include/utils/backend_status.h
+++ /dev/null
@@ -1,316 +0,0 @@
-/* ----------
- * backend_status.h
- *	  Definitions related to backend status reporting
- *
- * Copyright (c) 2001-2021, PostgreSQL Global Development Group
- *
- * src/include/utils/backend_status.h
- * ----------
- */
-#ifndef BACKEND_STATUS_H
-#define BACKEND_STATUS_H
-
-#include "datatype/timestamp.h"
-#include "libpq/pqcomm.h"
-#include "miscadmin.h" /* for BackendType */
-#include "utils/backend_progress.h"
-
-
-/* ----------
- * Backend states
- * ----------
- */
-typedef enum BackendState
-{
-	STATE_UNDEFINED,
-	STATE_IDLE,
-	STATE_RUNNING,
-	STATE_IDLEINTRANSACTION,
-	STATE_FASTPATH,
-	STATE_IDLEINTRANSACTION_ABORTED,
-	STATE_DISABLED
-} BackendState;
-
-
-/* ----------
- * Shared-memory data structures
- * ----------
- */
-
-/*
- * PgBackendSSLStatus
- *
- * For each backend, we keep the SSL status in a separate struct, that
- * is only filled in if SSL is enabled.
- *
- * All char arrays must be null-terminated.
- */
-typedef struct PgBackendSSLStatus
-{
-	/* Information about SSL connection */
-	int			ssl_bits;
-	char		ssl_version[NAMEDATALEN];
-	char		ssl_cipher[NAMEDATALEN];
-	char		ssl_client_dn[NAMEDATALEN];
-
-	/*
-	 * serial number is max "20 octets" per RFC 5280, so this size should be
-	 * fine
-	 */
-	char		ssl_client_serial[NAMEDATALEN];
-
-	char		ssl_issuer_dn[NAMEDATALEN];
-} PgBackendSSLStatus;
-
-/*
- * PgBackendGSSStatus
- *
- * For each backend, we keep the GSS status in a separate struct, that
- * is only filled in if GSS is enabled.
- *
- * All char arrays must be null-terminated.
- */
-typedef struct PgBackendGSSStatus
-{
-	/* Information about GSSAPI connection */
-	char		gss_princ[NAMEDATALEN]; /* GSSAPI Principal used to auth */
-	bool		gss_auth;		/* If GSSAPI authentication was used */
-	bool		gss_enc;		/* If encryption is being used */
-
-} PgBackendGSSStatus;
-
-
-/* ----------
- * PgBackendStatus
- *
- * Each live backend maintains a PgBackendStatus struct in shared memory
- * showing its current activity.  (The structs are allocated according to
- * BackendId, but that is not critical.)  Note that the collector process
- * has no involvement in, or even access to, these structs.
- *
- * Each auxiliary process also maintains a PgBackendStatus struct in shared
- * memory.
- * ----------
- */
-typedef struct PgBackendStatus
-{
-	/*
-	 * To avoid locking overhead, we use the following protocol: a backend
-	 * increments st_changecount before modifying its entry, and again after
-	 * finishing a modification.  A would-be reader should note the value of
-	 * st_changecount, copy the entry into private memory, then check
-	 * st_changecount again.  If the value hasn't changed, and if it's even,
-	 * the copy is valid; otherwise start over.  This makes updates cheap
-	 * while reads are potentially expensive, but that's the tradeoff we want.
-	 *
-	 * The above protocol needs memory barriers to ensure that the apparent
-	 * order of execution is as it desires.  Otherwise, for example, the CPU
-	 * might rearrange the code so that st_changecount is incremented twice
-	 * before the modification on a machine with weak memory ordering.  Hence,
-	 * use the macros defined below for manipulating st_changecount, rather
-	 * than touching it directly.
-	 */
-	int			st_changecount;
-
-	/* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
-	int			st_procpid;
-
-	/* Type of backends */
-	BackendType st_backendType;
-
-	/* Times when current backend, transaction, and activity started */
-	TimestampTz st_proc_start_timestamp;
-	TimestampTz st_xact_start_timestamp;
-	TimestampTz st_activity_start_timestamp;
-	TimestampTz st_state_start_timestamp;
-
-	/* Database OID, owning user's OID, connection client address */
-	Oid			st_databaseid;
-	Oid			st_userid;
-	SockAddr	st_clientaddr;
-	char	   *st_clienthostname;	/* MUST be null-terminated */
-
-	/* Information about SSL connection */
-	bool		st_ssl;
-	PgBackendSSLStatus *st_sslstatus;
-
-	/* Information about GSSAPI connection */
-	bool		st_gss;
-	PgBackendGSSStatus *st_gssstatus;
-
-	/* current state */
-	BackendState st_state;
-
-	/* application name; MUST be null-terminated */
-	char	   *st_appname;
-
-	/*
-	 * Current command string; MUST be null-terminated. Note that this string
-	 * possibly is truncated in the middle of a multi-byte character. As
-	 * activity strings are stored more frequently than read, that allows to
-	 * move the cost of correct truncation to the display side. Use
-	 * pgstat_clip_activity() to truncate correctly.
-	 */
-	char	   *st_activity_raw;
-
-	/*
-	 * Command progress reporting.  Any command which wishes can advertise
-	 * that it is running by setting st_progress_command,
-	 * st_progress_command_target, and st_progress_param[].
-	 * st_progress_command_target should be the OID of the relation which the
-	 * command targets (we assume there's just one, as this is meant for
-	 * utility commands), but the meaning of each element in the
-	 * st_progress_param array is command-specific.
-	 */
-	ProgressCommandType st_progress_command;
-	Oid			st_progress_command_target;
-	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
-} PgBackendStatus;
-
-
-/*
- * Macros to load and store st_changecount with appropriate memory barriers.
- *
- * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY()
- * after, modifying the current process's PgBackendStatus data.  Note that,
- * since there is no mechanism for cleaning up st_changecount after an error,
- * THESE MACROS FORM A CRITICAL SECTION.  Any error between them will be
- * promoted to PANIC, causing a database restart to clean up shared memory!
- * Hence, keep the critical section as short and straight-line as possible.
- * Aside from being safer, that minimizes the window in which readers will
- * have to loop.
- *
- * Reader logic should follow this sketch:
- *
- *		for (;;)
- *		{
- *			int before_ct, after_ct;
- *
- *			pgstat_begin_read_activity(beentry, before_ct);
- *			... copy beentry data to local memory ...
- *			pgstat_end_read_activity(beentry, after_ct);
- *			if (pgstat_read_activity_complete(before_ct, after_ct))
- *				break;
- *			CHECK_FOR_INTERRUPTS();
- *		}
- *
- * For extra safety, we generally use volatile beentry pointers, although
- * the memory barriers should theoretically be sufficient.
- */
-#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \
-	do { \
-		START_CRIT_SECTION(); \
-		(beentry)->st_changecount++; \
-		pg_write_barrier(); \
-	} while (0)
-
-#define PGSTAT_END_WRITE_ACTIVITY(beentry) \
-	do { \
-		pg_write_barrier(); \
-		(beentry)->st_changecount++; \
-		Assert(((beentry)->st_changecount & 1) == 0); \
-		END_CRIT_SECTION(); \
-	} while (0)
-
-#define pgstat_begin_read_activity(beentry, before_changecount) \
-	do { \
-		(before_changecount) = (beentry)->st_changecount; \
-		pg_read_barrier(); \
-	} while (0)
-
-#define pgstat_end_read_activity(beentry, after_changecount) \
-	do { \
-		pg_read_barrier(); \
-		(after_changecount) = (beentry)->st_changecount; \
-	} while (0)
-
-#define pgstat_read_activity_complete(before_changecount, after_changecount) \
-	((before_changecount) == (after_changecount) && \
-	 ((before_changecount) & 1) == 0)
-
-
-/* ----------
- * LocalPgBackendStatus
- *
- * When we build the backend status array, we use LocalPgBackendStatus to be
- * able to add new values to the struct when needed without adding new fields
- * to the shared memory. It contains the backend status as a first member.
- * ----------
- */
-typedef struct LocalPgBackendStatus
-{
-	/*
-	 * Local version of the backend status entry.
-	 */
-	PgBackendStatus backendStatus;
-
-	/*
-	 * The xid of the current transaction if available, InvalidTransactionId
-	 * if not.
-	 */
-	TransactionId backend_xid;
-
-	/*
-	 * The xmin of the current session if available, InvalidTransactionId if
-	 * not.
-	 */
-	TransactionId backend_xmin;
-} LocalPgBackendStatus;
-
-
-/* ----------
- * GUC parameters
- * ----------
- */
-extern PGDLLIMPORT bool pgstat_track_activities;
-extern PGDLLIMPORT int pgstat_track_activity_query_size;
-
-
-/* ----------
- * Other global variables
- * ----------
- */
-extern PGDLLIMPORT  PgBackendStatus *MyBEEntry;
-
-
-/* ----------
- * Functions called from postmaster
- * ----------
- */
-extern Size BackendStatusShmemSize(void);
-extern void CreateSharedBackendStatus(void);
-
-
-/* ----------
- * Functions called from backends
- * ----------
- */
-
-/* Initialization functions */
-extern void pgstat_beinit(void);
-extern void pgstat_bestart(void);
-
-extern void pgstat_clear_backend_activity_snapshot(void);
-
-/* Activity reporting functions */
-extern void pgstat_report_activity(BackendState state, const char *cmd_str);
-extern void pgstat_report_tempfile(size_t filesize);
-extern void pgstat_report_appname(const char *appname);
-extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
-extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
-extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
-													   int buflen);
-
-
-/* ----------
- * Support functions for the SQL-callable functions to
- * generate the pgstat* views.
- * ----------
- */
-extern int	pgstat_fetch_stat_numbackends(void);
-extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
-extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
-extern char *pgstat_clip_activity(const char *raw_activity);
-
-
-#endif /* BACKEND_STATUS_H */
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 44448b48ec..2c883467f3 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -11,6 +11,9 @@
 #define WAIT_EVENT_H
 
 
+#include "storage/proc.h" /* for MyProc */
+
+
 /* ----------
  * Wait Classes
  * ----------
@@ -231,10 +234,13 @@ extern const char *pgstat_get_wait_event(uint32 wait_event_info);
 extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
 static inline void pgstat_report_wait_start(uint32 wait_event_info);
 static inline void pgstat_report_wait_end(void);
-extern void pgstat_set_wait_event_storage(uint32 *wait_event_info);
-extern void pgstat_reset_wait_event_storage(void);
 
-extern PGDLLIMPORT uint32 *my_wait_event_info;
+
+/*
+ * Repeated here for the inline functions because it is declared in pgstat.h,
+ * which includes this header.
+ */
+extern PGDLLIMPORT bool pgstat_track_activities;
 
 
 /* ----------
@@ -248,35 +254,47 @@ extern PGDLLIMPORT uint32 *my_wait_event_info;
  *	for wait event which is sufficient for current usage, 1-byte is
  *	reserved for future usage.
  *
- *	Historically we used to make this reporting conditional on
- *	pgstat_track_activities, but the check for that seems to add more cost
- *	than it saves.
- *
- *	my_wait_event_info initially points to local memory, making it safe to
- *	call this before MyProc has been initialized.
+ * NB: this *must* be able to survive being called before MyProc has been
+ * initialized.
  * ----------
  */
 static inline void
 pgstat_report_wait_start(uint32 wait_event_info)
 {
+	volatile PGPROC *proc = MyProc;
+
+	if (!pgstat_track_activities || !proc)
+		return;
+
 	/*
 	 * Since this is a four-byte field which is always read and written as
 	 * four-bytes, updates are atomic.
 	 */
-	*(volatile uint32 *) my_wait_event_info = wait_event_info;
+	proc->wait_event_info = wait_event_info;
 }
 
 /* ----------
  * pgstat_report_wait_end() -
  *
  *	Called to report end of a wait.
+ *
+ * NB: this *must* be able to survive being called before MyProc has been
+ * initialized.
  * ----------
  */
 static inline void
 pgstat_report_wait_end(void)
 {
-	/* see pgstat_report_wait_start() */
-	*(volatile uint32 *) my_wait_event_info = 0;
+	volatile PGPROC *proc = MyProc;
+
+	if (!pgstat_track_activities || !proc)
+		return;
+
+	/*
+	 * Since this is a four-byte field which is always read and written as
+	 * four-bytes, updates are atomic.
+	 */
+	proc->wait_event_info = 0;
 }
 
 
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 6a98064b2b..a173d604d6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#41Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#40)
Re: TRUNCATE on foreign table

On Sun, Apr 4, 2021 at 12:48 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

v9 has also typo because I haven't checked about doc... sorry.

I think v9 has some changes not related to the foreign table truncate
feature. If yes, please remove those changes and provide a proper
patch.

diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
diff --git a/src/backend/bootstrap/bootstrap.c
b/src/backend/bootstrap/bootstrap.c
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
....
....

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#42Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#41)
1 attachment(s)
Re: TRUNCATE on foreign table

Oops... sorry.
I hadn't merged my working git branch with the remote master branch.
Please check this v11.

2021年4月4日(日) 23:56 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Sun, Apr 4, 2021 at 12:48 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

v9 has also typo because I haven't checked about doc... sorry.

I think v9 has some changes not related to the foreign table truncate
feature. If yes, please remove those changes and provide a proper
patch.

diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
diff --git a/src/backend/bootstrap/bootstrap.c
b/src/backend/bootstrap/bootstrap.c
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
....
....

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v11.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v11.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..45b980433b 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a "TRUNCATE rel [, ...]" statement with its ONLY, IDENTITY and RESTRICT/CASCADE options
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..45bb03f56d 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,243 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+  1 | c4ca4238a0b923820dcc509a6f75849b
+  2 | c81e728d9d4c2f636f067f89cc14862c
+  3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  4 | a87ff679a2f3e71d9181a67b7542122c
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+(10 rows)
+
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable;		-- empty
+ id | x 
+----+---
+(0 rows)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable ORDER BY id;
+ id |                y                 
+----+----------------------------------
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ 20 | 98f13708210194c475687be6106a3b84
+(10 rows)
+
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ptable__p0;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_ftable__p1;	-- empty
+ id | y 
+----+---
+(0 rows)
+
+SELECT * FROM tru_rtable1;		-- empty
+ id | y 
+----+---
+(0 rows)
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable ORDER BY id;
+ id |                z                 
+----+----------------------------------
+  1 | c81e728d9d4c2f636f067f89cc14862c
+  2 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+  3 | a87ff679a2f3e71d9181a67b7542122c
+  4 | e4da3b7fbbce2345d7772b0674a318d5
+  5 | 1679091c5a880faf6fb5e6087eb1b2dc
+  6 | 8f14e45fceea167a5a36dedd4bea2543
+  7 | c9f0f895fb98ab9159f51fd0297e236d
+  8 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  9 | d3d9446802a44259755d38e6d163e820
+ 10 | 6512bd43d9caa6e02c990b0a82652dca
+(10 rows)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;    -- empty
+ id | z 
+----+---
+(0 rows)
+
+SELECT * FROM tru_fk_table;		-- also truncated
+ fkey 
+------
+(0 rows)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;
+ id |                x                 | id |                z                 
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    | 
+  2 | a87ff679a2f3e71d9181a67b7542122c |    | 
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;  -- empty
+ id | x | id | z 
+----+---+----+---
+(0 rows)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent ORDER BY id;
+ id |                a                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+ 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ 16 | c74d97b01eae257e44aa9d5bade97baf
+ 17 | 70efdf2ec9b086079795c442636b55fb
+ 18 | 6f4922f45568161a8cdf4ad2299f6d23
+(9 rows)
+
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent; -- empty
+ id | a 
+----+---
+(0 rows)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+  5 | e4da3b7fbbce2345d7772b0674a318d5
+  6 | 1679091c5a880faf6fb5e6087eb1b2dc
+  7 | 8f14e45fceea167a5a36dedd4bea2543
+  8 | c9f0f895fb98ab9159f51fd0297e236d
+  9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable ORDER BY id;
+ id |                x                 
+----+----------------------------------
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(5 rows)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+ id |                x                 
+----+----------------------------------
+ 21 | 3c59dc048e8850243be8079a5c74d079
+ 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ 24 | 1ff1de774005f8da13f42943881c655f
+ 25 | 8e296a067a37563370ded05f5a3bf3ec
+ 10 | d3d9446802a44259755d38e6d163e820
+ 11 | 6512bd43d9caa6e02c990b0a82652dca
+ 12 | c20ad4d76fe97759aa27a0c99bff6710
+ 13 | c51ce410c124a10e0db5e4b97fc2af39
+ 14 | aab3238922bcc25a6f606eb525ffdc56
+(10 rows)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;    -- empty
+ id | x 
+----+---
+(0 rows)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, 
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9150,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..6fa20834a4 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Propagate the TRUNCATE command to the remote server inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false,NULL);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..4e620f67ff 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,101 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT * FROM tru_ftable ORDER BY id;
+TRUNCATE tru_ftable;
+SELECT * FROM tru_rtable0;		-- empty
+SELECT * FROM tru_ftable;		-- empty
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT * FROM tru_ptable ORDER BY id;
+TRUNCATE tru_ptable;
+SELECT * FROM tru_ptable;		-- empty
+SELECT * FROM tru_ptable__p0;	-- empty
+SELECT * FROM tru_ftable__p1;	-- empty
+SELECT * FROM tru_rtable1;		-- empty
+
+-- 'CASCADE' option
+SELECT * FROM tru_pk_ftable ORDER BY id;
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT * FROM tru_pk_ftable;    -- empty
+SELECT * FROM tru_fk_table;		-- also truncated
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id = b.id ORDER BY a.id;  -- empty
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT * FROM tru_ftable_parent ORDER BY id;
+TRUNCATE tru_ftable_parent;
+SELECT * FROM tru_ftable_parent; -- empty
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT * FROM tru_ftable ORDER BY id;
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT * FROM tru_ftable ORDER BY id;
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT * FROM tru_ftable;
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT * FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table, 
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..66605b7361 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers and has the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     that it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command
+     but is truncated due to a dependency (for example, as a partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal>, corresponding to the <literal>RESTRICT</literal>
+     and <literal>CASCADE</literal> options of <command>TRUNCATE</command>.
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partition leaves.
+       If <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with
+       the <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it looks as if the
+       <command>TRUNCATE</command> command only partially removed the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..11fd96d7dc 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved,
+ * their FDW callbacks are invoked, once per server, after the regular tables.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,31 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2129,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..152f0332ac 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 6a98064b2b..a173d604d6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#43Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#39)
Re: TRUNCATE on foreign table

On Sun, Apr 4, 2021 at 12:00 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

5) Can't we use the do_sql_command function after making it non-static? We
could go the extra mile, that is, we could make do_sql_command a little more
generic by passing some enum for each of PQsendQuery,
PQsendQueryParams, PQsendQueryPrepared and PQsendPrepare and replace
the respective code chunks with do_sql_command in postgres_fdw.c.

I've skipped this for now.
I feel it sounds cool, but not easy.
It should be added by another patch because it's not only related to TRUNCATE.

Fair enough! I will give it a thought and provide a patch separately.

6) A white space error when the patch is applied.
contrib/postgres_fdw/postgres_fdw.c:2913: trailing whitespace.

I've checked the patch and cleaned up the whitespace.
But I couldn't reproduce this message when applying (patch -p1 < ...) my v8 patch.
If this still occurs, please tell me how you apply the patch.

I usually follow these steps:
1) write code 2) git diff --check (will give if there are any white
space or indentation errors) 3) git add -u 4) git commit (will enter a
commit message) 5) git format-patch -1 <<sha of the commit>> -v
<<version number>> 6) to apply patch, git apply <<patch_name>>.patch

7) I may be missing something here. Why do we need a hash table at
all? We could just do it with a linked list right? Is there a specific
reason to use a hash table? IIUC, the hash table entries will be lying
around until the local session exits since we are not doing
hash_destroy.

I've skipped this for now.

If you don't destroy the hash, you are going to cause a memory leak.
Because the pointer to the hash table, ft_htab, is local to
ExecuteTruncateGuts (note that it's not a static variable) and you are
creating a memory for the hash table and leaving the function without
cleaning it up. IMHO, we should destroy the hash memory at the end of
ExecuteTruncateGuts.

Another comment for tests, why do we need to do full outer join of two
tables to just show up there are some rows in the table? I would
suggest that all the tests introduced in the patch can be something
like this: 1) before truncate, just show the count(*) from the table
2) truncate the foreign tables 3) after truncate, just show the
count(*) which should be 0. Because we don't care what the data is in
the tables, we only care about whether the truncate happened or not.

+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id =
b.id ORDER BY a.id;
+ id |                x                 | id |                z
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    |
+  2 | a87ff679a2f3e71d9181a67b7542122c |    |
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id =
b.id ORDER BY a.id;  -- empty

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#44Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#43)
1 attachment(s)
Re: TRUNCATE on foreign table

Thank you for checking v11!
I've updated it and attached v12.

I usually follow these steps:
1) write code 2) git diff --check (will give if there are any white
space or indentation errors) 3) git add -u 4) git commit (will enter a
commit message) 5) git format-patch -1 <<sha of the commit>> -v
<<version number>> 6) to apply patch, git apply <<patch_name>>.patch

Thanks, I've removed the trailing whitespace and confirmed that no warnings
occur when I run "git apply <<patch_name>>.patch"

If you don't destroy the hash, you are going to cause a memory leak.
Because the pointer to the hash table, ft_htab, is local to
ExecuteTruncateGuts (note that it's not a static variable) and you are
creating a memory for the hash table and leaving the function without
cleaning it up. IMHO, we should destroy the hash memory at the end of
ExecuteTruncateGuts.

Sure. I've added hash_destroy().

Another comment for tests, why do we need to do full outer join of two
tables to just show up there are some rows in the table? I would
suggest that all the tests introduced in the patch can be something
like this: 1) before truncate, just show the count(*) from the table
2) truncate the foreign tables 3) after truncate, just show the
count(*) which should be 0. Because we don't care what the data is in
the tables, we only care about whether the truncate happened or not.

Sure. I've replaced the test command "SELECT * FROM ..." with
"SELECT COUNT(*) FROM ..."
However, in some tests the "id" column is needed to check the result, for
example after running TRUNCATE with the ONLY clause on an inherited table.
Thus, I use "sum(id)" instead of "count(*)" to check the result when
the table has records.

2021年4月5日(月) 0:20 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Sun, Apr 4, 2021 at 12:00 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

5) Can't we use do_sql_command function after making it non static? We
could go extra mile, that is we could make do_sql_command little more
generic by passing some enum for each of PQsendQuery,
PQsendQueryParams, PQsendQueryPrepared and PQsendPrepare and replace
the respective code chunks with do_sql_command in postgres_fdw.c.

I've skipped this for now.
I feel it sounds cool, but not easy.
It should be added by another patch because it's not only related to TRUNCATE.

Fair enough! I will give it a thought and provide a patch separately.

6) A white space error when the patch is applied.
contrib/postgres_fdw/postgres_fdw.c:2913: trailing whitespace.

I've checked the patch and clean spaces.
But I can't confirmed this message by attaching(patch -p1 < ...) my v8 patch.
If this still occurs, please tell me how you attach the patch.

I usually follow these steps:
1) write code 2) git diff --check (will give if there are any white
space or indentation errors) 3) git add -u 4) git commit (will enter a
commit message) 5) git format-patch -1 <<sha of the commit>> -v
<<version number>> 6) to apply patch, git apply <<patch_name>>.patch

7) I may be missing something here. Why do we need a hash table at
all? We could just do it with a linked list right? Is there a specific
reason to use a hash table? IIUC, the hash table entries will be lying
around until the local session exists since we are not doing
hash_destroy.

I've skipped this for now.

If you don't destroy the hash, you are going to cause a memory leak.
Because, the pointer to hash tableft_htab is local to
ExecuteTruncateGuts (note that it's not a static variable) and you are
creating a memory for the hash table and leaving the function without
cleaning it up. IMHO, we should destroy the hash memory at the end of
ExecuteTruncateGuts.

Another comment for tests, why do we need to do full outer join of two
tables to just show up there are some rows in the table? I would
suggest that all the tests introduced in the patch can be something
like this: 1) before truncate, just show the count(*) from the table
2) truncate the foreign tables 3) after truncate, just show the
count(*) which should be 0. Because we don't care what the data is in
the tables, we only care about whether truncate is happened or not.

+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id =
b.id ORDER BY a.id;
+ id |                x                 | id |                z
+----+----------------------------------+----+----------------------------------
+  1 | eccbc87e4b5ce2fe28308fd9f2a7baf3 |    |
+  2 | a87ff679a2f3e71d9181a67b7542122c |    |
+  3 | e4da3b7fbbce2345d7772b0674a318d5 |  3 | 1679091c5a880faf6fb5e6087eb1b2dc
+  4 | 1679091c5a880faf6fb5e6087eb1b2dc |  4 | 8f14e45fceea167a5a36dedd4bea2543
+  5 | 8f14e45fceea167a5a36dedd4bea2543 |  5 | c9f0f895fb98ab9159f51fd0297e236d
+  6 | c9f0f895fb98ab9159f51fd0297e236d |  6 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+  7 | 45c48cce2e2d7fbdea1afc51c7c6ad26 |  7 | d3d9446802a44259755d38e6d163e820
+  8 | d3d9446802a44259755d38e6d163e820 |  8 | 6512bd43d9caa6e02c990b0a82652dca
+    |                                  |  9 | c20ad4d76fe97759aa27a0c99bff6710
+    |                                  | 10 | c51ce410c124a10e0db5e4b97fc2af39
+(10 rows)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT * FROM tru_ftable a FULL OUTER JOIN tru_pk_ftable b ON a.id =
b.id ORDER BY a.id;  -- empty

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v12.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v12.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..45b980433b 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,44 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+		deparseRelation(buf, frel);
+	}
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+	switch (behavior)
+	{
+		case DROP_RESTRICT:
+			appendStringInfoString(buf, " RESTRICT");
+			break;
+		case DROP_CASCADE:
+			appendStringInfoString(buf, " CASCADE");
+			break;
+	}
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..5702450591 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,200 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9107,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..6fa20834a4 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,90 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/* pick up remote connection, and sanity checks */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+			user = GetUserMapping(GetUserId(), server_id);
+			conn = GetConnection(user, false,NULL);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+		else
+		{
+			/* postgresExecForeignTruncate() is invoked for each server */
+			Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+	res = pgfdw_get_result(conn, sql.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..ead3164ea8 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,103 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key, x text);
+CREATE TABLE tru_rtable1 (id int primary key, y text);
+CREATE FOREIGN TABLE tru_ftable (id int, x text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int, y text) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x,md5(x::text) FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key, z text);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x,md5((x+1)::text) FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int, z text)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int, a text);
+CREATE TABLE tru_rtable_child (id int, a text);
+CREATE FOREIGN TABLE tru_ftable_parent (id int, a text)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x, md5(x::text) FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x, md5(x::text) FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x,md5((x+2)::text) FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x,md5((x+3)::text) FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x,md5(x::text) FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x,md5(x::text) FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..66605b7361 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     that it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not named in the <command>TRUNCATE</command> command,
+     but is truncated because of a dependency (for example, it is a child or partition of a named table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (corresponding to the <literal>RESTRICT</literal>
+     and <literal>CASCADE</literal> options of <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Take care when a foreign table maps to a remote table
+       that has inheritance children or partitions.
+       If <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with the
+       <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it can look as if the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used on foreign tables if
+     the foreign data wrapper supports it; for an example, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..6cc279d58d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved,
+ * their FDW callbacks are invoked after the regular tables are truncated.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry for this server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,33 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		while ((ft_info = hash_seq_search(&seq)) != NULL)
+		{
+			FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+			/* truncate_check_rel() has checked that already */
+			Assert(routine->ExecForeignTruncate != NULL);
+
+			routine->ExecForeignTruncate(ft_info->frels_list,
+										 ft_info->frels_extra,
+										 behavior,
+										 restart_seqs);
+		}
+
+		hash_destroy(ft_htab);
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2131,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..152f0332ac 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 6a98064b2b..a173d604d6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#45Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#44)
Re: TRUNCATE on foreign table

On Sun, Apr 4, 2021 at 10:23 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Sure. I've replaced the test command "SELECT * FROM ..." with
"SELECT COUNT(*) FROM ...".
However, for example, the "id" column is used to check after running
TRUNCATE with ONLY clause to the inherited table.
Thus, I use "sum(id)" instead of "count(*)" to check the result when
the table has records.

I still don't understand why we need sum(id), not count(*). Am I
missing something here?

Here are some more comments on the v12 patch:
1) Instead of switch case, a simple if else clause would reduce the code a bit:
if (behavior == DROP_RESTRICT)
appendStringInfoString(buf, " RESTRICT");
else if (behavior == DROP_CASCADE)
appendStringInfoString(buf, " CASCADE");

2) Some coding style comments:
It's better to have a new line after variable declarations,
assignments, function calls, before if blocks, after if blocks for
better readability of the code.
+    appendStringInfoString(buf, "TRUNCATE ");   ---> new line after this
+    forboth (lc1, frels_list,
+    }         ---> new line after this
+    appendStringInfo(buf, " %s IDENTITY",
+        /* ensure the target foreign table is truncatable */
+        truncatable = server_truncatable;    ---> new line after this
+        foreach (cell, ft->options)

+ } ---> new line after this
+ if (!truncatable)

+    }    ---> new line after this
+    /* set up remote query */
+    initStringInfo(&sql);
+    deparseTruncateSql(&sql, frels_list, frels_extra, behavior,
restart_seqs);    ---> new line after this
+    /* run remote query */
+    if (!PQsendQuery(conn, sql.data))
+        pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
---> new line after this
+    res = pgfdw_get_result(conn, sql.data);    ---> new line after this
+    if (PQresultStatus(res) != PGRES_COMMAND_OK)
+        pgfdw_report_error(ERROR, res, conn, true, sql.data);    --->
new line after this
+    /* clean-up */
+    PQclear(res);
+    pfree(sql.data);
+}

and so on.

a space after "false," and before "NULL"
+ conn = GetConnection(user, false,NULL);

bring lc2, frels_extra onto the same line as lc1, frels_list
+    forboth (lc1, frels_list,
+             lc2, frels_extra)

3) I think we need truncatable behaviour that is consistent with updatable.
With your patch, seems like below is the behaviour for truncatable:
both server and foreign table are truncatable = truncated
server is not truncatable and foreign table is truncatable = not
truncated and error out
server is truncatable and foreign table is not truncatable = not
truncated and error out
server is not truncatable and foreign table is not truncatable = not
truncated and error out

Below is the behaviour for updatable:
both server and foreign table are updatable = updated
server is not updatable and foreign table is updatable = updated
server is updatable and foreign table is not updatable = not updated
server is not updatable and foreign table is not updatable = not updated

And also see comment in postgresIsForeignRelUpdatable
/*
* By default, all postgres_fdw foreign tables are assumed updatable. This
* can be overridden by a per-server setting, which in turn can be
* overridden by a per-table setting.
*/

IMO, you could do the same thing for truncatable, change is required
in your patch:
both server and foreign table are truncatable = truncated
server is not truncatable and foreign table is truncatable = truncated
server is truncatable and foreign table is not truncatable = not
truncated and error out
server is not truncatable and foreign table is not truncatable = not
truncated and error out

4) GetConnection needs to be done after all the error checking code;
otherwise, on error, we would have opened a new connection without
actually using it. Just move the code below outside of the for loop in
postgresExecForeignTruncate
+            user = GetUserMapping(GetUserId(), server_id);
+            conn = GetConnection(user, false,NULL);
to here:
+    Assert(OidIsValid(server_id));
+
+    /* get a connection to the server */
+    user = GetUserMapping(GetUserId(), server_id);
+    conn = GetConnection(user, false, NULL);
+
+    /* set up remote query */
+    initStringInfo(&sql);
+    deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+    /* run remote query */
+    if (!PQsendQuery(conn, sql.data))
+        pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+    res = pgfdw_get_result(conn, sql.data);
+    if (PQresultStatus(res) != PGRES_COMMAND_OK)
+        pgfdw_report_error(ERROR, res, conn, true, sql.data);
5) This assertion is bogus, because GetForeignServerIdByRelId will
return valid server id and otherwise it fails with "cache lookup
error", so please remove it.
+        else
+        {
+            /* postgresExecForeignTruncate() is invoked for each server */
+            Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+        }
6) You can add a comment here saying this if-clause gets executed only
once per server.
+
+        if (!OidIsValid(server_id))
+        {
+            server_id = GetForeignServerIdByRelId(frel_oid);
7) Did you try checking whether we reach hash_destroy code when a
failure happens on executing truncate on a remote table? Otherwise we
might want to do routine->ExecForeignTruncate inside PG_TRY block?
+            /* truncate_check_rel() has checked that already */
+            Assert(routine->ExecForeignTruncate != NULL);
+
+            routine->ExecForeignTruncate(ft_info->frels_list,
+                                         ft_info->frels_extra,
+                                         behavior,
+                                         restart_seqs);
+        }
+
+        hash_destroy(ft_htab);
+    }

8) This comment can be removed, and a more descriptive comment added
before the for loop in postgresExecForeignTruncate, similar to the
comment that we have in postgresIsForeignRelUpdatable for updatable.
+ /* pick up remote connection, and sanity checks */

9) It will be good if you can divide up your patch into 3 separate
patches - 0001 code, 0002 tests, 0003 documentation

10) Why do we need many new tables for tests? Can't we do this -
insert, show count(*) as non-zero, truncate, show count(*) as 0, again
insert, another truncate test? And we can also have a very minimal
number of rows say 1 or 2 not 10? If possible, reduce the number of
new tables introduced. And do you have a specific reason to have a
text column in each of the tables? AFAICS, we don't care about the
column type, you could just have another int column and use
generate_series while inserting. We can remove md5 function calls.
Your tests will look clean.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#46Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#45)
1 attachment(s)
Re: TRUNCATE on foreign table

Thank you for your comments.
I've attached v13.

Here are some more comments on the v12 patch:
I still don't understand why we need sum(id), not count(*). Am I
missing something here?

The value of "id" is used for checking whether correct records are
truncated or not.
For instance, on "truncate with ONLY clause",
At first, There are 16 values in "tru_ftable_parent", for instance,
[1,2,3,...,8,10,11,12,...,18].
By "TRUNCATE ONLY tru_ftable_parent", [1,2,3,...,8] will be truncated.
Thus, the "sum(id)" = 126.
If we use "count(*)" here, then the value will be 8.
Let's consider the case that there are 8 values [1,2,3,...,8] by some
kind of bug after running "TRUNCATE ONLY tru_ftable_parent".
Then, we miss this bug by "count(*)" because the value is the same as
the correct pattern.

1) Instead of switch case, a simple if else clause would reduce the code a bit:
if (behavior == DROP_RESTRICT)
appendStringInfoString(buf, " RESTRICT");
else if (behavior == DROP_CASCADE)
appendStringInfoString(buf, " CASCADE");

I've modified it.

2) Some coding style comments:
It's better to have a new line after variable declarations,
assignments, function calls, before if blocks, after if blocks for
better readability of the code.

I've fixed it.

3) I think we need truncatable behaviour that is consistent with updatable.

That's not the case. The "truncatable" option works the same as "updatable".
I've confirmed that the table can be truncated with this combination:
truncatable on the server setting is false & truncatable on the table
setting is true.
I've also added this case to the tests.

4) GetConnection needs to be done after all the error checking code
otherwise on error we would have opened a new connection without
actually using it. Just move below code outside of the for loop in
postgresExecForeignTruncate

Sure, I've moved it.

5) This assertion is bogus, because GetForeignServerIdByRelId will
return valid server id and otherwise it fails with "cache lookup
error", so please remove it.

I've removed it.

6) You can add a comment here saying this if-clause gets executed only
once per server.

I've added it.

7) Did you try checking whether we reach hash_destroy code when a
failure happens on executing truncate on a remote table? Otherwise we
might want to do routine->ExecForeignTruncate inside PG_TRY block?

I've added PG_TRY block.

8) This comment can be removed and have more descriptive comment
before the for loop in postgresExecForeignTruncate similar to the
comment what we have in postgresIsForeignRelUpdatable for updatable.

I've removed the comment and copied the comment from
postgresIsForeignRelUpdatable,
and modified it.

9) It will be good if you can divide up your patch into 3 separate
patches - 0001 code, 0002 tests, 0003 documentation

I'll do that for future patches; please forgive me for not doing so in this one.

10) Why do we need many new tables for tests? Can't we do this -
insert, show count(*) as non-zero, truncate, show count(*) as 0, again
insert, another truncate test? And we can also have a very minimal
number of rows say 1 or 2 not 10? If possible, reduce the number of
new tables introduced. And do you have a specific reason to have a
text column in each of the tables? AFAICS, we don't care about the
column type, you could just have another int column and use
generate_series while inserting. We can remove md5 function calls.
Your tests will look clean.

I've removed the text field, but the number of records is kept.
As I said at the top, the value of id is checked, so I want to keep the
number of rows.

2021年4月5日(月) 14:59 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Sun, Apr 4, 2021 at 10:23 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Sure. I've replaced the test command "SELECT * FROM ..." with
"SELECT COUNT(*) FROM ...".
However, for example, the "id" column is used to check after running
TRUNCATE with ONLY clause to the inherited table.
Thus, I use "sum(id)" instead of "count(*)" to check the result when
the table has records.

I still don't understand why we need sum(id), not count(*). Am I
missing something here?

Here are some more comments on the v12 patch:
1) Instead of switch case, a simple if else clause would reduce the code a bit:
if (behavior == DROP_RESTRICT)
appendStringInfoString(buf, " RESTRICT");
else if (behavior == DROP_CASCADE)
appendStringInfoString(buf, " CASCADE");

2) Some coding style comments:
It's better to have a new line after variable declarations,
assignments, function calls, before if blocks, after if blocks for
better readability of the code.
+    appendStringInfoString(buf, "TRUNCATE ");   ---> new line after this
+    forboth (lc1, frels_list,
+    }         ---> new line after this
+    appendStringInfo(buf, " %s IDENTITY",
+        /* ensure the target foreign table is truncatable */
+        truncatable = server_truncatable;    ---> new line after this
+        foreach (cell, ft->options)

+ } ---> new line after this
+ if (!truncatable)

+    }    ---> new line after this
+    /* set up remote query */
+    initStringInfo(&sql);
+    deparseTruncateSql(&sql, frels_list, frels_extra, behavior,
restart_seqs);    ---> new line after this
+    /* run remote query */
+    if (!PQsendQuery(conn, sql.data))
+        pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
---> new line after this
+    res = pgfdw_get_result(conn, sql.data);    ---> new line after this
+    if (PQresultStatus(res) != PGRES_COMMAND_OK)
+        pgfdw_report_error(ERROR, res, conn, true, sql.data);    --->
new line after this
+    /* clean-up */
+    PQclear(res);
+    pfree(sql.data);
+}

and so on.

a space after "false," and before "NULL"
+ conn = GetConnection(user, false,NULL);

bring lc2, frels_extra onto the same line as lc1, frels_list
+    forboth (lc1, frels_list,
+             lc2, frels_extra)

3) I think we need truncatable behaviour that is consistent with updatable.
With your patch, seems like below is the behaviour for truncatable:
both server and foreign table are truncatable = truncated
server is not truncatable and foreign table is truncatable = not
truncated and error out
server is truncatable and foreign table is not truncatable = not
truncated and error out
server is not truncatable and foreign table is not truncatable = not
truncated and error out

Below is the behaviour for updatable:
both server and foreign table are updatable = updated
server is not updatable and foreign table is updatable = updated
server is updatable and foreign table is not updatable = not updated
server is not updatable and foreign table is not updatable = not updated

And also see comment in postgresIsForeignRelUpdatable
/*
* By default, all postgres_fdw foreign tables are assumed updatable. This
* can be overridden by a per-server setting, which in turn can be
* overridden by a per-table setting.
*/

IMO, you could do the same thing for truncatable, change is required
in your patch:
both server and foreign table are truncatable = truncated
server is not truncatable and foreign table is truncatable = truncated
server is truncatable and foreign table is not truncatable = not
truncated and error out
server is not truncatable and foreign table is not truncatable = not
truncated and error out

4) GetConnection needs to be done after all the error checking code
otherwise on error we would have opened a new connection without
actually using it. Just move below code outside of the for loop in
postgresExecForeignTruncate
+            user = GetUserMapping(GetUserId(), server_id);
+            conn = GetConnection(user, false,NULL);
to here:
+    Assert(OidIsValid(server_id));
+
+    /* get a connection to the server */
+    user = GetUserMapping(GetUserId(), server_id);
+    conn = GetConnection(user, false, NULL);
+
+    /* set up remote query */
+    initStringInfo(&sql);
+    deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+    /* run remote query */
+    if (!PQsendQuery(conn, sql.data))
+        pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+    res = pgfdw_get_result(conn, sql.data);
+    if (PQresultStatus(res) != PGRES_COMMAND_OK)
+        pgfdw_report_error(ERROR, res, conn, true, sql.data);
5) This assertion is bogus, because GetForeignServerIdByRelId will
return valid server id and otherwise it fails with "cache lookup
error", so please remove it.
+        else
+        {
+            /* postgresExecForeignTruncate() is invoked for each server */
+            Assert(server_id == GetForeignServerIdByRelId(frel_oid));
+        }
6) You can add a comment here saying this if-clause gets executed only
once per server.
+
+        if (!OidIsValid(server_id))
+        {
+            server_id = GetForeignServerIdByRelId(frel_oid);
7) Did you try checking whether we reach hash_destroy code when a
failure happens on executing truncate on a remote table? Otherwise we
might want to do routine->ExecForeignTruncate inside PG_TRY block?
+            /* truncate_check_rel() has checked that already */
+            Assert(routine->ExecForeignTruncate != NULL);
+
+            routine->ExecForeignTruncate(ft_info->frels_list,
+                                         ft_info->frels_extra,
+                                         behavior,
+                                         restart_seqs);
+        }
+
+        hash_destroy(ft_htab);
+    }

8) This comment can be removed, and a more descriptive comment added
before the for loop in postgresExecForeignTruncate, similar to the
comment that we have in postgresIsForeignRelUpdatable for updatable.
+ /* pick up remote connection, and sanity checks */

9) It will be good if you can divide up your patch into 3 separate
patches - 0001 code, 0002 tests, 0003 documentation

10) Why do we need many new tables for tests? Can't we do this -
insert, show count(*) as non-zero, truncate, show count(*) as 0, again
insert, another truncate test? And we can also have a very minimal
number of rows say 1 or 2 not 10? If possible, reduce the number of
new tables introduced. And do you have a specific reason to have a
text column in each of the tables? AFAICS, we don't care about the
column type, you could just have another int column and use
generate_series while inserting. We can remove md5 function calls.
Your tests will look clean.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v13.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v13.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..4f9df003c2 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+			
+		deparseRelation(buf, frel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+					 
+	if (behavior==DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if(behavior==DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..b1388a799c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9112,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..f4a4f773b8 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2216,7 +2223,7 @@ postgresIsForeignRelUpdatable(Relation rel)
 	ListCell   *lc;
 
 	/*
-	 * By default, all postgres_fdw foreign tables are assumed updatable. This
+	 * By default, all postgres_fdw foreign tables are assumed NOT truncatable. This
 	 * can be overridden by a per-server setting, which in turn can be
 	 * overridden by a per-table setting.
 	 */
@@ -2868,6 +2875,98 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable. This
+	 * can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			/* This is executed only once per server */
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+
+	Assert(OidIsValid(server_id));
+	user = GetUserMapping(GetUserId(), server_id);
+	conn = GetConnection(user, false, NULL);
+
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+
+	res = pgfdw_get_result(conn, sql.data);
+
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..eb5e7e3bd6 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partitioned table with a mixture of regular and foreign partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables in one command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..66605b7361 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means that
+     it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not named in the <command>TRUNCATE</command> command
+     but is truncated because of a dependency (for example, as a partition of a named table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated; its possible values are <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (matching the corresponding options of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..1092160603 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -404,7 +405,7 @@ OPTIONS (ADD password_required 'false');
 
    <para>
     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using the following options:
    </para>
 
    <variablelist>
@@ -433,6 +434,29 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partitions.
+       When <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with the
+       <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it can look as if the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </sect3>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+   <command>TRUNCATE</command> can be used for foreign tables if
+   the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..65497c40eb 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved, their
+ * FDW's TRUNCATE callback is invoked after regular tables are processed.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry for the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,39 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->frels_list,
+											ft_info->frels_extra,
+											behavior,
+											restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2137,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 354fbe4b4b..152f0332ac 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 6a98064b2b..a173d604d6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#47Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#46)
Re: TRUNCATE on foreign table

On Mon, Apr 5, 2021 at 7:38 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

3) I think we need truncatable behaviour that is consistent with updatable.

It's not correct. "truncatable" option works the same as "updatable".
I've confirmed that the table can be truncated with the combination:
truncatable on the server setting is false & truncatable on the table
setting is true.
I've also added to the test.

Yeah you are right! I was wrong in understanding.

7) Did you try checking whether we reach hash_destroy code when a
failure happens on executing truncate on a remote table? Otherwise we
might want to do routine->ExecForeignTruncate inside PG_TRY block?

I've added PG_TRY block.

Did you check that hash_destroy is not reached when an error occurs
on the remote server while executing the truncate command? Only if that
is the case do we need the PG_TRY block; otherwise we don't.

9) It will be good if you can divide up your patch into 3 separate
patches - 0001 code, 0002 tests, 0003 documentation

I'll do it when I send a patch in the future, please forgive me on this patch.

That's okay.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#48Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#47)
Re: TRUNCATE on foreign table

Did you check that hash_destroy is not reachable when an error occurs
on the remote server while executing truncate command?

I've checked it, and hash_destroy is not executed on error.

I just added an elog() to check this:
+ elog(NOTICE,"destroyed");
+ hash_destroy(ft_htab);

Then I've checked by the test.

+ -- 'truncatable' option
+ ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+ TRUNCATE tru_ftable; -- error
+ ERROR:  truncate on "tru_ftable" is prohibited
<-   hash_destroy doesn't work.
+ ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+ TRUNCATE tru_ftable; -- accepted
+ NOTICE:  destroyed     <-  hash_destroy works.

Of course, the elog() is not included in v13 patch.

2021年4月5日(月) 23:35 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Mon, Apr 5, 2021 at 7:38 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

3) I think we need truncatable behaviour that is consistent with updatable.

It's not correct. "truncatable" option works the same as "updatable".
I've confirmed that the table can be truncated with the combination:
truncatable on the server setting is false & truncatable on the table
setting is true.
I've also added to the test.

Yeah you are right! I was wrong in understanding.

7) Did you try checking whether we reach hash_destroy code when a
failure happens on executing truncate on a remote table? Otherwise we
might want to do routine->ExecForeignTruncate inside PG_TRY block?

I've added PG_TRY block.

Did you check that hash_destroy is not reached when an error occurs
on the remote server while executing the truncate command? Only if that
is the case do we need the PG_TRY block; otherwise we don't.

9) It will be good if you can divide up your patch into 3 separate
patches - 0001 code, 0002 tests, 0003 documentation

I'll do it when I send a patch in the future, please forgive me on this patch.

That's okay.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#49Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#48)
Re: TRUNCATE on foreign table

On Mon, Apr 5, 2021 at 8:47 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Did you check that hash_destroy is not reachable when an error occurs
on the remote server while executing truncate command?

I've checked it, and hash_destroy is not executed on error.

I just added an elog() to check this:
+ elog(NOTICE,"destroyed");
+ hash_destroy(ft_htab);

Then I've checked by the test.

+ -- 'truncatable' option
+ ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+ TRUNCATE tru_ftable; -- error
+ ERROR:  truncate on "tru_ftable" is prohibited
<-   hash_destroy doesn't work.
+ ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+ TRUNCATE tru_ftable; -- accepted
+ NOTICE:  destroyed     <-  hash_destroy works.

Of course, the elog() is not included in v13 patch.

Few more comments on v13:

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04

2) What is this change for? The existing comment properly says the
behaviour i.e. all foreign tables are updatable by default.
@@ -2216,7 +2223,7 @@ postgresIsForeignRelUpdatable(Relation rel)
ListCell *lc;

     /*
-     * By default, all postgres_fdw foreign tables are assumed updatable. This
+     * By default, all postgres_fdw foreign tables are assumed NOT
truncatable. This
And the below comment is wrong, by default foreign tables are assumed
truncatable.
+     * By default, all postgres_fdw foreign tables are NOT assumed
truncatable. This
+     * can be overridden by a per-server setting, which in turn can be
+     * overridden by a per-table setting.
+     */
3) In the docs, let's not combine updatable and truncatable together.
Have a separate section for <title>Truncatability Options</title>, all
the documentation related to it be under this new section.
    <para>
     By default all foreign tables using
<filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using
the following options:
    </para>

4) I have a basic question: If I have a truncate statement with a mix
of local and foreign tables, IIUC, the patch is dividing up a single
truncate statement into two truncate local tables, truncate foreign
tables. Is this transaction safe at all?
A better illustration: TRUNCATE local_rel1, local_rel2, local_rel3,
foreign_rel1, foreign_rel2, foreign_rel3;
Your patch executes TRUNCATE local_rel1, local_rel2, local_rel3; on
local server and TRUNCATE foreign_rel1, foreign_rel2, foreign_rel3; on
remote server. Am I right?
Now the question is: if a failure occurs in either the local server
execution or the remote server execution, the other truncate command
would still succeed, right? Isn't this non-transactional, and aren't we
breaking the transactional guarantee of the truncation?
Looks like the order of execution is - first local rel truncation and
then foreign rel truncation, so what happens if foreign rel truncation
fails? Can we revert the local rel truncation?

6) Again v13 has white space errors, please ensure to run git diff
--check on every patch.
bharath@ubuntu:~/workspace/postgres$ git apply
/mnt/hgfs/Shared/pgsql14-truncate-on-foreign-table.v13.patch
/mnt/hgfs/Shared/pgsql14-truncate-on-foreign-table.v13.patch:41:
trailing whitespace.
/mnt/hgfs/Shared/pgsql14-truncate-on-foreign-table.v13.patch:47:
trailing whitespace.

warning: 2 lines add whitespace errors.
bharath@ubuntu:~/workspace/postgres$ git diff --check
contrib/postgres_fdw/deparse.c:2200: trailing whitespace.
+
contrib/postgres_fdw/deparse.c:2206: trailing whitespace.
+

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#50Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#49)
1 attachment(s)
Re: TRUNCATE on foreign table

Thank you for checking v13, and here is v14 patch.

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?

These may be needed by foreign data wrappers other than postgres_fdw.

2) Why is this change for? The existing comment properly says the
behaviour i.e. all foreign tables are updatable by default.

This is just a mistake. I've fixed it.

3) In the docs, let's not combine updatable and truncatable together.
Have a separate section for <title>Truncatability Options</title>, all
the documentation related to it be under this new section.

Sure. I've added new section.

4) I have a basic question: If I have a truncate statement with a mix
of local and foreign tables, IIUC, the patch is dividing up a single
truncate statement into two truncate local tables, truncate foreign
tables. Is this transaction safe at all?

According to this discussion, the truncation can be rolled back on both
the local server and the remote server.
/messages/by-id/CAOP8fzbuJ5GdKa+=GtizbqFtO2xsQbn4mVjjzunmsNVJMChSMQ@mail.gmail.com

6) Again v13 has white space errors, please ensure to run git diff
--check on every patch.

Umm.. I'm sure I've checked it on v13.
I've confirmed it on v14.

2021年4月6日(火) 13:33 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Mon, Apr 5, 2021 at 8:47 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Did you check that hash_destroy is not reachable when an error occurs
on the remote server while executing truncate command?

I've checked it and hash_destroy doesn't work on error.

I just adding elog() to check this:
+ elog(NOTICE,"destroyed");
+ hash_destroy(ft_htab);

Then I've checked by the test.

+ -- 'truncatable' option
+ ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+ TRUNCATE tru_ftable; -- error
+ ERROR:  truncate on "tru_ftable" is prohibited
<-   hash_destroy doesn't work.
+ ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+ TRUNCATE tru_ftable; -- accepted
+ NOTICE:  destroyed     <-  hash_destroy works.

Of course, the elog() is not included in v13 patch.

Few more comments on v13:

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04

2) Why is this change for? The existing comment properly says the
behaviour i.e. all foreign tables are updatable by default.
@@ -2216,7 +2223,7 @@ postgresIsForeignRelUpdatable(Relation rel)
ListCell *lc;

/*
-     * By default, all postgres_fdw foreign tables are assumed updatable. This
+     * By default, all postgres_fdw foreign tables are assumed NOT
truncatable. This
And the below comment is wrong, by default foreign tables are assumed
truncatable.
+     * By default, all postgres_fdw foreign tables are NOT assumed
truncatable. This
+     * can be overridden by a per-server setting, which in turn can be
+     * overridden by a per-table setting.
+     */
3) In the docs, let's not combine updatable and truncatable together.
Have a separate section for <title>Truncatability Options</title>, all
the documentation related to it be under this new section.
<para>
By default all foreign tables using
<filename>postgres_fdw</filename> are assumed
-    to be updatable.  This may be overridden using the following option:
+    to be updatable and truncatable.  This may be overridden using
the following options:
</para>

4) I have a basic question: If I have a truncate statement with a mix
of local and foreign tables, IIUC, the patch is dividing up a single
truncate statement into two truncate local tables, truncate foreign
tables. Is this transaction safe at all?
A better illustration: TRUNCATE local_rel1, local_rel2, local_rel3,
foreign_rel1, foreign_rel2, foreign_rel3;
Your patch executes TRUNCATE local_rel1, local_rel2, local_rel3; on
local server and TRUNCATE foreign_rel1, foreign_rel2, foreign_rel3; on
remote server. Am I right?
Now the question is: if any failure occurs either in local server
execution or in remote server execution, the other truncate command
would succeed right? Isn't this non-transactional and we are breaking
the transactional guarantee of the truncation.
Looks like the order of execution is - first local rel truncation and
then foreign rel truncation, so what happens if foreign rel truncation
fails? Can we revert the local rel truncation?

6) Again v13 has white space errors, please ensure to run git diff
--check on every patch.
bharath@ubuntu:~/workspace/postgres$ git apply
/mnt/hgfs/Shared/pgsql14-truncate-on-foreign-table.v13.patch
/mnt/hgfs/Shared/pgsql14-truncate-on-foreign-table.v13.patch:41:
trailing whitespace.
/mnt/hgfs/Shared/pgsql14-truncate-on-foreign-table.v13.patch:47:
trailing whitespace.

warning: 2 lines add whitespace errors.
bharath@ubuntu:~/workspace/postgres$ git diff --check
contrib/postgres_fdw/deparse.c:2200: trailing whitespace.
+
contrib/postgres_fdw/deparse.c:2206: trailing whitespace.
+

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v14.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v14.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..b9e4e471c1 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, frel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior==DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if(behavior==DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..b1388a799c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9112,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..74c419fb15 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,98 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	ForeignServer  *serv = NULL;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	PGresult	   *res;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable. This
+	 * can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach (lc, frels_list)
+	{
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			/* This is executed only once per server */
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+
+	Assert(OidIsValid(server_id));
+	user = GetUserMapping(GetUserId(), server_id);
+	conn = GetConnection(user, false, NULL);
+
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+
+	/* run remote query */
+	if (!PQsendQuery(conn, sql.data))
+		pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+
+	res = pgfdw_get_result(conn, sql.data);
+
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		pgfdw_report_error(ERROR, res, conn, true, sql.data);
+
+	/* clean-up */
+	PQclear(res);
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..68599892f8 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..eb5e7e3bd6 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..66605b7361 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command
+     but is truncated due to a dependency (for example, as a partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..7bdf85d4c8 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -436,6 +437,41 @@ OPTIONS (ADD password_required 'false');
    </variablelist>
   </sect3>
 
+  <sect3>
+    <title>Truncatability Options</title>
+
+    <para>
+     By default all foreign tables using <filename>postgres_fdw</filename> are assumed
+     to be truncatable.  This may be overridden using the following option:
+    </para>
+
+    <variablelist>
+      <varlistentry>
+      <term><literal>truncatable</literal></term>
+      <listitem>
+        <para>
+        This option controls whether <filename>postgres_fdw</filename> allows
+        foreign tables to be truncated using <command>TRUNCATE</command>
+        command. It can be specified for a foreign table or a foreign server.
+        A table-level option overrides a server-level option.
+        The default is <literal>true</literal>.
+        </para>
+        <para>
+        Pay attention to the case where a foreign table maps to a remote table
+        that has inherited children or partitions.
+        If <command>TRUNCATE</command> specifies the foreign table with the
+        <literal>ONLY</literal> clause, the remote query issued by
+        <filename>postgres_fdw</filename> also specifies the remote table with the
+        <literal>ONLY</literal> clause, which truncates only the parent
+        portion of the remote table. As a result, it looks as if the
+        <command>TRUNCATE</command> command removed only part of the contents
+        of the foreign table.
+        </para>
+      </listitem>
+      </varlistentry>
+    </variablelist>
+  </sect3>
+
   <sect3>
    <title>Importing Options</title>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 88a68a4697..65497c40eb 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved, their
+ * FDW callbacks are invoked after the regular tables have been truncated.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look after the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,39 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->frels_list,
+											ft_info->frels_extra,
+											behavior,
+											restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2137,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 74d538b5e3..d145d04850 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 6a98064b2b..a173d604d6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#51Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#50)
Re: TRUNCATE on foreign table

On Tue, Apr 6, 2021 at 5:36 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Thank you for checking v13, and here is v14 patch.

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?

These may be needed for the foreign data handler other than postgres_fdw.

I'm not sure about this, but if it's discussed upthread and agreed
upon, I'm fine with it.

4) I have a basic question: If I have a truncate statement with a mix
of local and foreign tables, IIUC, the patch is dividing up a single
truncate statement into two truncate local tables, truncate foreign
tables. Is this transaction safe at all?

According to this discussion, we can revert both tables in the local
and the server.
/messages/by-id/CAOP8fzbuJ5GdKa+=GtizbqFtO2xsQbn4mVjjzunmsNVJMChSMQ@mail.gmail.com

On giving more thought on this, it looks like we are safe i.e. local
truncation will get reverted. Because if an error occurs on foreign
table truncation, the control in the local server would go to
pgfdw_report_error which generates an error in the local server which
aborts the local transaction and so the local table truncations would
get reverted.

+    /* run remote query */
+    if (!PQsendQuery(conn, sql.data))
+        pgfdw_report_error(ERROR, NULL, conn, false, sql.data);
+
+    res = pgfdw_get_result(conn, sql.data);
+
+    if (PQresultStatus(res) != PGRES_COMMAND_OK)
+        pgfdw_report_error(ERROR, res, conn, true, sql.data);

I still feel that the above bunch of code is duplicate of what
do_sql_command function already has. I would recommend that we just
make that function non-static(it's easy to do) and keep the
declaration in postgres_fdw.h and use it in the
postgresExecForeignTruncate.

Another minor comment:
We could move + ForeignServer *serv = NULL; within foreach (lc,
frels_list), because it's not being used outside.

The v14 patch mostly looks good to me other than the above comments.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#52Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#50)
Re: TRUNCATE on foreign table

On Tue, Apr 6, 2021 at 5:36 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Thank you for checking v13, and here is v14 patch.

cfbot failure on v14 - https://cirrus-ci.com/task/4772360931770368.
Looks like it is not related to this patch, please re-confirm it.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#53Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#52)
1 attachment(s)
Re: TRUNCATE on foreign table

I've attached v15.

I still feel that the above bunch of code is duplicate of what
do_sql_command function already has. I would recommend that we just
make that function non-static(it's easy to do) and keep the
declaration in postgres_fdw.h and use it in the
postgresExecForeignTruncate.

I've tried this on v15.

Another minor comment:
We could move + ForeignServer *serv = NULL; within foreach (lc,
frels_list), because it's not being used outside.

I've moved it.

cfbot failure on v14 - https://cirrus-ci.com/task/4772360931770368.
Looks like it is not related to this patch, please re-confirm it.

I've checked v15 patch with "make check-world" and confirmed this passed.

2021年4月6日(火) 23:25 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Tue, Apr 6, 2021 at 5:36 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

Thank you for checking v13, and here is v14 patch.

cfbot failure on v14 - https://cirrus-ci.com/task/4772360931770368.
Looks like it is not related to this patch, please re-confirm it.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v15.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v15.patchDownload
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 6a61d83862..82aa14a65d 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -92,7 +92,6 @@ static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
 static void disconnect_pg_server(ConnCacheEntry *entry);
 static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
 static void configure_remote_session(PGconn *conn);
-static void do_sql_command(PGconn *conn, const char *sql);
 static void begin_remote_xact(ConnCacheEntry *entry);
 static void pgfdw_xact_callback(XactEvent event, void *arg);
 static void pgfdw_subxact_callback(SubXactEvent event,
@@ -568,7 +567,7 @@ configure_remote_session(PGconn *conn)
 /*
  * Convenience subroutine to issue a non-data-returning SQL command to remote
  */
-static void
+void
 do_sql_command(PGconn *conn, const char *sql)
 {
 	PGresult   *res;
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..b9e4e471c1 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, frel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior==DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if(behavior==DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..b1388a799c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9112,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..f2b7554b26 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,89 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		It propagates TRUNCATE command to the remote host inside of the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable. This
+	 * can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach (lc, frels_list)
+	{
+		ForeignServer  *serv = NULL;
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			/* This is executed only once per server */
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+
+	Assert(OidIsValid(server_id));
+	user = GetUserMapping(GetUserId(), server_id);
+	conn = GetConnection(user, false, NULL);
+
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+
+	/* run remote query */
+	do_sql_command(conn,sql.data);
+
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..62e779d818 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
@@ -219,6 +224,7 @@ extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
 									bool is_subquery,
 									List **retrieved_attrs, List **params_list);
 extern const char *get_jointype_name(JoinType jointype);
+void do_sql_command(PGconn *conn, const char *sql);
 
 /* in shippable.c */
 extern bool is_builtin(Oid objectId);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..eb5e7e3bd6 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partition table mixtured by table and foreign table
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..66605b7361 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     that it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command itself,
+     but is truncated due to a dependency (for example, as a child or partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..bc364a738e 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -436,6 +437,42 @@ OPTIONS (ADD password_required 'false');
    </variablelist>
   </sect3>
 
+  <sect3>
+   <title>Truncatability Options</title>
+
+   <para>
+    By default all foreign tables using <filename>postgres_fdw</filename> are assumed
+    to be truncatable.  This may be overridden using the following options:
+   </para>
+
+   <variablelist>
+
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Take care when a foreign table maps to a remote table
+       that has inherited children or partitions.
+       If <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with the
+       <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it can look as if the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </sect3>
+
   <sect3>
    <title>Importing Options</title>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for an example, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 87f9bdaef0..31a040e25d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved, their
+ * FDW callbacks are invoked, once per server, after the regular tables.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look up the entry for the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,39 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->frels_list,
+											ft_info->frels_extra,
+											behavior,
+											restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2137,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.)
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 74d538b5e3..d145d04850 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b26e81dcbf..91da5827b2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#54Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kazutaka Onishi (#53)
Re: TRUNCATE on foreign table

On Tue, Apr 6, 2021 at 10:15 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

I've checked v15 patch with "make check-world" and confirmed this passed.

Thanks for the patch. One minor thing - I think "mixtured" is not the
correct word in "+-- partition table mixtured by table and foreign
table". How about something like "+-- partitioned table with both
local and foreign table as partitions"?

The v15 patch basically looks good to me. I have no more comments.

CF entry https://commitfest.postgresql.org/32/2972/ still says it's
"waiting on author", do you want to change it to "needs review" if you
have no open points left so that others can take a look at it?

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#55Kazutaka Onishi
onishi@heterodb.com
In reply to: Bharath Rupireddy (#54)
1 attachment(s)
Re: TRUNCATE on foreign table

One minor thing - I think "mixtured" is not the
correct word in "+-- partition table mixtured by table and foreign
table". How about something like "+-- partitioned table with both
local and foreign table as partitions"?

Sure. I've fixed this.

The v15 patch basically looks good to me. I have no more comments.

Thank you for checking this many times.

CF entry https://commitfest.postgresql.org/32/2972/ still says it's
"waiting on author", do you want to change it to "needs review" if you
have no open points left so that others can take a look at it?

Yes, please.

2021年4月7日(水) 10:15 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Show quoted text

On Tue, Apr 6, 2021 at 10:15 PM Kazutaka Onishi <onishi@heterodb.com> wrote:

I've checked v15 patch with "make check-world" and confirmed this passed.

Thanks for the patch. One minor thing - I think "mixtured" is not the
correct word in "+-- partition table mixtured by table and foreign
table". How about something like "+-- partitioned table with both
local and foreign table as partitions"?

The v15 patch basically looks good to me. I have no more comments.

CF entry https://commitfest.postgresql.org/32/2972/ still says it's
"waiting on author", do you want to change it to "needs review" if you
have no open points left so that others can take a look at it?

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgsql14-truncate-on-foreign-table.v16.patchapplication/octet-stream; name=pgsql14-truncate-on-foreign-table.v16.patchDownload
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 6a61d83862..82aa14a65d 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -92,7 +92,6 @@ static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
 static void disconnect_pg_server(ConnCacheEntry *entry);
 static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
 static void configure_remote_session(PGconn *conn);
-static void do_sql_command(PGconn *conn, const char *sql);
 static void begin_remote_xact(ConnCacheEntry *entry);
 static void pgfdw_xact_callback(XactEvent event, void *arg);
 static void pgfdw_subxact_callback(SubXactEvent event,
@@ -568,7 +567,7 @@ configure_remote_session(PGconn *conn)
 /*
  * Convenience subroutine to issue a non-data-returning SQL command to remote
  */
-static void
+void
 do_sql_command(PGconn *conn, const char *sql)
 {
 	PGresult   *res;
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..b9e4e471c1 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a TRUNCATE statement for the given list of foreign tables
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *frels_list,
+				   List *frels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1, *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth (lc1, frels_list,
+			 lc2, frels_extra)
+	{
+		Relation	frel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(frels_list))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, frel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior==DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if(behavior==DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 59e4e27ffb..b1388a799c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  truncate on "tru_ftable" is prohibited
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8913,7 +9112,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..fc196fabc0 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -109,6 +109,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 16c2979f2d..f2b7554b26 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *frels_list,
+										List *frels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,89 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Propagates the TRUNCATE command to the remote server inside the
+ *		transaction block.
+ */
+static void
+postgresExecForeignTruncate(List *frels_list,
+							List *frels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid				server_id = InvalidOid;
+	UserMapping	   *user = NULL;
+	PGconn		   *conn = NULL;
+	StringInfoData	sql;
+	ListCell	   *lc;
+	bool			server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable. This
+	 * can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach (lc, frels_list)
+	{
+		ForeignServer  *serv = NULL;
+		Relation	frel = lfirst(lc);
+		Oid			frel_oid = RelationGetRelid(frel);
+		ForeignTable *ft = GetForeignTable(frel_oid);
+		ListCell   *cell;
+		bool		truncatable;
+
+		if (!OidIsValid(server_id))
+		{
+			/* This is executed only once per server */
+			server_id = GetForeignServerIdByRelId(frel_oid);
+			serv = GetForeignServer(server_id);
+
+			foreach (cell, serv->options)
+			{
+				DefElem	   *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/* ensure the target foreign table is truncatable */
+		truncatable = server_truncatable;
+		foreach (cell, ft->options)
+		{
+			DefElem	   *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("truncate on \"%s\" is prohibited",
+							RelationGetRelationName(frel))));
+	}
+
+	Assert(OidIsValid(server_id));
+	user = GetUserMapping(GetUserId(), server_id);
+	conn = GetConnection(user, false, NULL);
+
+	/* set up remote query */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, frels_list, frels_extra, behavior, restart_seqs);
+
+	/* run remote query */
+	do_sql_command(conn,sql.data);
+
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..62e779d818 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -206,6 +206,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *frels_list,
+							   List *frels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
@@ -219,6 +224,7 @@ extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
 									bool is_subquery,
 									List **retrieved_attrs, List **params_list);
 extern const char *get_jointype_name(JoinType jointype);
+void do_sql_command(PGconn *conn, const char *sql);
 
 /* in shippable.c */
 extern bool is_builtin(Oid objectId);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 107d1c0e03..eb5e7e3bd6 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..66605b7361 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *frels_list, List *frels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>frels_list</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>frels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers with the same length as
+     <literal>frels_list</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table was specified without the <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     it was specified with the <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>
+     means that the foreign table was not specified in the <command>TRUNCATE</command> command,
+     but is truncated due to a dependency (for example, as a partition of a specified table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index a7c695b000..bc364a738e 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -436,6 +437,42 @@ OPTIONS (ADD password_required 'false');
    </variablelist>
   </sect3>
 
+  <sect3>
+   <title>Truncatability Options</title>
+
+   <para>
+    By default all foreign tables using <filename>postgres_fdw</filename> are assumed
+    to be truncatable.  This may be overridden using the following options:
+   </para>
+
+   <variablelist>
+
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+      <para>
+       Pay attention to the case where a foreign table maps to a remote table
+       that has inherited children or partitions.
+       When <command>TRUNCATE</command> specifies the foreign table with the
+       <literal>ONLY</literal> clause, the remote query issued by
+       <filename>postgres_fdw</filename> also specifies the remote table with the
+       <literal>ONLY</literal> clause, which truncates only the parent
+       portion of the remote table. As a result, it looks as if the
+       <command>TRUNCATE</command> command removed only part of the contents
+       of the foreign table.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </sect3>
+
   <sect3>
    <title>Importing Options</title>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for an example, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 87f9bdaef0..31a040e25d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			server_oid;
+	List	   *frels_list;
+	List	   *frels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1588,18 +1604,21 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *		Executes a TRUNCATE command.
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
- * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * lock on all relations involved, checking permissions (local database
+ * ACLs even if relations are foreign tables) and otherwise verifying
+ * that the relation is OK for truncation. In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
- * are truncated and reindexed.
+ * are truncated and reindexed. If any foreign tables are involved,
+ * their FDW callbacks are invoked prior to the truncation of regular tables.
  */
 void
 ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,8 +1655,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ? TRUNCATE_REL_CONTEXT_NORMAL : TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
-		if (RelationIsLogicallyLogged(rel))
+		if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, myrelid);
 
 		if (recurse)
@@ -1683,8 +1703,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1695,7 +1716,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1742,21 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1, *lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,8 +1794,9 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(rel))
+				if (RelationIsLogicallyLogged(rel) && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, relid);
 			}
 		}
@@ -1868,14 +1895,68 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth (lc1, rels,
+			 lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * If truncating a foreign table, the foreign data wrapper callback
+		 * for TRUNCATE is called once for each server with a list of all the
+		 * relations to process linked to this server.  The list of relations
+		 * for each server is saved as a single entry in a hash table that
+		 * uses the server OID as lookup key.  Once the full set of lists is
+		 * built, all the entries of the hash table are scanned, and the list
+		 * of relations associated with the server is passed down to the
+		 * TRUNCATE callback of its foreign data wrapper.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			frel_oid = RelationGetRelid(rel);
+			Oid			server_oid = GetForeignServerIdByRelId(frel_oid);
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* if the hash table does not exist yet, initialize it */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/*
+			 * Look after the entry of the server in the hash table, and
+			 * initialize it if the entry does not exist yet.
+			 */
+			ft_info = hash_search(ft_htab, &server_oid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->server_oid = server_oid;
+				ft_info->frels_list = NIL;
+				ft_info->frels_extra = NIL;
+			}
+
+			/* save the relation in the list */
+			ft_info->frels_list = lappend(ft_info->frels_list, rel);
+			ft_info->frels_extra = lappend_int(ft_info->frels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2019,39 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/*
+	 * Now go through the hash table, and process each entry associated to the
+	 * servers involved in the TRUNCATE.
+	 */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->server_oid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->frels_list,
+											ft_info->frels_extra,
+											behavior,
+											restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2137,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			server_id = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(server_id);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 74d538b5e3..d145d04850 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,7 +1825,8 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		if (RelationIsLogicallyLogged(rel->localrel))
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		if (RelationIsLogicallyLogged(rel->localrel) && rel->localrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
 		/*
@@ -1862,8 +1864,9 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
-				if (RelationIsLogicallyLogged(childrel))
+				if (RelationIsLogicallyLogged(childrel) && childrel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
 					relids_logged = lappend_oid(relids_logged, childrelid);
 			}
 		}
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..205afe5ec3 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *frels_list,
+											  List *frels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b26e81dcbf..91da5827b2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#56Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kazutaka Onishi (#50)
Re: TRUNCATE on foreign table

On 2021/04/06 21:06, Kazutaka Onishi wrote:

Thank you for checking v13, and here is v14 patch.

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?

These may be needed for the foreign data handler other than postgres_fdw.

Could you tell me how such FDWs use TRUNCATE_REL_CONTEXT_CASCADING and _NORMAL? I'm still not sure if TRUNCATE_REL_CONTEXT_CASCADING is really required.

With the patch, both inherited and referencing relations are marked as TRUNCATE_REL_CONTEXT_CASCADING? Is this ok for that use? Or we should distinguish them?

+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04

With the patch, these are defined as flag bits. But ExecuteTruncate() seems to always set the entry in relids_extra to either of them, not the combination of them. So we can define them as enum?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#57Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#56)
Re: TRUNCATE on foreign table

2021年4月8日(木) 4:19 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/06 21:06, Kazutaka Onishi wrote:

Thank you for checking v13, and here is v14 patch.

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?

These may be needed for the foreign data handler other than postgres_fdw.

Could you tell me how such FDWs use TRUNCATE_REL_CONTEXT_CASCADING and _NORMAL? I'm still not sure if TRUNCATE_REL_CONTEXT_CASCADING is really required.

/messages/by-id/20200102144644.GM3195@tamriel.snowman.net

This is the suggestion when I added the flag to inform cascading.

| .... Instead, I'd suggest we have the core code build
| up a list of tables to truncate, for each server, based just on the list
| passed in by the user, and then also pass in if CASCADE was included or
| not, and then let the FDW handle that in whatever way makes sense for
| the foreign server (which, for a PG system, would probably be just
| building up the TRUNCATE command and running it with or without the
| CASCADE option, but it might be different on other systems).
|
Indeed, it is not a strong technical reason at this moment.
(And I don't have any idea how to distinguish these cases in my module, either.)

With the patch, both inherited and referencing relations are marked as TRUNCATE_REL_CONTEXT_CASCADING? Is this ok for that use? Or we should distinguish them?

In addition, even though my prior implementation distinguished and delivered
the status of whether the truncate command is issued with NORMAL or ONLY,
does the remote query by postgres_fdw need to follow that manner?

Please assume the case where a foreign table "ft" maps a remote table
with some child relations.
If we run TRUNCATE ONLY ft at the local server, postgres_fdw sets up
a remote truncate command with the "ONLY" qualifier, then the remote PostgreSQL
server truncates only the parent table on the remote side.
Next, a "SELECT * FROM ft" command returns some valid rows from the
child tables on the remote side, even though it is just after the TRUNCATE command.
Is it an intuitive behavior for users?

Even though we have discussed the flags and expected behavior of
foreign truncate, stripping relids_extra may be the most straightforward
API design.
So, in other words, the API requires the FDW driver to make the entire data
represented by the foreign table empty, by ExecForeignTruncate().
It is probably more consistent to look at DropBehavior only for listing up
the target relations among the local relations.

How about your thought?

If we stand on the above design, ExecForeignTruncate() doesn't need
the frels_extra and behavior arguments.

+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04

With the patch, these are defined as flag bits. But ExecuteTruncate() seems to always set the entry in relids_extra to either of them, not the combination of them. So we can define them as enum?

Regardless of my above comment, it's a bug.
When list_member_oid(relids, myrelid) == true, we have to set the proper flag on the
relevant frels_extra member, not just ignore it.

Best regards,

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#58Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#57)
Re: TRUNCATE on foreign table

On 2021/04/08 10:56, Kohei KaiGai wrote:

2021年4月8日(木) 4:19 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/06 21:06, Kazutaka Onishi wrote:

Thank you for checking v13, and here is v14 patch.

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?

These may be needed for the foreign data handler other than postgres_fdw.

Could you tell me how such FDWs use TRUNCATE_REL_CONTEXT_CASCADING and _NORMAL? I'm still not sure if TRUNCATE_REL_CONTEXT_CASCADING is really required.

/messages/by-id/20200102144644.GM3195@tamriel.snowman.net

This is the suggestion when I added the flag to inform cascading.

| .... Instead, I'd suggest we have the core code build
| up a list of tables to truncate, for each server, based just on the list
| passed in by the user, and then also pass in if CASCADE was included or
| not, and then let the FDW handle that in whatever way makes sense for
| the foreign server (which, for a PG system, would probably be just
| building up the TRUNCATE command and running it with or without the
| CASCADE option, but it might be different on other systems).
|
Indeed, it is not a strong technical reason at this moment.
(And, I also don't have idea to distinct these differences in my module also.)

The CASCADE option mentioned above seems to be the CASCADE clause specified in the TRUNCATE command. No? So the above doesn't seem to suggest including the information about how each table to truncate is picked up. Am I missing something?

With the patch, both inherited and referencing relations are marked as TRUNCATE_REL_CONTEXT_CASCADING? Is this ok for that use? Or we should distinguish them?

In addition, even though my prior implementation distinguished and deliver
the status whether the truncate command is issued with NORMAL or ONLY,
does the remote query by postgres_fdw needs to follow the manner?

Please assume the case when a foreign-table "ft" that maps a remote table
with some child-relations.
If we run TRUNCATE ONLY ft at the local server, postgres_fdw setup
a remote truncate command with "ONLY" qualifier, then remote postgresql
server truncate only parent table of the remote side.
Next, "SELECT * FROM ft" command returns some valid rows from the
child tables in the remote side, even if it is just after TRUNCATE command.
Is it a intuitive behavior for users?

Yes, because that's the same behavior as for the local tables. No?

If this understanding is true, the following note that the patch added is also intuitive, and not necessary? At least the "partition leafs" part should be removed because TRUNCATE ONLY fails if the remote table is a partitioned table.

+       Pay attention for the case when a foreign table maps remote table
+       that has inherited children or partition leafs.
+       <command>TRUNCATE</command> specifies the foreign tables with
+       <literal>ONLY</literal> clause, remove queries over the
+       <filename>postgres_fdw</filename> also specify remote tables with
+       <literal>ONLY</literal> clause, that will truncate only parent
+       portion of the remote table. In the results, it looks like
+       <command>TRUNCATE</command> command partially eliminated contents
+       of the foreign tables.

Even though we have discussed about the flags and expected behavior of
foreign truncate, strip of the relids_extra may be the most straight-forward
API design.
So, in other words, the API requires FDW driver to make the entire data
represented by the foreign table empty, by ExecForeignTruncate().
It is probably more consistent to look at DropBehavior for listing-up the
target relations at the local relations only.

How about your thought?

I was thinking of removing only TRUNCATE_REL_CONTEXT_CASCADING if that's really not necessary. That is, relids_extra is still used to indicate whether each table is specified with the ONLY option or not. To do this, we can use _NORMAL and _ONLY. Or we can also make that a list of boolean flags (indicating whether ONLY is specified or not).

If we stand on the above design, ExecForeignTruncate() don't needs
frels_extra and behavior arguments.

+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04

With the patch, these are defined as flag bits. But ExecuteTruncate() seems to always set the entry in relids_extra to either of them, not the combination of them. So we can define them as enum?

Regardless of my above comment, It's a bug.
When list_member_oid(relids, myrelid) == true, we have to set proper flag on the
relevant frels_extra member, not just ignoring.

One concern about this is that local tables are not processed that way. For local tables, the information (whether ONLY is specified or not) of the table found first is used. For example, when we execute "TRUNCATE ONLY tbl, tbl" and "TRUNCATE tbl, ONLY tbl", the former truncates only parent table because "ONLY tbl" is found first. But the latter truncates the parent and all inherited tables because "tbl" is found first.

If foreign tables also follow this manner, the current patch's logic seems right.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#59Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#58)
Re: TRUNCATE on foreign table

2021年4月8日(木) 11:44 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 10:56, Kohei KaiGai wrote:

2021年4月8日(木) 4:19 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/06 21:06, Kazutaka Onishi wrote:

Thank you for checking v13, and here is v14 patch.

1) Are we using all of these macros? I see that we are setting them
but we only use TRUNCATE_REL_CONTEXT_ONLY. If not used, can we remove
them?

These may be needed for the foreign data handler other than postgres_fdw.

Could you tell me how such FDWs use TRUNCATE_REL_CONTEXT_CASCADING and _NORMAL? I'm still not sure if TRUNCATE_REL_CONTEXT_CASCADING is really required.

/messages/by-id/20200102144644.GM3195@tamriel.snowman.net

This is the suggestion when I added the flag to inform cascading.

| .... Instead, I'd suggest we have the core code build
| up a list of tables to truncate, for each server, based just on the list
| passed in by the user, and then also pass in if CASCADE was included or
| not, and then let the FDW handle that in whatever way makes sense for
| the foreign server (which, for a PG system, would probably be just
| building up the TRUNCATE command and running it with or without the
| CASCADE option, but it might be different on other systems).
|
Indeed, it is not a strong technical reason at this moment.
(And, I also don't have idea to distinct these differences in my module also.)

CASCADE option mentioned in the above seems the CASCADE clause specified in TRUNCATE command. No? So the above doesn't seem to suggest to include the information about how each table to truncate is picked up. Am I missing something?

It might be a bit different context.

With the patch, both inherited and referencing relations are marked as TRUNCATE_REL_CONTEXT_CASCADING? Is this ok for that use? Or we should distinguish them?

In addition, even though my prior implementation distinguished and deliver
the status whether the truncate command is issued with NORMAL or ONLY,
does the remote query by postgres_fdw needs to follow the manner?

Please assume the case when a foreign-table "ft" that maps a remote table
with some child-relations.
If we run TRUNCATE ONLY ft at the local server, postgres_fdw setup
a remote truncate command with "ONLY" qualifier, then remote postgresql
server truncate only parent table of the remote side.
Next, "SELECT * FROM ft" command returns some valid rows from the
child tables in the remote side, even if it is just after TRUNCATE command.
Is it a intuitive behavior for users?

Yes, because that's the same behavior as for the local tables. No?

No. ;-p

When we define a foreign-table as follows,

postgres=# CREATE FOREIGN TABLE ft (id int, v text)
SERVER loopback OPTIONS (table_name 't_parent',
truncatable 'true');
postgres=# select * from ft;
id | v
----+-------------------
1 | 1 in the parent
2 | 2 in the parent
3 | 3 in the parent
4 | 4 in the parent
11 | 11 in the child_1
12 | 12 in the child_1
13 | 13 in the child_1
21 | 21 in the child_2
22 | 22 in the child_2
23 | 23 in the child_2
(10 rows)

TRUNCATE ONLY eliminates the rows that come from the parent table on the remote side,
even though this foreign table has no parent-child relationship on the
local side.

postgres=# begin;
BEGIN
postgres=# truncate only ft;
TRUNCATE TABLE
postgres=# select * from ft;
id | v
----+-------------------
11 | 11 in the child_1
12 | 12 in the child_1
13 | 13 in the child_1
21 | 21 in the child_2
22 | 22 in the child_2
23 | 23 in the child_2
(6 rows)

postgres=# abort;
ROLLBACK

In the case where a local table (with no children) has the same contents,
the TRUNCATE command
will remove the entire table contents.

postgres=# select * INTO tt FROM ft;
SELECT 10
postgres=# select * from tt;
id | v
----+-------------------
1 | 1 in the parent
2 | 2 in the parent
3 | 3 in the parent
4 | 4 in the parent
11 | 11 in the child_1
12 | 12 in the child_1
13 | 13 in the child_1
21 | 21 in the child_2
22 | 22 in the child_2
23 | 23 in the child_2
(10 rows)

postgres=# truncate only tt;
TRUNCATE TABLE
postgres=# select * from tt;
id | v
----+---
(0 rows)

If this understanding is true, the following note that the patch added is also intuitive, and not necessary? At least "partition leafs" part should be removed because TRUNCATE ONLY fails if the remote table is a partitioned table.

+       Pay attention for the case when a foreign table maps remote table
+       that has inherited children or partition leafs.
+       <command>TRUNCATE</command> specifies the foreign tables with
+       <literal>ONLY</literal> clause, remove queries over the
+       <filename>postgres_fdw</filename> also specify remote tables with
+       <literal>ONLY</literal> clause, that will truncate only parent
+       portion of the remote table. In the results, it looks like
+       <command>TRUNCATE</command> command partially eliminated contents
+       of the foreign tables.

Based on the above assumption, I don't think it should be a part of
the documentation.
On the other hand, we need to describe that this API requires the FDW driver to wipe out
the entire data on behalf of the foreign tables once they are picked up by
ExecuteTruncate().

Even though we have discussed about the flags and expected behavior of
foreign truncate, strip of the relids_extra may be the most straight-forward
API design.
So, in other words, the API requires FDW driver to make the entire data
represented by the foreign table empty, by ExecForeignTruncate().
It is probably more consistent to look at DropBehavior for listing-up the
target relations at the local relations only.

How about your thought?

I was thinking to remove only TRUNCATE_REL_CONTEXT_CASCADING if that's really not necessary. That is, rels_extra is still used to indicate whether each table is specified with ONLY option or not. To do this, we can use _NORMAL and _ONLY. Or we can also make that as the list of boolean flag (indicating whether ONLY is specified or not).

I'm inclined to eliminate the relids_extra list itself, because FDW
drivers don't need to
distinguish the CASCADING, NORMAL or ONLY cases.
ExecForeignTruncate receives a list of foreign tables that is already
expanded by ExecuteTruncate(); thus, all the FDW driver has to do is
just wipe out the entire data mapped to the individual foreign tables.
Also, the FDW driver doesn't need to know DropBehavior.

If we stand on the above design, ExecForeignTruncate() don't needs
frels_extra and behavior arguments.

+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04

With the patch, these are defined as flag bits. But ExecuteTruncate() seems to always set the entry in relids_extra to either of them, not the combination of them. So we can define them as enum?

Regardless of my above comment, It's a bug.
When list_member_oid(relids, myrelid) == true, we have to set proper flag on the
relevant frels_extra member, not just ignoring.

One concern about this is that local tables are not processed that way. For local tables, the information (whether ONLY is specified or not) of the table found first is used. For example, when we execute "TRUNCATE ONLY tbl, tbl" and "TRUNCATE tbl, ONLY tbl", the former truncates only parent table because "ONLY tbl" is found first. But the latter truncates the parent and all inherited tables because "tbl" is found first.

If even foreign table follows this manner, current patch's logic seems right.

-1. :-(
It should be fixed, even if we try to deliver the relids_extra list.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#60Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#59)
Re: TRUNCATE on foreign table

On 2021/04/08 13:43, Kohei KaiGai wrote:

In case when a local table (with no children) has same contents,
TRUNCATE command
will remove the entire table contents.

But if there are local child tables that inherit the local parent table, and TRUNCATE ONLY <parent table> is executed, only the contents in the parent will be truncated. I was thinking that this behavior should be applied to the foreign table whose remote (parent) table has remote child tables.

So what we need to reach consensus on is how far the ONLY option reaches. Please imagine the case where we have

(1) local parent table, also foreign table of remote parent table
(2) local child table, inherits local parent table
(3) remote parent table
(4) remote child table, inherits remote parent table

I think that we agree all (1), (2), (3) and (4) should be truncated if local parent table (1) is specified without ONLY in TRUNCATE command. OTOH, if ONLY is specified, we agree that at least local child table (2) should NOT be truncated.

So the remaining point is whether remote tables (3) and (4) should be truncated when ONLY is specified. You seem to argue that both should be truncated, by removing the extra list. I was thinking that only the remote parent table (3) should be truncated. That is, IMO we should treat truncation of a foreign table the same as truncation of its foreign data source.

Other people might think neither (3) nor (4) should be truncated in that case because ONLY should affect only the table directly specified in TRUNCATE command, i.e., local parent table (1). For now this also looks good to me.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#61Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#60)
Re: TRUNCATE on foreign table

2021年4月8日(木) 15:04 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 13:43, Kohei KaiGai wrote:

In case when a local table (with no children) has same contents,
TRUNCATE command
will remove the entire table contents.

But if there are local child tables that inherit the local parent table, and TRUNCATE ONLY <parent table> is executed, only the contents in the parent will be truncated. I was thinking that this behavior should be applied to the foreign table whose remote (parent) table have remote child tables.

So what we need to reach the consensus is; how far ONLY option affects. Please imagine the case where we have

(1) local parent table, also foreign table of remote parent table
(2) local child table, inherits local parent table
(3) remote parent table
(4) remote child table, inherits remote parent table

I think that we agree all (1), (2), (3) and (4) should be truncated if local parent table (1) is specified without ONLY in TRUNCATE command. OTOH, if ONLY is specified, we agree that at least local child table (2) should NOT be truncated.

My understanding is that a foreign table is a representation of external
data, including a remote RDBMS but not only an RDBMS,
regardless of the parent-child relationship on the local side.
So, once a local foreign table wraps an entire table tree (a parent and
its relevant children) on the remote side, it shall at least
be considered a unified data chunk from the standpoint of the local side.

Please assume that file_fdw could map 3 different CSV files; then
truncate on the foreign table might eliminate just 1 of the 3 files.
Is it an expected / preferable behavior?
Basically, we don't assume any characteristics of the data on behalf
of the FDW driver, even if it is a PostgreSQL server.
Thus, I think the new API should be expected to eliminate all the rows on
behalf of the foreign table, regardless of the ONLY clause,
because it already controls which foreign tables shall be picked up,
but does not control which part of the foreign table
shall be eliminated.

So the remaining point is; remote tables (3) and (4) should be truncated or not when ONLY is specified? You seem to argue that both should be truncated by removing extra list. I was thinking that only remote parent table (3) should be truncated. That is, IMO we should treat the truncation on foreign table as the same as that on its foreign data source.

Other people might think neither (3) nor (4) should be truncated in that case because ONLY should affect only the table directly specified in TRUNCATE command, i.e., local parent table (1). For now this also looks good to me.

In the case where the local foreign table is a parent, the entire remote
table shall be truncated if ONLY is given.
In the case where the local foreign table is a child, nothing shall
happen (the API is not called) if ONLY is given.

IMO, it is a stable and simple definition, even if the FDW driver wraps
a non-RDBMS data source that has no idea
of table inheritance.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#62Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#61)
1 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/08 15:48, Kohei KaiGai wrote:

2021年4月8日(木) 15:04 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 13:43, Kohei KaiGai wrote:

In case when a local table (with no children) has same contents,
TRUNCATE command
will remove the entire table contents.

But if there are local child tables that inherit the local parent table, and TRUNCATE ONLY <parent table> is executed, only the contents in the parent will be truncated. I was thinking that this behavior should be applied to the foreign table whose remote (parent) table has remote child tables.

So what we need to reach the consensus is; how far ONLY option affects. Please imagine the case where we have

(1) local parent table, also foreign table of remote parent table
(2) local child table, inherits local parent table
(3) remote parent table
(4) remote child table, inherits remote parent table

I think that we agree all (1), (2), (3) and (4) should be truncated if local parent table (1) is specified without ONLY in TRUNCATE command. OTOH, if ONLY is specified, we agree that at least local child table (2) should NOT be truncated.

My understanding of a foreign table is a representation of external
data, including remote RDBMS but not only RDBMS,
regardless of the parent-child relationship at the local side.
So, once a local foreign table wraps entire tables tree (a parent and
relevant children) at the remote side, at least, it shall
be considered as a unified data chunk from the standpoint of the local side.

At least for me it's not intuitive to truncate the remote table and all its dependent tables even though users explicitly specify ONLY for the foreign table. As far as I read the past discussion, some people were thinking the same.

Please assume if file_fdw could map 3 different CSV files, then
truncate on the foreign table may eliminate just 1 of 3 files.
Is it an expected / preferable behavior?

I think that's up to each FDW. That is, IMO the information about whether ONLY is specified or not for each table should be passed to FDW. Then FDW itself should determine how to handle that information.

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version first? Then we can continue the discussion and change the behavior later if necessary.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

pgsql14-truncate-on-foreign-table.v17.patchtext/plain; charset=UTF-8; name=pgsql14-truncate-on-foreign-table.v17.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 6a61d83862..82aa14a65d 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -92,7 +92,6 @@ static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
 static void disconnect_pg_server(ConnCacheEntry *entry);
 static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
 static void configure_remote_session(PGconn *conn);
-static void do_sql_command(PGconn *conn, const char *sql);
 static void begin_remote_xact(ConnCacheEntry *entry);
 static void pgfdw_xact_callback(XactEvent event, void *arg);
 static void pgfdw_subxact_callback(SubXactEvent event,
@@ -568,7 +567,7 @@ configure_remote_session(PGconn *conn)
 /*
  * Convenience subroutine to issue a non-data-returning SQL command to remote
  */
-static void
+void
 do_sql_command(PGconn *conn, const char *sql)
 {
 	PGresult   *res;
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bdc4c3620d 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *rels,
+				   List *rels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1,
+			   *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth(lc1, rels, lc2, rels_extra)
+	{
+		Relation	rel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(rels))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, rel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior == DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if (behavior == DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 9d472d2d3d..f6b3fc98cb 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8917,7 +9116,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..672b55a808 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -108,6 +108,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index b6442070a3..c590f374c6 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *rels,
+										List *rels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,102 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Truncate one or more foreign tables
+ */
+static void
+postgresExecForeignTruncate(List *rels,
+							List *rels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid			serverid = InvalidOid;
+	UserMapping *user = NULL;
+	PGconn	   *conn = NULL;
+	StringInfoData sql;
+	ListCell   *lc;
+	bool		server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable.
+	 * This can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach(lc, rels)
+	{
+		ForeignServer *server = NULL;
+		Relation	rel = lfirst(lc);
+		ForeignTable *table = GetForeignTable(RelationGetRelid(rel));
+		ListCell   *cell;
+		bool		truncatable;
+
+		/*
+		 * First time through, determine whether the foreign server allows
+		 * truncates. Since all specified foreign tables are assumed to belong
+		 * to the same foreign server, this result can be used for other
+		 * foreign tables.
+		 */
+		if (!OidIsValid(serverid))
+		{
+			serverid = table->serverid;
+			server = GetForeignServer(serverid);
+
+			foreach(cell, server->options)
+			{
+				DefElem    *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/*
+		 * Confirm that all specified foreign tables belong to the same
+		 * foreign server.
+		 */
+		Assert(table->serverid == serverid);
+
+		/* Determine whether this foreign table allows truncations */
+		truncatable = server_truncatable;
+		foreach(cell, table->options)
+		{
+			DefElem    *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("foreign table \"%s\" does not allow truncates",
+							RelationGetRelationName(rel))));
+	}
+	Assert(OidIsValid(serverid));
+
+	/*
+	 * Get connection to the foreign server.  Connection manager will
+	 * establish new connection if necessary.
+	 */
+	user = GetUserMapping(GetUserId(), serverid);
+	conn = GetConnection(user, false, NULL);
+
+	/* Construct the TRUNCATE command string */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+
+	/* Issue the TRUNCATE command to remote server */
+	do_sql_command(conn, sql.data);
+
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..5d44b75314 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -145,6 +145,7 @@ extern PGconn *GetConnection(UserMapping *user, bool will_prep_stmt,
 extern void ReleaseConnection(PGconn *conn);
 extern unsigned int GetCursorNumber(PGconn *conn);
 extern unsigned int GetPrepStmtNumber(PGconn *conn);
+extern void do_sql_command(PGconn *conn, const char *sql);
 extern PGresult *pgfdw_get_result(PGconn *conn, const char *query);
 extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query,
 								  PgFdwConnState *state);
@@ -206,6 +207,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *rels,
+							   List *rels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 3b4f90a99c..7487096eac 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..46ab2e6708 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *rels, List *rels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>rels</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>rels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers and has the same length as
+     <literal>rels</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table is specified WITHOUT <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     specified WITH <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal> 
+     value means that foreign tables are not specified in the <command>TRUNCATE</command>, 
+     but truncated due to dependency (for example, partition table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index b0792a13b1..fd34956936 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -436,6 +437,31 @@ OPTIONS (ADD password_required 'false');
    </variablelist>
   </sect3>
 
+  <sect3>
+   <title>Truncatability Options</title>
+
+   <para>
+    By default all foreign tables using <filename>postgres_fdw</filename> are assumed
+    to be truncatable.  This may be overridden using the following option:
+   </para>
+
+   <variablelist>
+
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </sect3>
+
   <sect3>
    <title>Importing Options</title>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 87f9bdaef0..7bad33bafb 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			serverid;
+	List	   *rels;
+	List	   *rels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1589,7 +1605,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
  * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * verifying that the relation is OK for truncation.  Note that if relations
+ * are foreign tables, at this stage, we have not yet checked that their
+ * foreign data in external data sources are OK for truncation.  These are
+ * checked when foreign data are actually truncated later.  In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
@@ -1600,6 +1619,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,6 +1656,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ?
+												  TRUNCATE_REL_CONTEXT_NORMAL :
+												  TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1683,6 +1706,8 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,
+										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1695,7 +1720,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1746,22 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1,
+			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,6 +1799,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra,
+										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1868,14 +1901,63 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth(lc1, rels, lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * Build the lists of foreign tables belonging to each foreign server
+		 * and pass each list to the foreign data wrapper's callback function,
+		 * so that each server can truncate all of its foreign tables in bulk.
+		 * Each list is saved as a single entry in a hash table that uses the
+		 * server OID as lookup key.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			serverid = GetForeignServerIdByRelId(RelationGetRelid(rel));
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* First time through, initialize hashtable for foreign tables */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/* Find or create cached entry for the foreign table */
+			ft_info = hash_search(ft_htab, &serverid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->serverid = serverid;
+				ft_info->rels = NIL;
+				ft_info->rels_extra = NIL;
+			}
+
+			/*
+			 * Save the foreign table in the entry of the server that the
+			 * foreign table belongs to.
+			 */
+			ft_info->rels = lappend(ft_info->rels, rel);
+			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2020,36 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/* Now go through the hash table, and truncate foreign tables */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->serverid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->rels,
+											 ft_info->rels_extra,
+											 behavior,
+											 restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2135,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			serverid = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(serverid);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 8da602d163..fb3ba5c415 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,6 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1862,6 +1864,7 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..4ebbca6de9 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *rels,
+											  List *rels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 9a3a03e520..34e25eb597 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -634,7 +634,8 @@ typedef struct ViewOptions
  *		WAL stream.
  *
  * We don't log information for unlogged tables (since they don't WAL log
- * anyway) and for system tables (their content is hard to make sense of, and
+ * anyway), for foreign tables (since they don't WAL log, either),
+ * and for system tables (their content is hard to make sense of, and
  * it would complicate decoding slightly for little gain). Note that we *do*
  * log information for user defined catalog tables since they presumably are
  * interesting to the user...
@@ -642,6 +643,7 @@ typedef struct ViewOptions
 #define RelationIsLogicallyLogged(relation) \
 	(XLogLogicalInfoActive() && \
 	 RelationNeedsWAL(relation) && \
+	 (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&	\
 	 !IsCatalogRelation(relation))
 
 /* routines in utils/cache/relcache.c */
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b26e81dcbf..91da5827b2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#63Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Fujii Masao (#62)
1 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/08 18:25, Fujii Masao wrote:

Anyway, attached is the updated version of the patch. This is still based on Kazutaka-san's latest patch. That is, the extra list for ONLY is still passed to the FDW. What about committing this version first? Then we can continue the discussion and change the behavior later if necessary.

The patch failed to apply because of a recent commit.
Attached is the updated version of the patch.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

pgsql14-truncate-on-foreign-table.v18.patchtext/plain; charset=UTF-8; name=pgsql14-truncate-on-foreign-table.v18.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 6a61d83862..82aa14a65d 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -92,7 +92,6 @@ static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
 static void disconnect_pg_server(ConnCacheEntry *entry);
 static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
 static void configure_remote_session(PGconn *conn);
-static void do_sql_command(PGconn *conn, const char *sql);
 static void begin_remote_xact(ConnCacheEntry *entry);
 static void pgfdw_xact_callback(XactEvent event, void *arg);
 static void pgfdw_subxact_callback(SubXactEvent event,
@@ -568,7 +567,7 @@ configure_remote_session(PGconn *conn)
 /*
  * Convenience subroutine to issue a non-data-returning SQL command to remote
  */
-static void
+void
 do_sql_command(PGconn *conn, const char *sql)
 {
 	PGresult   *res;
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bdc4c3620d 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *rels,
+				   List *rels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1,
+			   *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth(lc1, rels, lc2, rels_extra)
+	{
+		Relation	rel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(rels))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, rel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior == DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if (behavior == DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index eeb6ae79d0..7f69fa0054 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8917,7 +9116,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..672b55a808 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -108,6 +108,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index b6442070a3..c590f374c6 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *rels,
+										List *rels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,102 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Truncate one or more foreign tables
+ */
+static void
+postgresExecForeignTruncate(List *rels,
+							List *rels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid			serverid = InvalidOid;
+	UserMapping *user = NULL;
+	PGconn	   *conn = NULL;
+	StringInfoData sql;
+	ListCell   *lc;
+	bool		server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable.
+	 * This can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach(lc, rels)
+	{
+		ForeignServer *server = NULL;
+		Relation	rel = lfirst(lc);
+		ForeignTable *table = GetForeignTable(RelationGetRelid(rel));
+		ListCell   *cell;
+		bool		truncatable;
+
+		/*
+		 * First time through, determine whether the foreign server allows
+		 * truncates. Since all specified foreign tables are assumed to belong
+		 * to the same foreign server, this result can be used for other
+		 * foreign tables.
+		 */
+		if (!OidIsValid(serverid))
+		{
+			serverid = table->serverid;
+			server = GetForeignServer(serverid);
+
+			foreach(cell, server->options)
+			{
+				DefElem    *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/*
+		 * Confirm that all specified foreign tables belong to the same
+		 * foreign server.
+		 */
+		Assert(table->serverid == serverid);
+
+		/* Determine whether this foreign table allows truncations */
+		truncatable = server_truncatable;
+		foreach(cell, table->options)
+		{
+			DefElem    *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("foreign table \"%s\" does not allow truncates",
+							RelationGetRelationName(rel))));
+	}
+	Assert(OidIsValid(serverid));
+
+	/*
+	 * Get connection to the foreign server.  Connection manager will
+	 * establish new connection if necessary.
+	 */
+	user = GetUserMapping(GetUserId(), serverid);
+	conn = GetConnection(user, false, NULL);
+
+	/* Construct the TRUNCATE command string */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+
+	/* Issue the TRUNCATE command to remote server */
+	do_sql_command(conn, sql.data);
+
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..5d44b75314 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -145,6 +145,7 @@ extern PGconn *GetConnection(UserMapping *user, bool will_prep_stmt,
 extern void ReleaseConnection(PGconn *conn);
 extern unsigned int GetCursorNumber(PGconn *conn);
 extern unsigned int GetPrepStmtNumber(PGconn *conn);
+extern void do_sql_command(PGconn *conn, const char *sql);
 extern PGresult *pgfdw_get_result(PGconn *conn, const char *query);
 extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query,
 								  PgFdwConnState *state);
@@ -206,6 +207,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *rels,
+							   List *rels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 3b4f90a99c..7487096eac 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..46ab2e6708 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,50 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for Truncate</title>
+<programlisting>
+void
+ExecForeignTruncate(List *rels, List *rels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+    <para>
+     Truncate a set of foreign tables defined by
+     <literal>rels</literal> belonging to the same foreign server.
+     This optional function is called during execution of
+     <command>TRUNCATE</command> for each foreign server involved
+     in one <command>TRUNCATE</command> command (note that invocations
+     are not per foreign table).
+
+     <literal>rels_extra</literal> delivers extra information about
+     the context where the foreign tables are truncated. It is a list of integers and has the same length as
+     <literal>rels</literal>. <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal> means that
+     the foreign table is specified WITHOUT <literal>ONLY</literal> clause, and <literal>TRUNCATE_REL_CONTEXT_ONLY</literal> means
+     specified WITH <literal>ONLY</literal> clause. <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal> 
+     value means that foreign tables are not specified in the <command>TRUNCATE</command>, 
+     but truncated due to dependency (for example, partition table).
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate the foreign table will
+     fail with an error message.
+    </para>
+    
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>
+     and <literal>DROP_CASCADE</literal> (to map with the equivalents of
+     <command>TRUNCATE</command>).
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> was supplied in the
+     <command>TRUNCATE</command>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index b0792a13b1..fd34956936 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -436,6 +437,31 @@ OPTIONS (ADD password_required 'false');
    </variablelist>
   </sect3>
 
+  <sect3>
+   <title>Truncatability Options</title>
+
+   <para>
+    By default all foreign tables using <filename>postgres_fdw</filename> are assumed
+    to be truncatable.  This may be overridden using the following option:
+   </para>
+
+   <variablelist>
+
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </sect3>
+
   <sect3>
    <title>Importing Options</title>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..7899106ba1 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,8 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+    <command>TRUNCATE</command> can be used for foreign tables if
+     the foreign data wrapper supports it; for instance, see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 87f9bdaef0..7bad33bafb 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			serverid;
+	List	   *rels;
+	List	   *rels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1589,7 +1605,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
  * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * verifying that the relation is OK for truncation.  Note that if relations
+ * are foreign tables, at this stage, we have not yet checked that their
+ * foreign data in external data sources are OK for truncation.  These are
+ * checked when foreign data are actually truncated later.  In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
@@ -1600,6 +1619,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,6 +1656,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ?
+												  TRUNCATE_REL_CONTEXT_NORMAL :
+												  TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1683,6 +1706,8 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,
+										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1695,7 +1720,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1721,16 +1746,22 @@ ExecuteTruncate(TruncateStmt *stmt)
  * this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1,
+			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,6 +1799,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra,
+										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1868,14 +1901,63 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth(lc1, rels, lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * Build the lists of foreign tables belonging to each foreign server
+		 * and pass each list to the foreign data wrapper's callback function,
+		 * so that each server can truncate all its foreign tables in bulk.
+		 * Each list is saved as a single entry in a hash table that uses the
+		 * server OID as lookup key.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			serverid = GetForeignServerIdByRelId(RelationGetRelid(rel));
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* First time through, initialize hashtable for foreign tables */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/* Find or create cached entry for the foreign table */
+			ft_info = hash_search(ft_htab, &serverid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->serverid = serverid;
+				ft_info->rels = NIL;
+				ft_info->rels_extra = NIL;
+			}
+
+			/*
+			 * Save the foreign table in the entry of the server that the
+			 * foreign table belongs to.
+			 */
+			ft_info->rels = lappend(ft_info->rels, rel);
+			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2020,36 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/* Now go through the hash table, and truncate foreign tables */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->serverid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->rels,
+											 ft_info->rels_extra,
+											 behavior,
+											 restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2135,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			serverid = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(serverid);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 8da602d163..fb3ba5c415 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,6 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1862,6 +1864,7 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..57763ed734 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -55,9 +55,16 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
+#define TRUNCATE_REL_CONTEXT_NORMAL       0x01
+#define TRUNCATE_REL_CONTEXT_ONLY         0x02
+#define TRUNCATE_REL_CONTEXT_CASCADING     0x04
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..4ebbca6de9 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *rels,
+											  List *rels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 9a3a03e520..34e25eb597 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -634,7 +634,8 @@ typedef struct ViewOptions
  *		WAL stream.
  *
  * We don't log information for unlogged tables (since they don't WAL log
- * anyway) and for system tables (their content is hard to make sense of, and
+ * anyway), for foreign tables (since they don't WAL log, either),
+ * and for system tables (their content is hard to make sense of, and
  * it would complicate decoding slightly for little gain). Note that we *do*
  * log information for user defined catalog tables since they presumably are
  * interesting to the user...
@@ -642,6 +643,7 @@ typedef struct ViewOptions
 #define RelationIsLogicallyLogged(relation) \
 	(XLogLogicalInfoActive() && \
 	 RelationNeedsWAL(relation) && \
+	 (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&	\
 	 !IsCatalogRelation(relation))
 
 /* routines in utils/cache/relcache.c */
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index efb9811198..fefa65705a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#64Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#62)
Re: TRUNCATE on foreign table

2021年4月8日(木) 18:25 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 15:48, Kohei KaiGai wrote:

2021年4月8日(木) 15:04 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 13:43, Kohei KaiGai wrote:

In the case where a local table (with no children) has the same contents,
the TRUNCATE command
will remove the entire table contents.

But if there are local child tables that inherit the local parent table, and TRUNCATE ONLY <parent table> is executed, only the contents in the parent will be truncated. I was thinking that this behavior should be applied to the foreign table whose remote (parent) table has remote child tables.

So what we need to reach consensus on is how far the effect of the ONLY option extends. Please imagine the case where we have

(1) local parent table, also foreign table of remote parent table
(2) local child table, inherits local parent table
(3) remote parent table
(4) remote child table, inherits remote parent table

I think that we agree all (1), (2), (3) and (4) should be truncated if local parent table (1) is specified without ONLY in TRUNCATE command. OTOH, if ONLY is specified, we agree that at least local child table (2) should NOT be truncated.

My understanding of a foreign table is a representation of external
data, including remote RDBMS but not only RDBMS,
regardless of the parent-child relationship at the local side.
So, once a local foreign table wraps entire tables tree (a parent and
relevant children) at the remote side, at least, it shall
be considered as a unified data chunk from the standpoint of the local side.

At least for me, it's not intuitive to truncate the remote table and all its dependent tables even though users explicitly specify ONLY for the foreign table. As far as I read the past discussion, some people were thinking the same.

Please suppose file_fdw could map 3 different CSV files; then
truncate on the foreign table might eliminate just 1 of the 3 files.
Is that an expected / preferable behavior?

I think that's up to each FDW. That is, IMO the information about whether ONLY is specified or not for each table should be passed to FDW. Then FDW itself should determine how to handle that information.

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Ok, it's fair enough for me.

I'll try to sort out my thoughts, then raise a follow-up discussion if necessary.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#65Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#64)
1 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thanks to all involved in this development!!
For the record, I have attached the final patch I committed.

Ok, it's fair enough for me.

I'll try to sort out my thoughts, then raise a follow-up discussion if necessary.

Thanks!

The following are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in the TRUNCATE command is collected and passed to the FDW. Does this really need to be passed to the FDW? It seems that Stephen, Michael and I think that's necessary, but Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems to be no use case for it.

2. Currently, when the same foreign table is specified multiple times in the command, only the extra information for the occurrence found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or should we collect all, e.g., should both _NORMAL and _ONLY be collected in that example? I think that the current approach (i.e., collect the extra info about the table found first if the same table is specified multiple times) is good because local tables are also treated the same way. But Kaigai-san does not.

3. Currently postgres_fdw specifies the ONLY clause in the TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only the root table is truncated on the remote server side, and the tables inheriting from it are not truncated. Is this behavior desirable? It seems that Michael and I think this behavior is OK, but Kaigai-san does not.

4. Tab-completion for TRUNCATE should be updated so that foreign tables are also displayed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

pgsql14-truncate-on-foreign-table.v18.patchtext/plain; charset=UTF-8; name=pgsql14-truncate-on-foreign-table.v18.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 6a61d83862..82aa14a65d 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -92,7 +92,6 @@ static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
 static void disconnect_pg_server(ConnCacheEntry *entry);
 static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
 static void configure_remote_session(PGconn *conn);
-static void do_sql_command(PGconn *conn, const char *sql);
 static void begin_remote_xact(ConnCacheEntry *entry);
 static void pgfdw_xact_callback(XactEvent event, void *arg);
 static void pgfdw_subxact_callback(SubXactEvent event,
@@ -568,7 +567,7 @@ configure_remote_session(PGconn *conn)
 /*
  * Convenience subroutine to issue a non-data-returning SQL command to remote
  */
-static void
+void
 do_sql_command(PGconn *conn, const char *sql)
 {
 	PGresult   *res;
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 5aa3455e30..bdc4c3620d 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -56,6 +56,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
+#include "commands/tablecmds.h"
 
 /*
  * Global context for foreign_expr_walker's search of an expression tree.
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *rels,
+				   List *rels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1,
+			   *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth(lc1, rels, lc2, rels_extra)
+	{
+		Relation	rel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(rels))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, rel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior == DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if (behavior == DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}
+
 /*
  * Construct name to use for given column, and emit it into buf.
  * If it has a column_name FDW option, use that instead of attribute name.
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index eeb6ae79d0..7f69fa0054 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8215,6 +8215,205 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 -- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ERROR:  foreign table "tru_ftable" does not allow truncates
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+ sum 
+-----
+ 155
+(1 row)
+
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_rtable1;		-- 0
+ count 
+-------
+     0
+(1 row)
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+ sum 
+-----
+  55
+(1 row)
+
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+ERROR:  cannot truncate a table referenced in a foreign key constraint
+DETAIL:  Table "tru_fk_table" references "tru_pk_table".
+HINT:  Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
+CONTEXT:  remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 8
+ count 
+-------
+     8
+(1 row)
+
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) from tru_pk_ftable; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+ sum 
+-----
+ 126
+(1 row)
+
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+ count 
+-------
+     0
+(1 row)
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+ sum 
+-----
+  95
+(1 row)
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+ sum 
+-----
+  60
+(1 row)
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+ sum 
+-----
+ 175
+(1 row)
+
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+ count 
+-------
+     0
+(1 row)
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+-- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
 CREATE SCHEMA import_source;
@@ -8917,7 +9116,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size, async_capable, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, keep_connections
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index f1d0c8bd41..672b55a808 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -108,6 +108,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 		 */
 		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
 			strcmp(def->defname, "updatable") == 0 ||
+			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
 			strcmp(def->defname, "keep_connections") == 0)
 		{
@@ -213,6 +214,9 @@ InitPgFdwOptions(void)
 		/* updatable is available on both server and table */
 		{"updatable", ForeignServerRelationId, false},
 		{"updatable", ForeignTableRelationId, false},
+		/* truncatable is available on both server and table */
+		{"truncatable", ForeignServerRelationId, false},
+		{"truncatable", ForeignTableRelationId, false},
 		/* fetch_size is available on both server and table */
 		{"fetch_size", ForeignServerRelationId, false},
 		{"fetch_size", ForeignTableRelationId, false},
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index b6442070a3..c590f374c6 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -400,6 +400,10 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 										 ExplainState *es);
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
+static void postgresExecForeignTruncate(List *rels,
+										List *rels_extra,
+										DropBehavior behavior,
+										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
 										AcquireSampleRowsFunc *func,
 										BlockNumber *totalpages);
@@ -588,6 +592,9 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->ExplainForeignModify = postgresExplainForeignModify;
 	routine->ExplainDirectModify = postgresExplainDirectModify;
 
+	/* Support function for TRUNCATE */
+	routine->ExecForeignTruncate = postgresExecForeignTruncate;
+
 	/* Support functions for ANALYZE */
 	routine->AnalyzeForeignTable = postgresAnalyzeForeignTable;
 
@@ -2868,6 +2875,102 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
 	}
 }
 
+/*
+ * postgresExecForeignTruncate
+ *		Truncate one or more foreign tables
+ */
+static void
+postgresExecForeignTruncate(List *rels,
+							List *rels_extra,
+							DropBehavior behavior,
+							bool restart_seqs)
+{
+	Oid			serverid = InvalidOid;
+	UserMapping *user = NULL;
+	PGconn	   *conn = NULL;
+	StringInfoData sql;
+	ListCell   *lc;
+	bool		server_truncatable = true;
+
+	/*
+	 * By default, all postgres_fdw foreign tables are assumed truncatable.
+	 * This can be overridden by a per-server setting, which in turn can be
+	 * overridden by a per-table setting.
+	 */
+	foreach(lc, rels)
+	{
+		ForeignServer *server = NULL;
+		Relation	rel = lfirst(lc);
+		ForeignTable *table = GetForeignTable(RelationGetRelid(rel));
+		ListCell   *cell;
+		bool		truncatable;
+
+		/*
+		 * First time through, determine whether the foreign server allows
+		 * truncates. Since all specified foreign tables are assumed to belong
+		 * to the same foreign server, this result can be used for other
+		 * foreign tables.
+		 */
+		if (!OidIsValid(serverid))
+		{
+			serverid = table->serverid;
+			server = GetForeignServer(serverid);
+
+			foreach(cell, server->options)
+			{
+				DefElem    *defel = (DefElem *) lfirst(cell);
+
+				if (strcmp(defel->defname, "truncatable") == 0)
+				{
+					server_truncatable = defGetBoolean(defel);
+					break;
+				}
+			}
+		}
+
+		/*
+		 * Confirm that all specified foreign tables belong to the same
+		 * foreign server.
+		 */
+		Assert(table->serverid == serverid);
+
+		/* Determine whether this foreign table allows truncations */
+		truncatable = server_truncatable;
+		foreach(cell, table->options)
+		{
+			DefElem    *defel = (DefElem *) lfirst(cell);
+
+			if (strcmp(defel->defname, "truncatable") == 0)
+			{
+				truncatable = defGetBoolean(defel);
+				break;
+			}
+		}
+
+		if (!truncatable)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("foreign table \"%s\" does not allow truncates",
+							RelationGetRelationName(rel))));
+	}
+	Assert(OidIsValid(serverid));
+
+	/*
+	 * Get connection to the foreign server.  Connection manager will
+	 * establish new connection if necessary.
+	 */
+	user = GetUserMapping(GetUserId(), serverid);
+	conn = GetConnection(user, false, NULL);
+
+	/* Construct the TRUNCATE command string */
+	initStringInfo(&sql);
+	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+
+	/* Issue the TRUNCATE command to remote server */
+	do_sql_command(conn, sql.data);
+
+	pfree(sql.data);
+}
 
 /*
  * estimate_path_cost_size
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 88d94da6f6..5d44b75314 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -145,6 +145,7 @@ extern PGconn *GetConnection(UserMapping *user, bool will_prep_stmt,
 extern void ReleaseConnection(PGconn *conn);
 extern unsigned int GetCursorNumber(PGconn *conn);
 extern unsigned int GetPrepStmtNumber(PGconn *conn);
+extern void do_sql_command(PGconn *conn, const char *sql);
 extern PGresult *pgfdw_get_result(PGconn *conn, const char *query);
 extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query,
 								  PgFdwConnState *state);
@@ -206,6 +207,11 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseTruncateSql(StringInfo buf,
+							   List *rels,
+							   List *rels_extra,
+							   DropBehavior behavior,
+							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 3b4f90a99c..7487096eac 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2351,6 +2351,107 @@ select * from rem3;
 drop foreign table rem3;
 drop table loc3;
 
+-- ===================================================================
+-- test for TRUNCATE
+-- ===================================================================
+CREATE TABLE tru_rtable0 (id int primary key);
+CREATE TABLE tru_rtable1 (id int primary key);
+CREATE FOREIGN TABLE tru_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable0');
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
+
+CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
+CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
+                            FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'tru_rtable1');
+INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
+
+CREATE TABLE tru_pk_table(id int primary key);
+CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
+INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
+INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
+CREATE FOREIGN TABLE tru_pk_ftable (id int)
+       SERVER loopback OPTIONS (table_name 'tru_pk_table');
+
+CREATE TABLE tru_rtable_parent (id int);
+CREATE TABLE tru_rtable_child (id int);
+CREATE FOREIGN TABLE tru_ftable_parent (id int)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
+CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
+       SERVER loopback OPTIONS (table_name 'tru_rtable_child');
+INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_rtable_child  (SELECT x FROM generate_series(10, 18) x);
+
+-- normal truncate
+SELECT sum(id) FROM tru_ftable;        -- 55
+TRUNCATE tru_ftable;
+SELECT count(*) FROM tru_rtable0;		-- 0
+SELECT count(*) FROM tru_ftable;		-- 0
+
+-- 'truncatable' option
+ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER SERVER loopback OPTIONS (DROP truncatable);
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
+TRUNCATE tru_ftable;			-- error
+ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
+TRUNCATE tru_ftable;			-- accepted
+
+-- partitioned table with both local and foreign tables as partitions
+SELECT sum(id) FROM tru_ptable;        -- 155
+TRUNCATE tru_ptable;
+SELECT count(*) FROM tru_ptable;		-- 0
+SELECT count(*) FROM tru_ptable__p0;	-- 0
+SELECT count(*) FROM tru_ftable__p1;	-- 0
+SELECT count(*) FROM tru_rtable1;		-- 0
+
+-- 'CASCADE' option
+SELECT sum(id) FROM tru_pk_ftable;      -- 55
+TRUNCATE tru_pk_ftable;	-- failed by FK reference
+TRUNCATE tru_pk_ftable CASCADE;
+SELECT count(*) FROM tru_pk_ftable;    -- 0
+SELECT count(*) FROM tru_fk_table;		-- also truncated,0
+
+-- truncate two tables at a command
+INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
+INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
+SELECT count(*) from tru_ftable; -- 8
+SELECT count(*) from tru_pk_ftable; -- 8
+TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
+SELECT count(*) from tru_ftable; -- 0
+SELECT count(*) from tru_pk_ftable; -- 0
+
+-- truncate with ONLY clause
+TRUNCATE ONLY tru_ftable_parent;
+SELECT sum(id) FROM tru_ftable_parent;  -- 126
+TRUNCATE tru_ftable_parent;
+SELECT count(*) FROM tru_ftable_parent; -- 0
+
+-- in case when remote table has inherited children
+CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
+SELECT sum(id) FROM tru_ftable;   -- 95
+
+TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
+SELECT sum(id) FROM tru_ftable;   -- 60
+
+INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
+SELECT sum(id) FROM tru_ftable;		-- 175
+TRUNCATE tru_ftable;			-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;    -- empty
+
+-- cleanup
+DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
+DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
+tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
+
 -- ===================================================================
 -- test IMPORT FOREIGN SCHEMA
 -- ===================================================================
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0f2397df49..98882ddab8 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1065,6 +1065,67 @@ EndDirectModify(ForeignScanState *node);
 
    </sect2>
 
+   <sect2 id="fdw-callbacks-truncate">
+    <title>FDW Routines for <command>TRUNCATE</command></title>
+
+    <para>
+<programlisting>
+void
+ExecForeignTruncate(List *rels, List *rels_extra,
+                    DropBehavior behavior, bool restart_seqs);
+</programlisting>
+
+     Truncate a set of foreign tables specified in <literal>rels</literal>.
+     This function is called when <xref linkend="sql-truncate"/> is executed
+     on foreign tables.  <literal>rels</literal> is the list of
+     <structname>Relation</structname> data structures, each of which
+     indicates a foreign table to truncate.  <literal>rels_extra</literal> is
+     the list of <literal>int</literal> values, each of which delivers extra
+     information about the corresponding foreign table.  Possible values are
+     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
+     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
+     in <command>TRUNCATE</command>,
+     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
+     the foreign table is specified WITH <literal>ONLY</literal> clause,
+     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
+     which means that the foreign table is not specified explicitly,
+     but will be truncated due to a dependency (for example, as a partition).
+     There is one-to-one mapping between <literal>rels</literal> and
+     <literal>rels_extra</literal>.  The number of entries is the same
+     between the two lists.
+    </para>
+
+    <para>
+     <literal>behavior</literal> defines how foreign tables should
+     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
+     which means that <literal>RESTRICT</literal> option is specified,
+     and <literal>DROP_CASCADE</literal>, which means that
+     <literal>CASCADE</literal> option is specified, in
+     <command>TRUNCATE</command> command.
+    </para>
+    
+    <para>
+     <literal>restart_seqs</literal> is set to <literal>true</literal>
+     if <literal>RESTART IDENTITY</literal> option is specified in
+     <command>TRUNCATE</command> command.  It is <literal>false</literal>
+     if <literal>CONTINUE IDENTITY</literal> option is specified.
+    </para>
+
+    <para>
+     <command>TRUNCATE</command> invokes
+     <function>ExecForeignTruncate</function> once per foreign server
+     to which the foreign tables to truncate belong.  This means that all
+     foreign tables included in <literal>rels</literal> must belong to the
+     same server.
+    </para>
+
+    <para>
+     If the <function>ExecForeignTruncate</function> pointer is set to
+     <literal>NULL</literal>, attempts to truncate foreign tables will
+     fail with an error message.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-row-locking">
     <title>FDW Routines for Row Locking</title>
 
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index b0792a13b1..fd34956936 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -63,9 +63,10 @@
  <para>
   Now you need only <command>SELECT</command> from a foreign table to access
   the data stored in its underlying remote table.  You can also modify
-  the remote table using <command>INSERT</command>, <command>UPDATE</command>, or
-  <command>DELETE</command>.  (Of course, the remote user you have specified
-  in your user mapping must have privileges to do these things.)
+  the remote table using <command>INSERT</command>, <command>UPDATE</command>,
+  <command>DELETE</command>, or <command>TRUNCATE</command>.
+  (Of course, the remote user you have specified in your user mapping must
+  have privileges to do these things.)
  </para>
 
  <para>
@@ -436,6 +437,31 @@ OPTIONS (ADD password_required 'false');
    </variablelist>
   </sect3>
 
+  <sect3>
+   <title>Truncatability Options</title>
+
+   <para>
+    By default all foreign tables using <filename>postgres_fdw</filename> are assumed
+    to be truncatable.  This may be overridden using the following option:
+   </para>
+
+   <variablelist>
+
+    <varlistentry>
+     <term><literal>truncatable</literal></term>
+     <listitem>
+      <para>
+       This option controls whether <filename>postgres_fdw</filename> allows
+       foreign tables to be truncated using <command>TRUNCATE</command>
+       command. It can be specified for a foreign table or a foreign server.
+       A table-level option overrides a server-level option.
+       The default is <literal>true</literal>.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </sect3>
+
   <sect3>
    <title>Importing Options</title>
 
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index 91cdac5562..acf3633be4 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -172,9 +172,9 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
   </para>
 
   <para>
-   <command>TRUNCATE</command> is not currently supported for foreign tables.
-   This implies that if a specified table has any descendant tables that are
-   foreign, the command will fail.
+   <command>TRUNCATE</command> can be used for foreign tables if
+   the foreign data wrapper supports it; for instance,
+   see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 87f9bdaef0..1f19629a94 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -59,6 +59,7 @@
 #include "commands/typecmds.h"
 #include "commands/user.h"
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -310,6 +311,21 @@ struct DropRelationCallbackState
 #define		ATT_FOREIGN_TABLE		0x0020
 #define		ATT_PARTITIONED_INDEX	0x0040
 
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			serverid;
+	List	   *rels;
+	List	   *rels_extra;
+} ForeignTruncateInfo;
+
 /*
  * Partition tables are expected to be dropped when the parent partitioned
  * table gets dropped. Hence for partitioning we use AUTO dependency.
@@ -1589,7 +1605,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
  *
  * This is a multi-relation truncate.  We first open and grab exclusive
  * lock on all relations involved, checking permissions and otherwise
- * verifying that the relation is OK for truncation.  In CASCADE mode,
+ * verifying that the relation is OK for truncation.  Note that if relations
+ * are foreign tables, at this stage, we have not yet checked that their
+ * foreign data in external data sources are OK for truncation.  These are
+ * checked when foreign data are actually truncated later.  In CASCADE mode,
  * relations having FK references to the targeted relations are automatically
  * added to the group; in RESTRICT mode, we check that all FK references are
  * internal to the group that's being truncated.  Finally all the relations
@@ -1600,6 +1619,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1636,6 +1656,9 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
+		relids_extra = lappend_int(relids_extra, (recurse ?
+												  TRUNCATE_REL_CONTEXT_NORMAL :
+												  TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1683,6 +1706,8 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra,
+										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1695,7 +1720,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1716,21 +1741,28 @@ ExecuteTruncate(TruncateStmt *stmt)
  *
  * explicit_rels is the list of Relations to truncate that the command
  * specified.  relids is the list of Oids corresponding to explicit_rels.
- * relids_logged is the list of Oids (a subset of relids) that require
- * WAL-logging.  This is all a bit redundant, but the existing callers have
- * this information handy in this form.
+ * relids_extra is the list of integer values that deliver extra information
+ * about relations in explicit_rels.  relids_logged is the list of Oids
+ * (a subset of relids) that require WAL-logging.  This is all a bit redundant,
+ * but the existing callers have this information handy in this form.
  */
 void
-ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
+ExecuteTruncateGuts(List *explicit_rels,
+					List *relids,
+					List *relids_extra,
+					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
 	List	   *rels;
 	List	   *seq_relids = NIL;
+	HTAB	   *ft_htab = NULL;
 	EState	   *estate;
 	ResultRelInfo *resultRelInfos;
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
+	ListCell   *lc1,
+			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1768,6 +1800,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
+				relids_extra = lappend_int(relids_extra,
+										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1868,14 +1902,63 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	foreach(cell, rels)
+	Assert(list_length(rels) == list_length(relids_extra));
+	forboth(lc1, rels, lc2, relids_extra)
 	{
-		Relation	rel = (Relation) lfirst(cell);
+		Relation	rel = (Relation) lfirst(lc1);
+		int			extra = lfirst_int(lc2);
 
 		/* Skip partitioned tables as there is nothing to do */
 		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 			continue;
 
+		/*
+		 * Build the lists of foreign tables belonging to each foreign server
+		 * and pass each list to the foreign data wrapper's callback function,
+		 * so that each server can truncate all its foreign tables in bulk.
+		 * Each list is saved as a single entry in a hash table that uses the
+		 * server OID as lookup key.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			Oid			serverid = GetForeignServerIdByRelId(RelationGetRelid(rel));
+			bool		found;
+			ForeignTruncateInfo *ft_info;
+
+			/* First time through, initialize hashtable for foreign tables */
+			if (!ft_htab)
+			{
+				HASHCTL		hctl;
+
+				memset(&hctl, 0, sizeof(HASHCTL));
+				hctl.keysize = sizeof(Oid);
+				hctl.entrysize = sizeof(ForeignTruncateInfo);
+				hctl.hcxt = CurrentMemoryContext;
+
+				ft_htab = hash_create("TRUNCATE for Foreign Tables",
+									  32,	/* start small and extend */
+									  &hctl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+			}
+
+			/* Find or create cached entry for the foreign server */
+			ft_info = hash_search(ft_htab, &serverid, HASH_ENTER, &found);
+			if (!found)
+			{
+				ft_info->serverid = serverid;
+				ft_info->rels = NIL;
+				ft_info->rels_extra = NIL;
+			}
+
+			/*
+			 * Save the foreign table in the entry of the server that the
+			 * foreign table belongs to.
+			 */
+			ft_info->rels = lappend(ft_info->rels, rel);
+			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
+			continue;
+		}
+
 		/*
 		 * Normally, we need a transaction-safe truncation here.  However, if
 		 * the table was either created in the current (sub)transaction or has
@@ -1938,6 +2021,36 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
 		pgstat_count_truncate(rel);
 	}
 
+	/* Now go through the hash table, and truncate foreign tables */
+	if (ft_htab)
+	{
+		ForeignTruncateInfo *ft_info;
+		HASH_SEQ_STATUS seq;
+
+		hash_seq_init(&seq, ft_htab);
+
+		PG_TRY();
+		{
+			while ((ft_info = hash_seq_search(&seq)) != NULL)
+			{
+				FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->serverid);
+
+				/* truncate_check_rel() has checked that already */
+				Assert(routine->ExecForeignTruncate != NULL);
+
+				routine->ExecForeignTruncate(ft_info->rels,
+											 ft_info->rels_extra,
+											 behavior,
+											 restart_seqs);
+			}
+		}
+		PG_FINALLY();
+		{
+			hash_destroy(ft_htab);
+		}
+		PG_END_TRY();
+	}
+
 	/*
 	 * Restart owned sequences if we were asked to.
 	 */
@@ -2023,12 +2136,24 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	char	   *relname = NameStr(reltuple->relname);
 
 	/*
-	 * Only allow truncate on regular tables and partitioned tables (although,
-	 * the latter are only being included here for the following checks; no
-	 * physical truncation will occur in their case.)
+	 * Only allow truncate on regular tables, foreign tables using foreign
+	 * data wrappers supporting TRUNCATE and partitioned tables (although, the
+	 * latter are only being included here for the following checks; no
+	 * physical truncation will occur in their case.).
 	 */
-	if (reltuple->relkind != RELKIND_RELATION &&
-		reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+	if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		Oid			serverid = GetForeignServerIdByRelId(relid);
+		FdwRoutine *fdwroutine = GetFdwRoutineByServerId(serverid);
+
+		if (!fdwroutine->ExecForeignTruncate)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot truncate foreign table \"%s\"",
+							relname)));
+	}
+	else if (reltuple->relkind != RELKIND_RELATION &&
+			 reltuple->relkind != RELKIND_PARTITIONED_TABLE)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a table", relname)));
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 8da602d163..fb3ba5c415 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,6 +1795,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
+	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1824,6 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
+		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1862,6 +1864,7 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
+				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1874,8 +1877,12 @@ apply_handle_truncate(StringInfo s)
 	 * to replaying changes without further cascading. This might be later
 	 * changeable with a user specified option.
 	 */
-	ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
-
+	ExecuteTruncateGuts(rels,
+						relids,
+						relids_extra,
+						relids_logged,
+						DROP_RESTRICT,
+						restart_seqs);
 	foreach(lc, remote_rels)
 	{
 		LogicalRepRelMapEntry *rel = lfirst(lc);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b3d30acc35..b808a07e46 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -21,6 +21,16 @@
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
+/*
+ * These values indicate how a relation was specified as the target to
+ * truncate in TRUNCATE command.
+ */
+#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
+#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
+#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
+												 * due to dependency (e.g.,
+												 * partition table) */
+
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
 
@@ -56,8 +66,12 @@ extern void AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
 extern void CheckTableNotInUse(Relation rel, const char *stmt);
 
 extern void ExecuteTruncate(TruncateStmt *stmt);
-extern void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
-								DropBehavior behavior, bool restart_seqs);
+extern void ExecuteTruncateGuts(List *explicit_rels,
+								List *relids,
+								List *relids_extra,
+								List *relids_logged,
+								DropBehavior behavior,
+								bool restart_seqs);
 
 extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass);
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 10d29ff292..4ebbca6de9 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -160,6 +160,11 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
+typedef void (*ExecForeignTruncate_function) (List *rels,
+											  List *rels_extra,
+											  DropBehavior behavior,
+											  bool restart_seqs);
+
 typedef Size (*EstimateDSMForeignScan_function) (ForeignScanState *node,
 												 ParallelContext *pcxt);
 typedef void (*InitializeDSMForeignScan_function) (ForeignScanState *node,
@@ -255,6 +260,9 @@ typedef struct FdwRoutine
 	/* Support functions for IMPORT FOREIGN SCHEMA */
 	ImportForeignSchema_function ImportForeignSchema;
 
+	/* Support functions for TRUNCATE */
+	ExecForeignTruncate_function ExecForeignTruncate;
+
 	/* Support functions for parallelism under Gather node */
 	IsForeignScanParallelSafe_function IsForeignScanParallelSafe;
 	EstimateDSMForeignScan_function EstimateDSMForeignScan;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 9a3a03e520..34e25eb597 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -634,7 +634,8 @@ typedef struct ViewOptions
  *		WAL stream.
  *
  * We don't log information for unlogged tables (since they don't WAL log
- * anyway) and for system tables (their content is hard to make sense of, and
+ * anyway), for foreign tables (since they don't WAL log, either),
+ * and for system tables (their content is hard to make sense of, and
  * it would complicate decoding slightly for little gain). Note that we *do*
  * log information for user defined catalog tables since they presumably are
  * interesting to the user...
@@ -642,6 +643,7 @@ typedef struct ViewOptions
 #define RelationIsLogicallyLogged(relation) \
 	(XLogLogicalInfoActive() && \
 	 RelationNeedsWAL(relation) && \
+	 (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&	\
 	 !IsCatalogRelation(relation))
 
 /* routines in utils/cache/relcache.c */
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index e4cdb780d0..5385f98a0f 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -1807,9 +1807,9 @@ Inherits: fd_pt1
 
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE ft2;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt1;  -- ERROR
-ERROR:  "ft2" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP TABLE fd_pt1 CASCADE;
 NOTICE:  drop cascades to foreign table ft2
 -- IMPORT FOREIGN SCHEMA
@@ -2032,9 +2032,9 @@ ALTER FOREIGN TABLE fd_pt2_1 ADD CONSTRAINT fd_pt2chk1 CHECK (c1 > 0);
 ALTER TABLE fd_pt2 ATTACH PARTITION fd_pt2_1 FOR VALUES IN (1);
 -- TRUNCATE doesn't work on foreign tables, either directly or recursively
 TRUNCATE fd_pt2_1;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 TRUNCATE fd_pt2;  -- ERROR
-ERROR:  "fd_pt2_1" is not a table
+ERROR:  foreign-data wrapper "dummy" has no handler
 DROP FOREIGN TABLE fd_pt2_1;
 DROP TABLE fd_pt2;
 -- foreign table cannot be part of partition tree made of temporary
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index efb9811198..fefa65705a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -704,6 +704,7 @@ ForeignScanState
 ForeignServer
 ForeignServerInfo
 ForeignTable
+ForeignTruncateInfo
 ForkNumber
 FormData_pg_aggregate
 FormData_pg_am
#66Kazutaka Onishi
onishi@heterodb.com
In reply to: Fujii Masao (#65)
Re: TRUNCATE on foreign table

Fujii-san,

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Thank you for revising the v16 patch to v18 and pushing it.
Cool!

2021年4月8日(木) 22:14 Fujii Masao <masao.fujii@oss.nttdata.com>:

Show quoted text

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Ok, it's fair enough for me.

I'll try to sort out my thought, then raise a follow-up discussion if necessary.

Thanks!

The followings are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

4. Tab-completion for TRUNCATE should be updated so that also foreign tables are displayed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#67Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#65)
Re: TRUNCATE on foreign table

On Thu, Apr 8, 2021 at 6:44 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

The followings are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

I think we should remove the unused enums/macros, instead we could
mention a note of the extensibility of those enums/macros in the
comments section around the enum/macro definitions.

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

IMO, the foreign truncate command should be constructed by collecting
all the information i.e. "TRUNCATE ft, ONLY ft" and let the remote
server execute how it wants to execute. That will be consistent and no
extra logic is required to track the already seen foreign tables while
foreign table collection/foreign truncate command is being prepared on
the local server.

I was thinking that the postgres throws error or warning for commands
such as truncate, vacuum, analyze when the same tables are specified,
but seems like that's not what it does.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

I'm okay with the behaviour as it is consistent with what ONLY does to
local tables. Documenting this behaviour(if not done already) is a
better way I think.

4. Tab-completion for TRUNCATE should be updated so that also foreign tables are displayed.

It will be good to have.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#68Zhihong Yu
zyu@yugabyte.com
In reply to: Bharath Rupireddy (#67)
Re: TRUNCATE on foreign table

On Thu, Apr 8, 2021 at 6:47 PM Bharath Rupireddy <
bharath.rupireddyforpostgres@gmail.com> wrote:

On Thu, Apr 8, 2021 at 6:44 PM Fujii Masao <masao.fujii@oss.nttdata.com>
wrote:

The followings are the open items and discussion points that I'm

thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL,

TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a
foreign table was specified as the target to truncate in TRUNCATE command
is collected and passed to FDW. Does this really need to be passed to FDW?
Seems Stephen, Michael and I think that's necessary. But Kaigai-san does
not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed
because there seems no use case for that maybe.

I think we should remove the unused enums/macros, instead we could
mention a note of the extensibility of those enums/macros in the
comments section around the enum/macro definitions.

2. Currently when the same foreign table is specified multiple times in

the command, the extra information only for the foreign table found first
is collected. For example, when "TRUNCATE ft, ONLY ft" is executed,
TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft"
is found first. Is this OK? Or we should collect all, e.g., both _NORMAL
and _ONLY should be collected in that example? I think that the current
approach (i.e., collect the extra info about table found first if the same
table is specified multiple times) is good because even local tables are
also treated the same way. But Kaigai-san does not.

IMO, the foreign truncate command should be constructed by collecting
all the information i.e. "TRUNCATE ft, ONLY ft" and let the remote
server execute how it wants to execute. That will be consistent and no
extra logic is required to track the already seen foreign tables while
foreign table collection/foreign truncate command is being prepared on
the local server.

I was thinking that the postgres throws error or warning for commands
such as truncate, vacuum, analyze when the same tables are specified,
but seems like that's not what it does.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that

it constructs. That is, if the foreign table is specified with ONLY,
postgres_fdw also issues the TRUNCATE command for the corresponding remote
table with ONLY to the remote server. Then only root table is truncated in
remote server side, and the tables inheriting that are not truncated. Is
this behavior desirable? Seems Michael and I think this behavior is OK. But
Kaigai-san does not.

I'm okay with the behaviour as it is consistent with what ONLY does to
local tables. Documenting this behaviour(if not done already) is a
better way I think.

4. Tab-completion for TRUNCATE should be updated so that also foreign

tables are displayed.

It will be good to have.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

w.r.t. point #1:
bq. I think we should remove the unused enums/macros,

I agree. When there is more concrete use case which requires new enum, we
can add enum whose meaning would be clearer.

Cheers

#69Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#65)
Re: TRUNCATE on foreign table

2021年4月8日(木) 22:14 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Ok, it's fair enough for me.

I'll try to sort out my thought, then raise a follow-up discussion if necessary.

Thanks!

The followings are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

Prior to the discussion of 1-3, I like to clarify the role of foreign-tables.
(Likely, it will lead a natural conclusion for the above open items.)

As literal of SQL/MED (Management of External Data), a foreign table
is a representation of external data in PostgreSQL.
It allows to read and (optionally) write the external data wrapped by
FDW drivers, as if we usually read / write heap tables.
By the FDW-APIs, the core PostgreSQL does not care about the
structure, location, volume and other characteristics of
the external data itself. It expects FDW-APIs invocation will perform
as if we access a regular heap table.

On the other hands, we can say local tables are representation of
"internal" data in PostgreSQL.
A heap table consists of one or more files (per BLCKSZ *
RELSEG_SIZE), and table-am intermediates
the on-disk data to/from on-memory structure (TupleTableSlot).
Here are no big differences in the concept. Ok?

As you know, ONLY clause controls whether TRUNCATE command shall run
on child-tables also, not only the parent.
If "ONLY parent_table" is given, its child tables are not picked up by
ExecuteTruncate(), unless the child tables are
listed individually.
Then, once ExecuteTruncate() picked up the relations, it makes the
relations empty using table-am
(relation_set_new_filenode), and the callee
(heapam_relation_set_new_filenode) does not care about whether the
table is specified with ONLY, or not. It just makes the data
represented by the table empty (in transactional way).

So, how foreign tables shall perform?

Once ExecuteTruncate() picked up a foreign table, according to
ONLY-clause, should the FDW driver consider
the context where the foreign tables are specified? And, what behavior
is consistent?
I think that FDW driver shall make the external data represented by
the foreign table empty, regardless of the
structure, location, volume and others.

Therefore, if we follow the above assumption, we don't need to inform
the context where foreign-tables are
picked up (TRUNCATE_REL_CONTEXT_*), so postgres_fdw shall not control
the remote TRUNCATE query
according to the flags. It always truncate the entire tables (if
multiple) on behalf of the foreign tables.

As an aside, if postgres_fdw maps a remote table with "ONLY" clause,
it is exactly a situation where we add
"ONLY" clause on the truncate command, because it is a representation
of the remote "ONLY parent_table" in
this case.

How about your thought?
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#70Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Zhihong Yu (#68)
1 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/09 11:05, Zhihong Yu wrote:

On Thu, Apr 8, 2021 at 6:47 PM Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com <mailto:bharath.rupireddyforpostgres@gmail.com>> wrote:

On Thu, Apr 8, 2021 at 6:44 PM Fujii Masao <masao.fujii@oss.nttdata.com <mailto:masao.fujii@oss.nttdata.com>> wrote:

The followings are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

I think we should remove the unused enums/macros, instead we could
mention a note of the extensibility of those enums/macros in the
comments section around the enum/macro definitions.

+1

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

IMO, the foreign truncate command should be constructed by collecting
all the information i.e. "TRUNCATE ft, ONLY ft" and let the remote
server execute how it wants to execute. That will be consistent and no
extra logic is required to track the already seen foreign tables while
foreign table collection/foreign truncate command is being prepared on
the local server.

But isn't it difficult for remote server to determine how to execute? Please imagine the case where there are four tables as follows.

- regular table "remote_parent" in the remote server
- regular table "remote_child" inheriting "remote_parent" table in the remote server
- foreign table "local_parent" in the local server, accessing "remote_parent" table
- regular table "local_child" inheriting "local_parent" table in the local server

When "TRUNCATE ONLY local_parent, local_parent" is executed, local_child is not truncated because of ONLY clause. Then if we collect all the information about context, both TRUNCATE_REL_CONTEXT_NORMAL and _ONLY are passed to FDW. In this case how should FDW determine whether to use ONLY when issuing TRUNCATE command to the remote server? Isn't it difficult to do that? If FDW determines not to use ONLY because _NORMAL flag is passed, both remote_parent and remote_child tables are truncated. That is, though both local_child and remote_child are the inheriting tables, isn't it strange that only the former is ignored and the latter is truncated?

I was thinking that the postgres throws error or warning for commands
such as truncate, vacuum, analyze when the same tables are specified,
but seems like that's not what it does.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

I'm okay with the behaviour as it is consistent with what ONLY does to
local tables. Documenting this behaviour(if not done already) is a
better way I think.

+1

4. Tab-completion for TRUNCATE should be updated so that also foreign tables are displayed.

It will be good to have.

Patch attached.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com <http://www.enterprisedb.com>

w.r.t. point #1:
bq. I think we should remove the unused enums/macros,

I agree. When there is more concrete use case which requires new enum, we can add enum whose meaning would be clearer.

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

tab_complete_for_truncate_on_foreign_tables_v1.patchtext/plain; charset=UTF-8; name=tab_complete_for_truncate_on_foreign_tables_v1.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 26ac786c51..d34271e3b8 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -549,6 +549,18 @@ static const SchemaQuery Query_for_list_of_selectables = {
 	.result = "pg_catalog.quote_ident(c.relname)",
 };
 
+/* Relations supporting TRUNCATE */
+static const SchemaQuery Query_for_list_of_truncatables = {
+	.catname = "pg_catalog.pg_class c",
+	.selcondition =
+	"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+	CppAsString2(RELKIND_FOREIGN_TABLE) ", "
+	CppAsString2(RELKIND_PARTITIONED_TABLE) ")",
+	.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
+	.namespace = "c.relnamespace",
+	.result = "pg_catalog.quote_ident(c.relname)",
+};
+
 /* Relations supporting GRANT are currently same as those supporting SELECT */
 #define Query_for_list_of_grantables Query_for_list_of_selectables
 
@@ -3834,14 +3846,14 @@ psql_completion(const char *text, int start, int end)
 
 /* TRUNCATE */
 	else if (Matches("TRUNCATE"))
-		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables,
+		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_truncatables,
 								   " UNION SELECT 'TABLE'"
 								   " UNION SELECT 'ONLY'");
 	else if (Matches("TRUNCATE", "TABLE"))
-		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables,
+		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_truncatables,
 								   " UNION SELECT 'ONLY'");
 	else if (HeadMatches("TRUNCATE") && TailMatches("ONLY"))
-		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables, NULL);
+		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_truncatables, NULL);
 	else if (Matches("TRUNCATE", MatchAny) ||
 			 Matches("TRUNCATE", "TABLE|ONLY", MatchAny) ||
 			 Matches("TRUNCATE", "TABLE", "ONLY", MatchAny))
#71Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#69)
Re: TRUNCATE on foreign table

On 2021/04/09 12:33, Kohei KaiGai wrote:

2021年4月8日(木) 22:14 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Ok, it's fair enough for me.

I'll try to sort out my thought, then raise a follow-up discussion if necessary.

Thanks!

The following are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

Prior to the discussion of 1-3, I like to clarify the role of foreign-tables.
(Likely, it will lead a natural conclusion for the above open items.)

As literal of SQL/MED (Management of External Data), a foreign table
is a representation of external data in PostgreSQL.
It allows to read and (optionally) write the external data wrapped by
FDW drivers, as if we usually read / write heap tables.
By the FDW-APIs, the core PostgreSQL does not care about the
structure, location, volume and other characteristics of
the external data itself. It expects FDW-APIs invocation will perform
as if we access a regular heap table.

On the other hand, we can say local tables are a representation of
"internal" data in PostgreSQL.
A heap table consists of one or more files (per BLCKSZ *
RELSEG_SIZE), and table-am intermediates
the on-disk data to/from on-memory structure (TupleTableSlot).
Here are no big differences in the concept. Ok?

As you know, ONLY clause controls whether TRUNCATE command shall run
on child-tables also, not only the parent.
If "ONLY parent_table" is given, its child tables are not picked up by
ExecuteTruncate(), unless the child tables are
listed individually.
Then, once ExecuteTruncate() picked up the relations, it makes the
relations empty using table-am
(relation_set_new_filenode), and the callee
(heapam_relation_set_new_filenode) does not care about whether the
table is specified with ONLY, or not. It just makes the data
represented by the table empty (in transactional way).

So, how foreign tables shall perform?

Once ExecuteTruncate() has picked up a foreign table according to
the ONLY clause, should the FDW driver consider
the context in which the foreign tables were specified? And what behavior
is consistent?
I think that FDW driver shall make the external data represented by
the foreign table empty, regardless of the
structure, location, volume and others.

Therefore, if we follow the above assumption, we don't need to inform
the context where foreign-tables are
picked up (TRUNCATE_REL_CONTEXT_*), so postgres_fdw shall not control
the remote TRUNCATE query
according to the flags. It always truncates the entire tables (if
multiple) on behalf of the foreign tables.

This makes me wonder if the information about CASCADE/RESTRICT (maybe also RESTART/CONTINUE) also should not be passed to FDW. You're thinking that? Or only ONLY clause should be ignored for a foreign table?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#72Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#70)
Re: TRUNCATE on foreign table

On Fri, Apr 9, 2021 at 7:06 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

4. Tab-completion for TRUNCATE should be updated so that also foreign tables are displayed.

It will be good to have.

Patch attached.

Tab completion patch LGTM and it works as expected.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#73Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#70)
Re: TRUNCATE on foreign table

On Fri, Apr 9, 2021 at 7:06 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

IMO, the foreign truncate command should be constructed by collecting
all the information i.e. "TRUNCATE ft, ONLY ft" and let the remote
server execute how it wants to execute. That will be consistent and no
extra logic is required to track the already seen foreign tables while
foreign table collection/foreign truncate command is being prepared on
the local server.

But isn't it difficult for remote server to determine how to execute? Please imagine the case where there are four tables as follows.

- regular table "remote_parent" in the remote server
- regular table "remote_child" inheriting "remote_parent" table in the remote server
- foreign table "local_parent" in the local server, accessing "remote_parent" table
- regular table "local_child" inheriting "local_parent" table in the local server

When "TRUNCATE ONLY local_parent, local_parent" is executed, local_child is not truncated because of ONLY clause. Then if we collect all the information about context, both TRUNCATE_REL_CONTEXT_NORMAL and _ONLY are passed to FDW. In this case how should FDW determine whether to use ONLY when issuing TRUNCATE command to the remote server? Isn't it difficult to do that? If FDW determines not to use ONLY because _NORMAL flag is passed, both remote_parent and remote_child tables are truncated. That is, though both local_child and remote_child are the inheriting tables, isn't it strange that only the former is ignored and the latter is truncated?

My understanding was wrong. I see below code from ExecuteTruncate:
/* don't throw error for "TRUNCATE foo, foo" */
if (list_member_oid(relids, myrelid))
{
table_close(rel, lockmode);
continue;
}

This basically tells us that the first occurrence of a table is
considered, and all the rest are ignored. This is what we are going to have in our
relids_extra and relids. So, we will be sending only the first
occurrence info to the foreign truncate command. I agree with the
current approach "i.e., collect the extra info about table found first
if the same table is specified multiple times" for the same reason
that "local tables are also treated the same way."

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#74Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#71)
Re: TRUNCATE on foreign table

2021年4月9日(金) 22:51 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/09 12:33, Kohei KaiGai wrote:

2021年4月8日(木) 22:14 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Ok, it's fair enough for me.

I'll try to sort out my thought, then raise a follow-up discussion if necessary.

Thanks!

The following are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

Prior to the discussion of 1-3, I like to clarify the role of foreign-tables.
(Likely, it will lead a natural conclusion for the above open items.)

As literal of SQL/MED (Management of External Data), a foreign table
is a representation of external data in PostgreSQL.
It allows to read and (optionally) write the external data wrapped by
FDW drivers, as if we usually read / write heap tables.
By the FDW-APIs, the core PostgreSQL does not care about the
structure, location, volume and other characteristics of
the external data itself. It expects FDW-APIs invocation will perform
as if we access a regular heap table.

On the other hand, we can say local tables are a representation of
"internal" data in PostgreSQL.
A heap table consists of one or more files (per BLCKSZ *
RELSEG_SIZE), and table-am intermediates
the on-disk data to/from on-memory structure (TupleTableSlot).
Here are no big differences in the concept. Ok?

As you know, ONLY clause controls whether TRUNCATE command shall run
on child-tables also, not only the parent.
If "ONLY parent_table" is given, its child tables are not picked up by
ExecuteTruncate(), unless the child tables are
listed individually.
Then, once ExecuteTruncate() picked up the relations, it makes the
relations empty using table-am
(relation_set_new_filenode), and the callee
(heapam_relation_set_new_filenode) does not care about whether the
table is specified with ONLY, or not. It just makes the data
represented by the table empty (in transactional way).

So, how foreign tables shall perform?

Once ExecuteTruncate() has picked up a foreign table according to
the ONLY clause, should the FDW driver consider
the context in which the foreign tables were specified? And what behavior
is consistent?
I think that FDW driver shall make the external data represented by
the foreign table empty, regardless of the
structure, location, volume and others.

Therefore, if we follow the above assumption, we don't need to inform
the context where foreign-tables are
picked up (TRUNCATE_REL_CONTEXT_*), so postgres_fdw shall not control
the remote TRUNCATE query
according to the flags. It always truncates the entire tables (if
multiple) on behalf of the foreign tables.

This makes me wonder if the information about CASCADE/RESTRICT (maybe also RESTART/CONTINUE) also should not be passed to FDW. You're thinking that? Or only ONLY clause should be ignored for a foreign table?

I think the above information (DropBehavior and restart_seqs) are
valuable to pass.

The CASCADE/RESTRICT clause controls whether the truncate command also
eliminates
the rows that block the deletion (FKs in RDBMS). Only the FDW driver can
know whether the
external data has a "removal-blocker", thus we need to pass the
DropBehavior to the callback.

The RESTART/CONTINUE clause also controls whether the truncate command restarts
the relevant resources that are associated with the target table
(Sequences in RDBMS).
Only the FDW driver can know whether the external data has relevant
resources to reset,
thus we need to pass the "restart_seqs" to the callback.

Unlike above two parameters, the role of ONLY-clause is already
finished at the time
when ExecuteTruncate() picked up the target relations, from the
standpoint of above
understanding of foreign-tables and external data.

Thoughts?

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#75Justin Pryzby
pryzby@telsasoft.com
In reply to: Fujii Masao (#65)
3 attachment(s)
Re: TRUNCATE on foreign table

On Thu, Apr 08, 2021 at 10:14:17PM +0900, Fujii Masao wrote:

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Find attached language fixes.

I'm also proposing to convert an if/else to a switch(), since I don't like
"if/else if" without an "else", and since the compiler can warn about missing
enum values in switch cases. You could also write:
| Assert(behavior == DROP_RESTRICT || behavior == DROP_CASCADE)

Also, you currently test:

+ if (extra & TRUNCATE_REL_CONTEXT_ONLY)

but TRUNCATE_REL_ aren't independent bits, so shouldn't be tested with "&".

src/include/commands/tablecmds.h-#define TRUNCATE_REL_CONTEXT_NORMAL 1 /* specified without ONLY clause */
src/include/commands/tablecmds.h-#define TRUNCATE_REL_CONTEXT_ONLY 2 /* specified with ONLY clause */
src/include/commands/tablecmds.h:#define TRUNCATE_REL_CONTEXT_CASCADING 3 /* not specified but truncated
src/include/commands/tablecmds.h- * due to dependency (e.g.,
src/include/commands/tablecmds.h- * partition table) */

Show quoted text
+++ b/contrib/postgres_fdw/deparse.c
@@ -2172,6 +2173,43 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
deparseRelation(buf, rel);
}
+/*
+ * Construct a simple "TRUNCATE rel" statement
+ */
+void
+deparseTruncateSql(StringInfo buf,
+				   List *rels,
+				   List *rels_extra,
+				   DropBehavior behavior,
+				   bool restart_seqs)
+{
+	ListCell   *lc1,
+			   *lc2;
+
+	appendStringInfoString(buf, "TRUNCATE ");
+
+	forboth(lc1, rels, lc2, rels_extra)
+	{
+		Relation	rel = lfirst(lc1);
+		int			extra = lfirst_int(lc2);
+
+		if (lc1 != list_head(rels))
+			appendStringInfoString(buf, ", ");
+		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+			appendStringInfoString(buf, "ONLY ");
+
+		deparseRelation(buf, rel);
+	}
+
+	appendStringInfo(buf, " %s IDENTITY",
+					 restart_seqs ? "RESTART" : "CONTINUE");
+
+	if (behavior == DROP_RESTRICT)
+		appendStringInfoString(buf, " RESTRICT");
+	else if (behavior == DROP_CASCADE)
+		appendStringInfoString(buf, " CASCADE");
+}

Attachments:

0003-Test-integer-using-and-not.patchtext/x-diff; charset=us-asciiDownload
From 76d783f4e472ea466cca0d126798505f851e54d0 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 10 Apr 2021 23:08:58 -0500
Subject: [PATCH 3/3] Test integer using == and not &

---
 contrib/postgres_fdw/deparse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 2c702c0e37..c2b1269f73 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2195,7 +2195,7 @@ deparseTruncateSql(StringInfo buf,
 
 		if (lc1 != list_head(rels))
 			appendStringInfoString(buf, ", ");
-		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
+		if (extra == TRUNCATE_REL_CONTEXT_ONLY)
 			appendStringInfoString(buf, "ONLY ");
 
 		deparseRelation(buf, rel);
-- 
2.17.0

0001-WIP-doc-review-Allow-TRUNCATE-command-to-truncate-fo.patchtext/x-diff; charset=us-asciiDownload
From 7bbd9312464899dfc2c70fdc64c95a298ac01594 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 8 Apr 2021 10:10:58 -0500
Subject: [PATCH 1/3] WIP doc review: Allow TRUNCATE command to truncate
 foreign tables.

8ff1c94649f5c9184ac5f07981d8aea9dfd7ac19
---
 doc/src/sgml/fdwhandler.sgml   | 43 +++++++++++++++++-----------------
 doc/src/sgml/postgres-fdw.sgml |  2 +-
 doc/src/sgml/ref/truncate.sgml |  2 +-
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 98882ddab8..5a76dede24 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1075,40 +1075,39 @@ ExecForeignTruncate(List *rels, List *rels_extra,
                     DropBehavior behavior, bool restart_seqs);
 </programlisting>
 
-     Truncate a set of foreign tables specified in <literal>rels</literal>.
+     Truncate foreign tables.
      This function is called when <xref linkend="sql-truncate"/> is executed
-     on foreign tables.  <literal>rels</literal> is the list of
-     <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
+     on a foreign table.  <literal>rels</literal> is a list of
+     <structname>Relation</structname> data structures for the
+     foreign tables to be truncated.  <literal>rels_extra</literal> is a list of
+     corresponding <literal>int</literal> values which provide extra information about
+     the foreign tables.  Each element of rels_extra may have the value
      <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
+     the foreign table is specified <emphasis>WITHOUT</emphasis> the <literal>ONLY</literal> clause
      in <command>TRUNCATE</command>,
      <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
+     the foreign table is specified <emphasis>WITH</emphasis> the <literal>ONLY</literal> clause,
+     or <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
      which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
+     but will be truncated due to a dependency (for example, a table partition).
     </para>
 
     <para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
+     <literal>behavior</literal> must be specified as
+     <literal>DROP_RESTRICT</literal> or <literal>DROP_CASCADE</literal>.
+     If specified as <literal>DROP_RESTRICT</literal>, the
+     <literal>RESTRICT</literal> option will be included in the
      <command>TRUNCATE</command> command.
+     If specified as <literal>DROP_CASCADE</literal>, the
+     <literal>CASCADE</literal> option will be included.
     </para>
     
     <para>
-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is specified as <literal>true</literal>,
+     the <command>TRUNCATE</command> command will include the
+     <literal>RESTART IDENTITY</literal> option.
+     Otherwise, the <command>TRUNCATE</command> command will include the
+     <literal>CONTINUE IDENTITY</literal> option.
     </para>
 
     <para>
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..116434f658 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -452,7 +452,7 @@ OPTIONS (ADD password_required 'false');
      <listitem>
       <para>
        This option controls whether <filename>postgres_fdw</filename> allows
-       foreign tables to be truncated using <command>TRUNCATE</command>
+       foreign tables to be truncated using the <command>TRUNCATE</command>
        command. It can be specified for a foreign table or a foreign server.
        A table-level option overrides a server-level option.
        The default is <literal>true</literal>.
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index acf3633be4..9d846f88c9 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
 
   <para>
    <command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
    see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
-- 
2.17.0

0002-Convert-an-if-else-if-without-an-else-to-a-switch.patchtext/x-diff; charset=us-asciiDownload
From 513bb389e3e3c3cfe47518164c6ce49736854756 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 10 Apr 2021 22:53:21 -0500
Subject: [PATCH 2/3] Convert an if/else if without an else to a switch

---
 contrib/postgres_fdw/deparse.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc4c3620d..2c702c0e37 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2204,10 +2204,15 @@ deparseTruncateSql(StringInfo buf,
 	appendStringInfo(buf, " %s IDENTITY",
 					 restart_seqs ? "RESTART" : "CONTINUE");
 
-	if (behavior == DROP_RESTRICT)
+	switch (behavior)
+	{
+	case DROP_RESTRICT:
 		appendStringInfoString(buf, " RESTRICT");
-	else if (behavior == DROP_CASCADE)
+		break;
+	case DROP_CASCADE:
 		appendStringInfoString(buf, " CASCADE");
+		break;
+	}
 }
 
 /*
-- 
2.17.0

#76Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Justin Pryzby (#75)
Re: TRUNCATE on foreign table

On Sun, Apr 11, 2021 at 9:47 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

Find attached language fixes.

Thanks for the patches.

I'm also proposing to convert an if/else to a switch(), since I don't like
"if/else if" without an "else", and since the compiler can warn about missing
enum values in switch cases.

I think we have a good bunch of if, else-if (without else) in the code
base, and I'm not sure the compilers have warned about them. Apart
from that whether if-else or switch-case is just a coding choice. And
we have only two values for DropBehavior enum i.e DROP_RESTRICT and
DROP_CASCADE(I'm not sure we will extend this enum to have more
values), if we have more then switch case would have looked cleaner.
But IMO, existing if and else-if would be good. Having said that, it's
up to the committer which one they think better in this case.

You could also write:
| Assert(behavior == DROP_RESTRICT || behavior == DROP_CASCADE)

IMO, we don't need to assert on behaviour as we just carry that
variable from ExecuteTruncateGuts to postgresExecForeignTruncate
without any modifications. And ExecuteTruncateGuts would get it from
the parser, so at no point will it have a different value than
DROP_RESTRICT or DROP_CASCADE.

Also, you currently test:

+ if (extra & TRUNCATE_REL_CONTEXT_ONLY)

but TRUNCATE_REL_ aren't independent bits, so shouldn't be tested with "&".

Yeah this is an issue. We could just change the #defines to values
0x0001, 0x0002, 0x0004, 0x0008 ... 0x0020 and so on and then testing
with & would work. So, this way, more than one option can be multiplexed
into the same int value. To multiplex, we need to think: will there be
a scenario where a single rel in the truncate can have multiple
options at a time and do we want to distinguish these options while
deparsing?

#define TRUNCATE_REL_CONTEXT_NORMAL 0x0001 /* specified without
ONLY clause */
#define TRUNCATE_REL_CONTEXT_ONLY 0x0002 /* specified with
ONLY clause */
#define TRUNCATE_REL_CONTEXT_CASCADING 0x0004 /* not specified
but truncated

And I'm not sure what's the agreement on retaining or removing #define
values, currently I see only TRUNCATE_REL_CONTEXT_ONLY is being used,
others are just being set but not used. As I said upthread, it will be
good to remove the unused macros/enums, retain only the ones that are
used, especially TRUNCATE_REL_CONTEXT_CASCADING this is not required I
feel, because we can add the child partitions that are foreign tables
to relids as just normal foreign tables with TRUNCATE_REL_CONTEXT_ONLY
option.

On the patches:
0001-WIP-doc-review-Allow-TRUNCATE-command-to-truncate-fo.patch ---> LGTM.
0002-Convert-an-if-else-if-without-an-else-to-a-switch.patch. --> IMO,
we can ignore this patch.
0003-Test-integer-using-and-not.patch --> if we redefine the macros to
multiplex them into a single int value, we don't need this patch.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#77Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#76)
Re: TRUNCATE on foreign table

On 2021/04/11 19:15, Bharath Rupireddy wrote:

On Sun, Apr 11, 2021 at 9:47 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

Find attached language fixes.

Thanks for the patches.

Thanks for the patches!

0001 patch basically looks good to me.

+     <literal>behavior</literal> must be specified as
+     <literal>DROP_RESTRICT</literal> or <literal>DROP_CASCADE</literal>.
+     If specified as <literal>DROP_RESTRICT</literal>, the
+     <literal>RESTRICT</literal> option will be included in the
       <command>TRUNCATE</command> command.
+     If specified as <literal>DROP_CASCADE</literal>, the
+     <literal>CASCADE</literal> option will be included.

Maybe "will be included" is confusing? Because FDW might not include
the RESTRICT in the TRUNCATE command that it will issue
when DROP_RESTRICT is specified, for example. To be more precise,
we should document something like "If specified as DROP_RESTRICT,
the RESTRICT option was included in the original TRUNCATE command"?

I'm also proposing to convert an if/else to a switch(), since I don't like
"if/else if" without an "else", and since the compiler can warn about missing
enum values in switch cases.

I think we have a good bunch of if, else-if (without else) in the code
base, and I'm not sure the compilers have warned about them. Apart
from that whether if-else or switch-case is just a coding choice. And
we have only two values for DropBehavior enum i.e DROP_RESTRICT and
DROP_CASCADE(I'm not sure we will extend this enum to have more
values), if we have more then switch case would have looked cleaner.
But IMO, existing if and else-if would be good. Having said that, it's
up to the committer which one they think better in this case.

Either works at least for me. Also for now I have no strong opinion
to change the condition so that it uses switch().

You could also write:
| Assert(behavior == DROP_RESTRICT || behavior == DROP_CASCADE)

IMO, we don't need to assert on behaviour as we just carry that
variable from ExecuteTruncateGuts to postgresExecForeignTruncate
without any modifications. And ExecuteTruncateGuts would get it from
the syntaxer, so no point it will have a different value than
DROP_RESTRICT or DROP_CASCADE.

Also, you currently test:

+ if (extra & TRUNCATE_REL_CONTEXT_ONLY)

but TRUNCATE_REL_ aren't independent bits, so shouldn't be tested with "&".

You're right.

Yeah this is an issue. We could just change the #defines to values
0x0001, 0x0002, 0x0004, 0x0008 ... 0x0020 and so on and then testing
with & would work. So, this way, more than one option can be multiplexed
into the same int value. To multiplex, we need to think: will there be
a scenario where a single rel in the truncate can have multiple
options at a time and do we want to distinguish these options while
deparsing?

#define TRUNCATE_REL_CONTEXT_NORMAL 0x0001 /* specified without
ONLY clause */
#define TRUNCATE_REL_CONTEXT_ONLY 0x0002 /* specified with
ONLY clause */
#define TRUNCATE_REL_CONTEXT_CASCADING 0x0004 /* not specified
but truncated

And I'm not sure what's the agreement on retaining or removing #define
values, currently I see only TRUNCATE_REL_CONTEXT_ONLY is being used,
others are just being set but not used. As I said upthread, it will be
good to remove the unused macros/enums, retain only the ones that are
used, especially TRUNCATE_REL_CONTEXT_CASCADING this is not required I
feel, because we can add the child partitions that are foreign tables
to relids as just normal foreign tables with TRUNCATE_REL_CONTEXT_ONLY
option.

Our current consensus seems that TRUNCATE_REL_CONTEXT_NORMAL and
TRUNCATE_REL_CONTEXT_CASCADING should be removed because they are not used.
Since Kaigai-san wants to remove the extra information entirely,
I guess he also agrees to remove both TRUNCATE_REL_CONTEXT_NORMAL
and _CASCADING. If this is right, we should apply 0003 patch and remove
those two macro values. Or we should make the extra info boolean value
instead of int, i.e., it indicates whether ONLY was specified or not.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#78Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#72)
Re: TRUNCATE on foreign table

On 2021/04/09 23:10, Bharath Rupireddy wrote:

On Fri, Apr 9, 2021 at 7:06 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

4. Tab-completion for TRUNCATE should be updated so that also foreign tables are displayed.

It will be good to have.

Patch attached.

Tab completion patch LGTM and it works as expected.

Thanks for the review! Pushed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#79Justin Pryzby
pryzby@telsasoft.com
In reply to: Bharath Rupireddy (#76)
2 attachment(s)
Re: TRUNCATE on foreign table

On Sun, Apr 11, 2021 at 03:45:36PM +0530, Bharath Rupireddy wrote:

On Sun, Apr 11, 2021 at 9:47 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

Also, you currently test:

+ if (extra & TRUNCATE_REL_CONTEXT_ONLY)

but TRUNCATE_REL_ aren't independent bits, so shouldn't be tested with "&".

Yeah this is an issue. We could just change the #defines to values
0x0001, 0x0002, 0x0004, 0x0008 ... 0x0020 and so on and then testing
with & would work. So, this way, more than one option can be multiplexed
into the same int value. To multiplex, we need to think: will there be
a scenario where a single rel in the truncate can have multiple
options at a time and do we want to distinguish these options while
deparsing?

#define TRUNCATE_REL_CONTEXT_NORMAL 0x0001 /* specified without ONLY clause */
#define TRUNCATE_REL_CONTEXT_ONLY 0x0002 /* specified with ONLY clause */
#define TRUNCATE_REL_CONTEXT_CASCADING 0x0004 /* not specified but truncated due to dependency (e.g., partition table) */

And I'm not sure what's the agreement on retaining or removing #define
values, currently I see only TRUNCATE_REL_CONTEXT_ONLY is being used,
others are just being set but not used. As I said upthread, it will be
good to remove the unused macros/enums, retain only the ones that are
used, especially TRUNCATE_REL_CONTEXT_CASCADING this is not required I
feel, because we can add the child partitions that are foreign tables
to relids as just normal foreign tables with TRUNCATE_REL_CONTEXT_ONLY
option.

Converting to "bits" would collapse TRUNCATE_REL_CONTEXT_ONLY and
TRUNCATE_REL_CONTEXT_NORMAL into a single bit. TRUNCATE_REL_CONTEXT_CASCADING
could optionally be removed.

+1 to convert to bits instead of changing "&" to "==".

--
Justin

Attachments:

0001-Convert-TRUNCATE_REL_CONTEXT_-to-bits.patchtext/x-diff; charset=us-asciiDownload
From 26e1015de344352c4cd73deda28ad59594914067 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Mon, 12 Apr 2021 19:07:34 -0500
Subject: [PATCH 1/2] Convert TRUNCATE_REL_CONTEXT_* to bits

The values were defined as consecutive integers, but TRUNCATE_REL_CONTEXT_ONLY
was tested with bitwise test.  If specified as bits, NORMAL vs ONLY is
redundant (there's only one bit).  And CASCADING was unused.

See also: 8ff1c94649f5c9184ac5f07981d8aea9dfd7ac19
---
 src/backend/commands/tablecmds.c         | 9 +++------
 src/backend/replication/logical/worker.c | 4 ++--
 src/include/commands/tablecmds.h         | 6 +-----
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 096a6f2891..e8215d8dc8 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1654,8 +1654,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
-		relids_extra = lappend_int(relids_extra, (recurse ?
-												  TRUNCATE_REL_CONTEXT_NORMAL :
+		relids_extra = lappend_int(relids_extra, (recurse ? 0 :
 												  TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
@@ -1704,8 +1703,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+				relids_extra = lappend_int(relids_extra, 0);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1799,8 +1797,7 @@ ExecuteTruncateGuts(List *explicit_rels,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+				relids_extra = lappend_int(relids_extra, 0);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index fb3ba5c415..986b634525 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1825,7 +1825,7 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
+		relids_extra = lappend_int(relids_extra, 0);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1864,7 +1864,7 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
+				relids_extra = lappend_int(relids_extra, 0);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b808a07e46..9aa9f3c6c7 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -25,11 +25,7 @@
  * These values indicate how a relation was specified as the target to
  * truncate in TRUNCATE command.
  */
-#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
-#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
-#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
-												 * due to dependency (e.g.,
-												 * partition table) */
+#define TRUNCATE_REL_CONTEXT_ONLY         0x01 /* specified with ONLY clause */
 
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
-- 
2.17.0

0002-WIP-doc-review-Allow-TRUNCATE-command-to-truncate-fo.patchtext/x-diff; charset=us-asciiDownload
From 8f332a4c9a8690ade17421528b1d375ddd992cce Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 8 Apr 2021 10:10:58 -0500
Subject: [PATCH 2/2] WIP doc review: Allow TRUNCATE command to truncate
 foreign tables.

8ff1c94649f5c9184ac5f07981d8aea9dfd7ac19
---
 doc/src/sgml/fdwhandler.sgml   | 55 ++++++++++++++--------------------
 doc/src/sgml/postgres-fdw.sgml |  2 +-
 doc/src/sgml/ref/truncate.sgml |  2 +-
 3 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 98882ddab8..69feefe66b 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1075,46 +1075,37 @@ ExecForeignTruncate(List *rels, List *rels_extra,
                     DropBehavior behavior, bool restart_seqs);
 </programlisting>
 
-     Truncate a set of foreign tables specified in <literal>rels</literal>.
+     Truncate foreign tables.
      This function is called when <xref linkend="sql-truncate"/> is executed
-     on foreign tables.  <literal>rels</literal> is the list of
-     <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
-     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
-     in <command>TRUNCATE</command>,
-     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
-     which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
-    </para>
-
-    <para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
+     on a foreign table.  <literal>rels</literal> is a list of
+     <structname>Relation</structname> data structures for each
+     foreign tables to be truncated.
+     <literal>rels_extra</literal> is a list of <literal>int</literal> values
+     of the same length as <literal>rels</literal>.  Each element of
+     <literal>rels_extra</literal> is a bitmask of flags and provides extra
+     information about the corresponding foreign table.  Currently, the only
+     defined flag is <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means
+     that the foreign table was specified with the <literal>ONLY</literal>
+     clause in the original <command>TRUNCATE</command> command.
+    </para>
+
+    <para>
+     <literal>behavior</literal> is either
+     <literal>DROP_RESTRICT</literal> or <literal>DROP_CASCADE</literal>.
+     <literal>DROP_CASCADE</literal> indicates that the
+     <literal>CASCADE</literal> option was specified in the original
      <command>TRUNCATE</command> command.
     </para>
     
     <para>
-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command included the
+     <literal>RESTART IDENTITY</literal> option.
     </para>
 
     <para>
-     <command>TRUNCATE</command> invokes
-     <function>ExecForeignTruncate</function> once per foreign server
-     that foreign tables to truncate belong to.  This means that all foreign
+     <function>ExecForeignTruncate</function> is invoked once per foreign server
+     for which foreign tables are to be truncated.  This means that all foreign
      tables included in <literal>rels</literal> must belong to the same
      server.
     </para>
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..116434f658 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -452,7 +452,7 @@ OPTIONS (ADD password_required 'false');
      <listitem>
       <para>
        This option controls whether <filename>postgres_fdw</filename> allows
-       foreign tables to be truncated using <command>TRUNCATE</command>
+       foreign tables to be truncated using the <command>TRUNCATE</command>
        command. It can be specified for a foreign table or a foreign server.
        A table-level option overrides a server-level option.
        The default is <literal>true</literal>.
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index acf3633be4..9d846f88c9 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
 
   <para>
    <command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
    see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
-- 
2.17.0

#80Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Justin Pryzby (#79)
Re: TRUNCATE on foreign table

On Tue, Apr 13, 2021 at 6:27 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Sun, Apr 11, 2021 at 03:45:36PM +0530, Bharath Rupireddy wrote:

On Sun, Apr 11, 2021 at 9:47 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

Also, you currently test:

+ if (extra & TRUNCATE_REL_CONTEXT_ONLY)

but TRUNCATE_REL_ aren't independent bits, so shouldn't be tested with "&".

Yeah this is an issue. We could just change the #defines to values
0x0001, 0x0002, 0x0004, 0x0008 ... 0x0020 and so on and then testing
with & would work. So, this way, more than one option can be multiplexed
into the same int value. To multiplex, we need to think: will there be
a scenario where a single rel in the truncate can have multiple
options at a time and do we want to distinguish these options while
deparsing?

#define TRUNCATE_REL_CONTEXT_NORMAL 0x0001 /* specified without ONLY clause */
#define TRUNCATE_REL_CONTEXT_ONLY 0x0002 /* specified with ONLY clause */
#define TRUNCATE_REL_CONTEXT_CASCADING 0x0004 /* not specified but truncated due to dependency (e.g., partition table) */

And I'm not sure what's the agreement on retaining or removing #define
values, currently I see only TRUNCATE_REL_CONTEXT_ONLY is being used,
others are just being set but not used. As I said upthread, it will be
good to remove the unused macros/enums, retain only the ones that are
used, especially TRUNCATE_REL_CONTEXT_CASCADING this is not required I
feel, because we can add the child partitions that are foreign tables
to relids as just normal foreign tables with TRUNCATE_REL_CONTEXT_ONLY
option.

Converting to "bits" would collapse TRUNCATE_REL_CONTEXT_ONLY and
TRUNCATE_REL_CONTEXT_NORMAL into a single bit. TRUNCATE_REL_CONTEXT_CASCADING
could optionally be removed.

+1 to convert to bits instead of changing "&" to "==".

Thanks for the patches.

I agree to convert to bits and pass it as int value which is
extensible i.e. we can pass more extra parameters across if required.
Also I'm not in favour of removing relids_extra altogether, we might
need this to send some info in future.

Both 0001 and 0002(along with the new phrasings) look good to me.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#81Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#80)
Re: TRUNCATE on foreign table

On 2021/04/13 10:21, Bharath Rupireddy wrote:

I agree to convert to bits and pass it as int value which is
extensible i.e. we can pass more extra parameters across if required.

Looks good to me.

Also I'm not in favour of removing relids_extra altogether, we might
need this to send some info in future.

Both 0001 and 0002(along with the new phrasings) look good to me.

Thanks for updating the patches!

One question: is "CONTEXT" in "TRUNCATE_REL_CONTEXT_ONLY" required?
If not, I'm tempted to shorten the name to "TRUNCATE_REL_ONLY" or something.

+     <structname>Relation</structname> data structures for each
+     foreign tables to be truncated.

"foreign tables" should be "foreign table" because it follows "each"?

+    <para>
+     <literal>behavior</literal> is either
+     <literal>DROP_RESTRICT</literal> or <literal>DROP_CASCADE</literal>.
+     <literal>DROP_CASCADE</literal> indicates that the
+     <literal>CASCADE</literal> option was specified in the original
       <command>TRUNCATE</command> command.

Why did you remove the description for DROP_RESTRICT?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#82Justin Pryzby
pryzby@telsasoft.com
In reply to: Fujii Masao (#81)
Re: TRUNCATE on foreign table

On Tue, Apr 13, 2021 at 12:38:35PM +0900, Fujii Masao wrote:

+     <structname>Relation</structname> data structures for each
+     foreign tables to be truncated.

"foreign tables" should be "foreign table" because it follows "each"?

Yes, you're right.

+    <para>
+     <literal>behavior</literal> is either
+     <literal>DROP_RESTRICT</literal> or <literal>DROP_CASCADE</literal>.
+     <literal>DROP_CASCADE</literal> indicates that the
+     <literal>CASCADE</literal> option was specified in the original
<command>TRUNCATE</command> command.

Why did you remove the description for DROP_RESTRICT?

Because in order to handle the default/unspecified case, the description was
going to need to be something like:

| DROP_RESTRICT indicates that the RESTRICT option was specified in the original
| truncate command (or CASCADE option was NOT specified).

--
Justin

#83Kohei KaiGai
kaigai@heterodb.com
In reply to: Kohei KaiGai (#74)
Re: TRUNCATE on foreign table

2021年4月9日(金) 23:49 Kohei KaiGai <kaigai@heterodb.com>:

2021年4月9日(金) 22:51 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/09 12:33, Kohei KaiGai wrote:

2021年4月8日(木) 22:14 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/08 22:02, Kohei KaiGai wrote:

Anyway, attached is the updated version of the patch. This is still based on the latest Kazutaka-san's patch. That is, extra list for ONLY is still passed to FDW. What about committing this version at first? Then we can continue the discussion and change the behavior later if necessary.

Pushed! Thank all involved in this development!!
For record, I attached the final patch I committed.

Ok, it's fair enough for me.

I'll try to sort out my thought, then raise a follow-up discussion if necessary.

Thanks!

The followings are the open items and discussion points that I'm thinking of.

1. Currently the extra information (TRUNCATE_REL_CONTEXT_NORMAL, TRUNCATE_REL_CONTEXT_ONLY or TRUNCATE_REL_CONTEXT_CASCADING) about how a foreign table was specified as the target to truncate in TRUNCATE command is collected and passed to FDW. Does this really need to be passed to FDW? Seems Stephen, Michael and I think that's necessary. But Kaigai-san does not. I also think that TRUNCATE_REL_CONTEXT_CASCADING can be removed because there seems no use case for that maybe.

2. Currently when the same foreign table is specified multiple times in the command, the extra information only for the foreign table found first is collected. For example, when "TRUNCATE ft, ONLY ft" is executed, TRUNCATE_REL_CONTEXT_NORMAL is collected and _ONLY is ignored because "ft" is found first. Is this OK? Or we should collect all, e.g., both _NORMAL and _ONLY should be collected in that example? I think that the current approach (i.e., collect the extra info about table found first if the same table is specified multiple times) is good because even local tables are also treated the same way. But Kaigai-san does not.

3. Currently postgres_fdw specifies ONLY clause in TRUNCATE command that it constructs. That is, if the foreign table is specified with ONLY, postgres_fdw also issues the TRUNCATE command for the corresponding remote table with ONLY to the remote server. Then only root table is truncated in remote server side, and the tables inheriting that are not truncated. Is this behavior desirable? Seems Michael and I think this behavior is OK. But Kaigai-san does not.

Prior to the discussion of 1-3, I like to clarify the role of foreign-tables.
(Likely, it will lead a natural conclusion for the above open items.)

As literal of SQL/MED (Management of External Data), a foreign table
is a representation of external data in PostgreSQL.
It allows to read and (optionally) write the external data wrapped by
FDW drivers, as if we usually read / write heap tables.
By the FDW-APIs, the core PostgreSQL does not care about the
structure, location, volume and other characteristics of
the external data itself. It expects FDW-APIs invocation will perform
as if we access a regular heap table.

On the other hands, we can say local tables are representation of
"internal" data in PostgreSQL.
A heap table consists of one or more files (per BLCKSZ *
RELSEG_SIZE), and table-am intermediates
the on-disk data to/from on-memory structure (TupleTableSlot).
Here are no big differences in the concept. Ok?

As you know, ONLY clause controls whether TRUNCATE command shall run
on child-tables also, not only the parent.
If "ONLY parent_table" is given, its child tables are not picked up by
ExecuteTruncate(), unless the child tables are also
listed individually.
Then, once ExecuteTruncate() picked up the relations, it makes the
relations empty using table-am
(relation_set_new_filenode), and the callee
(heapam_relation_set_new_filenode) does not care about whether the
table is specified with ONLY, or not. It just makes the data
represented by the table empty (in transactional way).

So, how foreign tables shall perform?

Once ExecuteTruncate() picked up a foreign table, according to
ONLY-clause, does FDW driver shall consider
the context where the foreign tables are specified? And, what behavior
is consistent?
I think that FDW driver shall make the external data represented by
the foreign table empty, regardless of the
structure, location, volume and others.

Therefore, if we follow the above assumption, we don't need to inform
the context where foreign-tables are
picked up (TRUNCATE_REL_CONTEXT_*), so postgres_fdw shall not control
the remote TRUNCATE query
according to the flags. It always truncate the entire tables (if
multiple) on behalf of the foreign tables.

This makes me wonder if the information about CASCADE/RESTRICT (maybe also RESTART/CONTINUE) also should not be passed to FDW. You're thinking that? Or only ONLY clause should be ignored for a foreign table?

I think the above information (DropBehavior and restart_seqs) are
valuable to pass.

The CASCADE/RESTRICT clause controls whether the truncate command also
eliminates
the rows that blocks to delete (FKs in RDBMS). Only FDW driver can
know whether the
external data has "removal-blocker", thus we need to pass the
DropBehavior for the callback.

The RESTART/CONTINUE clause also controls whether the truncate command restarts
the relevant resources that is associated with the target table
(Sequences in RDBMS).
Only FDW driver can know whether the external data has relevant
resources to reset,
thus we need to pass the "restart_seqs" for the callback.

Unlike above two parameters, the role of ONLY-clause is already
finished at the time
when ExecuteTruncate() picked up the target relations, from the
standpoint of above
understanding of foreign-tables and external data.

Thought?

Let me remind the discussion at the design level.

If postgres_fdw (and other FDW drivers) needs to consider whether
ONLY-clause is given
on the foreign tables of them, what does a foreign table represent in
PostgreSQL system?

My assumption is, a foreign table provides a view to external data, as
if it performs like a table.
TRUNCATE command eliminates all the segment files, even if a table
contains multiple
underlying files, never eliminate them partially.
If a foreign table is equivalent to a table in SQL operation level,
indeed, ONLY-clause controls
which tables are picked up by the TRUNCATE command, but never controls
which portion of
the data shall be eliminated. So, I conclude that
ExecForeignTruncate() shall eliminate the entire
external data on behalf of a foreign table, regardless of ONLY-clause.

I think it is more significant to clarify prior to the implementation details.
How about your opinions?

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#84Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Justin Pryzby (#82)
Re: TRUNCATE on foreign table

On 2021/04/13 12:46, Justin Pryzby wrote:

On Tue, Apr 13, 2021 at 12:38:35PM +0900, Fujii Masao wrote:

+     <structname>Relation</structname> data structures for each
+     foreign tables to be truncated.

"foreign tables" should be "foreign table" because it follows "each"?

Yes, you're right.

+    <para>
+     <literal>behavior</literal> is either
+     <literal>DROP_RESTRICT</literal> or <literal>DROP_CASCADE</literal>.
+     <literal>DROP_CASCADE</literal> indicates that the
+     <literal>CASCADE</literal> option was specified in the original
<command>TRUNCATE</command> command.

Why did you remove the description for DROP_RESTRICT?

Because in order to handle the default/unspecified case, the description was
going to need to be something like:

| DROP_RESTRICT indicates that the RESTRICT option was specified in the original
| truncate command (or CASCADE option was NOT specified).

What about using "requested" instead of "specified"? If neither RESTRICT nor
CASCADE is specified, we can think that user requested the default behavior,
i.e., RESTRICT. So, for example,

<literal>behavior</literal> is either <literal>DROP_RESTRICT</literal> or
<literal>DROP_CASCADE</literal>, which indicates that the
<literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
requested in the original <command>TRUNCATE</command> command, respectively.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#85Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#83)
Re: TRUNCATE on foreign table

On 2021/04/13 14:22, Kohei KaiGai wrote:

Let me remind the discussion at the design level.

If postgres_fdw (and other FDW drivers) needs to consider whether
ONLY-clause is given
on the foreign tables of them, what does a foreign table represent in
PostgreSQL system?

My assumption is, a foreign table provides a view to external data, as
if it performs like a table.
TRUNCATE command eliminates all the segment files, even if a table
contains multiple
underlying files, never eliminate them partially.
If a foreign table is equivalent to a table in SQL operation level,
indeed, ONLY-clause controls
which tables are picked up by the TRUNCATE command, but never controls
which portion of
the data shall be eliminated. So, I conclude that
ExecForeignTruncate() shall eliminate the entire
external data on behalf of a foreign table, regardless of ONLY-clause.

I think it is more significant to clarify prior to the implementation details.
How about your opinions?

I'm still thinking that it's better to pass all information including
ONLY clause about TRUNCATE command to FDW and leave FDW to determine
how to use them. How postgres_fdw should use the information about ONLY
is debatable. But for now, IMO, users who explicitly specify the ONLY clause for
foreign tables understand the structure of remote tables and want to use ONLY
in TRUNCATE command issued by postgres_fdw. But my opinion might be minority,
so I'd like to hear more opinion about this, from other developers.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#86Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Fujii Masao (#85)
Re: TRUNCATE on foreign table

At Tue, 13 Apr 2021 16:17:12 +0900, Fujii Masao <masao.fujii@oss.nttdata.com> wrote in

On 2021/04/13 14:22, Kohei KaiGai wrote:

Let me remind the discussion at the design level.
If postgres_fdw (and other FDW drivers) needs to consider whether
ONLY-clause is given
on the foreign tables of them, what does a foreign table represent in
PostgreSQL system?
My assumption is, a foreign table provides a view to external data, as
if it performs like a table.
TRUNCATE command eliminates all the segment files, even if a table
contains multiple
underlying files, never eliminate them partially.
If a foreign table is equivalent to a table in SQL operation level,
indeed, ONLY-clause controls
which tables are picked up by the TRUNCATE command, but never controls
which portion of
the data shall be eliminated. So, I conclude that
ExecForeignTruncate() shall eliminate the entire
external data on behalf of a foreign table, regardless of ONLY-clause.
I think it is more significant to clarify prior to the implementation
details.
How about your opinions?

I'm still thinking that it's better to pass all information including
ONLY clause about TRUNCATE command to FDW and leave FDW to determine
how to use them. How postgres_fdw should use the information about
ONLY
is debatable. But for now, IMO, users who explicitly specify the ONLY
clause for
foreign tables understand the structure of remote tables and want to
use ONLY
in TRUNCATE command issued by postgres_fdw. But my opinion might be
minority,
so I'd like to hear more opinion about this, from other developers.

From the syntactical point of view, my opinion on this is that the
"ONLY" in "TRUNCATE ONLY" is assumed to be consumed locally, to tell the
command to disregard local children, so it cannot be propagated further,
whether or not the target relation has children.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#87Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#85)
Re: TRUNCATE on foreign table

2021年4月13日(火) 16:17 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/13 14:22, Kohei KaiGai wrote:

Let me remind the discussion at the design level.

If postgres_fdw (and other FDW drivers) needs to consider whether
ONLY-clause is given
on the foreign tables of them, what does a foreign table represent in
PostgreSQL system?

My assumption is, a foreign table provides a view to external data, as
if it performs like a table.
TRUNCATE command eliminates all the segment files, even if a table
contains multiple
underlying files, never eliminate them partially.
If a foreign table is equivalent to a table in SQL operation level,
indeed, ONLY-clause controls
which tables are picked up by the TRUNCATE command, but never controls
which portion of
the data shall be eliminated. So, I conclude that
ExecForeignTruncate() shall eliminate the entire
external data on behalf of a foreign table, regardless of ONLY-clause.

I think it is more significant to clarify prior to the implementation details.
How about your opinions?

I'm still thinking that it's better to pass all information including
ONLY clause about TRUNCATE command to FDW and leave FDW to determine
how to use them. How postgres_fdw should use the information about ONLY
is debatable. But for now, IMO, users who explicitly specify the ONLY clause for
foreign tables understand the structure of remote tables and want to use ONLY
in TRUNCATE command issued by postgres_fdw. But my opinion might be minority,
so I'd like to hear more opinion about this, from other developers.

Here are two points to discuss.

Regarding to the FDW-APIs, yes, nobody can deny someone want to implement
their own FDW module that adds special handling when its foreign table
is specified
with ONLY-clause, even if we usually ignore.

On the other hand, when we consider a foreign table is an abstraction
of an external
data source, at least, the current postgres_fdw's behavior is not consistent.

When a foreign table by postgres_fdw that maps a remote parent table,
has a local
child table,

This command shows all the rows from both of local and remote.

postgres=# select * from f_table ;
id | v
----+-----------------------------
1 | remote table t_parent id=1
2 | remote table t_parent id=2
3 | remote table t_parent id=3
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
50 | it is l_child id=50
51 | it is l_child id=51
52 | it is l_child id=52
53 | it is l_child id=53
(13 rows)

If f_table is specified with "ONLY", it picks up only the parent table
(f_table),
however, the ONLY-clause is not pushed down to the remote side.

postgres=# select * from only f_table ;
id | v
----+-----------------------------
1 | remote table t_parent id=1
2 | remote table t_parent id=2
3 | remote table t_parent id=3
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
(9 rows)

On the other hand, TRUNCATE ONLY f_table works as follows...

postgres=# truncate only f_table;
TRUNCATE TABLE
postgres=# select * from f_table ;
id | v
----+-----------------------------
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
50 | it is l_child id=50
51 | it is l_child id=51
52 | it is l_child id=52
53 | it is l_child id=53
(10 rows)

It eliminates the rows only from the remote parent table although it
is a part of the foreign table.

My expectation is that the above command shows rows only from the local child
table (id=50...53).

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#88Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Kohei KaiGai (#87)
Re: TRUNCATE on foreign table

On Tue, Apr 13, 2021 at 2:37 PM Kohei KaiGai <kaigai@heterodb.com> wrote:

Here are two points to discuss.

Regarding the FDW APIs, yes, nobody can deny that someone may want to
implement their own FDW module that adds special handling when its foreign
table is specified with the ONLY clause, even if we usually ignore it.

On the other hand, when we consider a foreign table is an abstraction
of an external
data source, at least, the current postgres_fdw's behavior is not consistent.

When a foreign table by postgres_fdw that maps a remote parent table,
has a local
child table,

This command shows all the rows from both of local and remote.

postgres=# select * from f_table ;
id | v
----+-----------------------------
1 | remote table t_parent id=1
2 | remote table t_parent id=2
3 | remote table t_parent id=3
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
50 | it is l_child id=50
51 | it is l_child id=51
52 | it is l_child id=52
53 | it is l_child id=53
(13 rows)

If f_table is specified with "ONLY", it picks up only the parent table
(f_table),
however, ONLY-clause is not push down to the remote side.

postgres=# select * from only f_table ;
id | v
----+-----------------------------
1 | remote table t_parent id=1
2 | remote table t_parent id=2
3 | remote table t_parent id=3
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
(9 rows)

On the other hand, TRUNCATE ONLY f_table works as follows...

postgres=# truncate only f_table;
TRUNCATE TABLE
postgres=# select * from f_table ;
id | v
----+-----------------------------
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
50 | it is l_child id=50
51 | it is l_child id=51
52 | it is l_child id=52
53 | it is l_child id=53
(10 rows)

It eliminates the rows only from the remote parent table although it
is a part of the foreign table.

My expectation at the above command shows rows from the local child
table (id=50...53).

Yeah, ONLY clause is not pushed to the remote server in case of SELECT
commands. This is also true for DELETE and UPDATE commands on foreign
tables. I'm not sure if it wasn't thought necessary or if there is an
issue to push it or I may be missing something here. I think we can
start a separate thread to see other hackers' opinions on this.

I'm not sure whether all the clauses that are possible for
SELECT/UPDATE/DELETE/INSERT with local tables are pushed to the remote
server by postgres_fdw.

Well, now foreign TRUNCATE pushes the ONLY clause to the remote server
which is inconsistent when compared to SELECT/UPDATE/DELETE commands.
If we were to keep it consistent across all foreign commands that
ONLY clause is not pushed to remote server, then we can restrict for
TRUNCATE too and even if "TRUNCATE ONLY foreign_tbl" is specified,
just pass "TRUNCATE foreign_tbl" to remote server. Having said that, I
don't see any real problem in pushing the ONLY clause, at least in
case of TRUNCATE.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#89Kohei KaiGai
kaigai@heterodb.com
In reply to: Bharath Rupireddy (#88)
Re: TRUNCATE on foreign table

2021年4月13日(火) 21:03 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

On Tue, Apr 13, 2021 at 2:37 PM Kohei KaiGai <kaigai@heterodb.com> wrote:

Here are two points to discuss.

Regarding to the FDW-APIs, yes, nobody can deny someone want to implement
their own FDW module that adds special handling when its foreign table
is specified
with ONLY-clause, even if we usually ignore.

On the other hand, when we consider a foreign table is an abstraction
of an external
data source, at least, the current postgres_fdw's behavior is not consistent.

When a foreign table by postgres_fdw that maps a remote parent table,
has a local
child table,

This command shows all the rows from both of local and remote.

postgres=# select * from f_table ;
id | v
----+-----------------------------
1 | remote table t_parent id=1
2 | remote table t_parent id=2
3 | remote table t_parent id=3
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
50 | it is l_child id=50
51 | it is l_child id=51
52 | it is l_child id=52
53 | it is l_child id=53
(13 rows)

If f_table is specified with "ONLY", it picks up only the parent table
(f_table),
however, ONLY-clause is not push down to the remote side.

postgres=# select * from only f_table ;
id | v
----+-----------------------------
1 | remote table t_parent id=1
2 | remote table t_parent id=2
3 | remote table t_parent id=3
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
(9 rows)

On the other hand, TRUNCATE ONLY f_table works as follows...

postgres=# truncate only f_table;
TRUNCATE TABLE
postgres=# select * from f_table ;
id | v
----+-----------------------------
10 | remote table t_child1 id=10
11 | remote table t_child1 id=11
12 | remote table t_child1 id=12
20 | remote table t_child2 id=20
21 | remote table t_child2 id=21
22 | remote table t_child2 id=22
50 | it is l_child id=50
51 | it is l_child id=51
52 | it is l_child id=52
53 | it is l_child id=53
(10 rows)

It eliminates the rows only from the remote parent table although it
is a part of the foreign table.

My expectation at the above command shows rows from the local child
table (id=50...53).

Yeah, ONLY clause is not pushed to the remote server in case of SELECT
commands. This is also true for DELETE and UPDATE commands on foreign
tables. I'm not sure if it wasn't thought necessary or if there is an
issue to push it or I may be missing something here. I think we can
start a separate thread to see other hackers' opinions on this.

I'm not sure whether all the clauses that are possible for
SELECT/UPDATE/DELETE/INSERT with local tables are pushed to the remote
server by postgres_fdw.

Well, now foreign TRUNCATE pushes the ONLY clause to the remote server
which is inconsistent when compared to SELECT/UPDATE/DELETE commands.
If we were to keep it consistent across all foreign commands that
ONLY clause is not pushed to remote server, then we can restrict for
TRUNCATE too and even if "TRUNCATE ONLY foreign_tbl" is specified,
just pass "TRUNCATE foreign_tbl" to remote server. Having said that, I
don't see any real problem in pushing the ONLY clause, at least in
case of TRUNCATE.

If the ONLY clause were pushed down into the remote query of postgres_fdw,
what would the foreign table represent in the local system?

In my understanding, a local postgres_fdw foreign table is a
representation of the entire tree of the remote parent table and its
children.
Thus, we have assumed that a DML command fetches rows from the remote
parent table without the ONLY clause once PostgreSQL has picked up the
foreign table as a scan target.
I think we don't need to adjust the definition of the role of a foreign
table, even if it represents a non-RDBMS data source.

If a postgres_fdw foreign table supported a special table option that
adds the ONLY clause whenever a remote query references the remote table,
it would be appropriate to add the ONLY clause to the remote TRUNCATE
command as well, not only to SELECT/INSERT/UPDATE/DELETE.
In other words, if a foreign table represents only a remote parent table,
it is appropriate to truncate only the remote parent table.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#90Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kohei KaiGai (#89)
Re: TRUNCATE on foreign table

On 2021/04/13 23:25, Kohei KaiGai wrote:

2021年4月13日(火) 21:03 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Yeah, ONLY clause is not pushed to the remote server in case of SELECT
commands. This is also true for DELETE and UPDATE commands on foreign
tables.

This sounds like a reasonable reason why ONLY should be ignored in TRUNCATE
on foreign tables, for now. If there is an existing rule about how to treat
the ONLY clause for foreign tables, basically TRUNCATE should follow it at
this stage. Maybe we can change the rule, but that's an item for v15 or later?

I'm not sure if it wasn't thought necessary or if there is an
issue to push it or I may be missing something here.

I could not find any past discussion about foreign tables and the ONLY clause.
I guess that ONLY is ignored in the case of SELECT on foreign tables because
ONLY is interpreted outside the executor and it's not easy to change the
executor so that ONLY is passed to the FDW. Maybe.

I think we can
start a separate thread to see other hackers' opinions on this.

I'm not sure whether all the clauses that are possible for
SELECT/UPDATE/DELETE/INSERT with local tables are pushed to the remote
server by postgres_fdw.

Well, now foreign TRUNCATE pushes the ONLY clause to the remote server
which is inconsistent when compared to SELECT/UPDATE/DELETE commands.
If we were to keep it consistent across all foreign commands that
ONLY clause is not pushed to remote server, then we can restrict for
TRUNCATE too and even if "TRUNCATE ONLY foreign_tbl" is specified,
just pass "TRUNCATE foreign_tbl" to remote server. Having said that, I
don't see any real problem in pushing the ONLY clause, at least in
case of TRUNCATE.

If ONLY-clause would be pushed down to the remote query of postgres_fdw,
what does the foreign-table represent in the local system?

In my understanding, a local foreign table by postgres_fdw is a
representation of
entire tree of the remote parent table and its children.

If so, I'm still wondering why CASCADE/RESTRICT (i.e., DropBehavior) needs to
be passed to FDW. IOW, if a foreign table is an abstraction of an external
data source, ISTM that postgres_fdw should always issue TRUNCATE with
CASCADE. Why do we need to allow RESTRICT to be specified for a foreign table
even though it's an abstraction of an external data source?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#91Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#90)
Re: TRUNCATE on foreign table

On Tue, Apr 13, 2021 at 8:30 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

On 2021/04/13 23:25, Kohei KaiGai wrote:

2021年4月13日(火) 21:03 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Yeah, ONLY clause is not pushed to the remote server in case of SELECT
commands. This is also true for DELETE and UPDATE commands on foreign
tables.

This sounds reasonable reason why ONLY should be ignored in TRUNCATE on
foreign tables, for now. If there is the existing rule about how to treat
ONLY clause for foreign tables, basically TRUNCATE should follow that at this
stage. Maybe we can change the rule, but it's an item for v15 or later?

I'm not sure if it wasn't thought necessary or if there is an
issue to push it or I may be missing something here.

I could not find the past discussion about foreign tables and ONLY clause.
I guess that ONLY is ignored in SELECT on foreign tables case because ONLY
is interpreted outside the executor and it's not easy to change the executor
so that ONLY is passed to FDW. Maybe..

I think we can
start a separate thread to see other hackers' opinions on this.

I'm not sure whether all the clauses that are possible for
SELECT/UPDATE/DELETE/INSERT with local tables are pushed to the remote
server by postgres_fdw.

Well, now foreign TRUNCATE pushes the ONLY clause to the remote server
which is inconsistent when compared to SELECT/UPDATE/DELETE commands.
If we were to keep it consistent across all foreign commands that
ONLY clause is not pushed to remote server, then we can restrict for
TRUNCATE too and even if "TRUNCATE ONLY foreign_tbl" is specified,
just pass "TRUNCATE foreign_tbl" to remote server. Having said that, I
don't see any real problem in pushing the ONLY clause, at least in
case of TRUNCATE.

If ONLY-clause would be pushed down to the remote query of postgres_fdw,
what does the foreign-table represent in the local system?

In my understanding, a local foreign table by postgres_fdw is a
representation of
entire tree of the remote parent table and its children.

If so, I'm still wondering why CASCADE/RESTRICT (i.e., DropBehavior) needs to
be passed to FDW. IOW, if a foreign table is an abstraction of an external
data source, ISTM that postgres_fdw should always issue TRUNCATE with
CASCADE. Why do we need to allow RESTRICT to be specified for a foreign table
even though it's an abstraction of an external data source?

IMHO, we can push all the TRUNCATE options (ONLY, RESTRICT, CASCADE,
RESTART/CONTINUE IDENTITY), because doing so doesn't pose any major
challenge (implementation-wise), unlike pushing some clauses in
SELECT/UPDATE/DELETE, and we already do this on the master. It doesn't
look good and may confuse users, if we push some options and restrict
others. We should have an explicit note in the documentation saying we
push all these options to the remote server. We can leave it to the
user to write TRUNCATE for foreign tables with the appropriate
options. If somebody complains about a problem that they will face
with this behavior, we can revisit. This is my opinion, others may
disagree.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#92Kohei KaiGai
kaigai@heterodb.com
In reply to: Fujii Masao (#90)
Re: TRUNCATE on foreign table

2021年4月14日(水) 0:00 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/13 23:25, Kohei KaiGai wrote:

2021年4月13日(火) 21:03 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Yeah, ONLY clause is not pushed to the remote server in case of SELECT
commands. This is also true for DELETE and UPDATE commands on foreign
tables.

This sounds reasonable reason why ONLY should be ignored in TRUNCATE on
foreign tables, for now. If there is the existing rule about how to treat
ONLY clause for foreign tables, basically TRUNCATE should follow that at this
stage. Maybe we can change the rule, but it's an item for v15 or later?

I'm not sure if it wasn't thought necessary or if there is an
issue to push it or I may be missing something here.

I could not find the past discussion about foreign tables and ONLY clause.
I guess that ONLY is ignored in SELECT on foreign tables case because ONLY
is interpreted outside the executor and it's not easy to change the executor
so that ONLY is passed to FDW. Maybe..

I think we can
start a separate thread to see other hackers' opinions on this.

I'm not sure whether all the clauses that are possible for
SELECT/UPDATE/DELETE/INSERT with local tables are pushed to the remote
server by postgres_fdw.

Well, now foreign TRUNCATE pushes the ONLY clause to the remote server
which is inconsistent when compared to SELECT/UPDATE/DELETE commands.
If we were to keep it consistent across all foreign commands that
ONLY clause is not pushed to remote server, then we can restrict for
TRUNCATE too and even if "TRUNCATE ONLY foreign_tbl" is specified,
just pass "TRUNCATE foreign_tbl" to remote server. Having said that, I
don't see any real problem in pushing the ONLY clause, at least in
case of TRUNCATE.

If ONLY-clause would be pushed down to the remote query of postgres_fdw,
what does the foreign-table represent in the local system?

In my understanding, a local foreign table by postgres_fdw is a
representation of
entire tree of the remote parent table and its children.

If so, I'm still wondering why CASCADE/RESTRICT (i.e., DropBehavior) needs to
be passed to FDW. IOW, if a foreign table is an abstraction of an external
data source, ISTM that postgres_fdw should always issue TRUNCATE with
CASCADE. Why do we need to allow RESTRICT to be specified for a foreign table
even though it's an abstraction of an external data source?

Please assume the internal heap data is managed by PostgreSQL core, and
external data source is managed by postgres_fdw (or other FDW driver).
TRUNCATE command requires these object managers to eliminate the data
on behalf of the foreign tables picked up.

Even though the object manager tries to eliminate the managed data, it may be
restricted for some reason, e.g. FK restrictions in the case of PostgreSQL
internal data. In this case, the CASCADE/RESTRICT option tells the object
manager how to handle the target data.

The ONLY clause controls whose data shall be eliminated.
On the other hand, CASCADE/RESTRICT and CONTINUE/RESTART control
how data shall be eliminated. It is a fundamental difference.

Best regards,
--
HeteroDB, Inc / The PG-Strom Project
KaiGai Kohei <kaigai@heterodb.com>

#93Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Kohei KaiGai (#92)
Re: TRUNCATE on foreign table

At Wed, 14 Apr 2021 13:17:55 +0900, Kohei KaiGai <kaigai@heterodb.com> wrote in

2021年4月14日(水) 0:00 Fujii Masao <masao.fujii@oss.nttdata.com>:

On 2021/04/13 23:25, Kohei KaiGai wrote:

2021年4月13日(火) 21:03 Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>:

Yeah, ONLY clause is not pushed to the remote server in case of SELECT
commands. This is also true for DELETE and UPDATE commands on foreign
tables.

This sounds reasonable reason why ONLY should be ignored in TRUNCATE on
foreign tables, for now. If there is the existing rule about how to treat
ONLY clause for foreign tables, basically TRUNCATE should follow that at this
stage. Maybe we can change the rule, but it's an item for v15 or later?

I'm not sure if it wasn't thought necessary or if there is an
issue to push it or I may be missing something here.

I could not find the past discussion about foreign tables and ONLY clause.
I guess that ONLY is ignored in SELECT on foreign tables case because ONLY
is interpreted outside the executor and it's not easy to change the executor
so that ONLY is passed to FDW. Maybe..

I think we can
start a separate thread to see other hackers' opinions on this.

I'm not sure whether all the clauses that are possible for
SELECT/UPDATE/DELETE/INSERT with local tables are pushed to the remote
server by postgres_fdw.

Well, now foreign TRUNCATE pushes the ONLY clause to the remote server
which is inconsistent when compared to SELECT/UPDATE/DELETE commands.
If we were to keep it consistent across all foreign commands that
ONLY clause is not pushed to remote server, then we can restrict for
TRUNCATE too and even if "TRUNCATE ONLY foreign_tbl" is specified,
just pass "TRUNCATE foreign_tbl" to remote server. Having said that, I
don't see any real problem in pushing the ONLY clause, at least in
case of TRUNCATE.

If ONLY-clause would be pushed down to the remote query of postgres_fdw,
what does the foreign-table represent in the local system?

In my understanding, a local foreign table by postgres_fdw is a
representation of
entire tree of the remote parent table and its children.

If so, I'm still wondering why CASCADE/RESTRICT (i.e., DropBehavior) needs to
be passed to FDW. IOW, if a foreign table is an abstraction of an external
data source, ISTM that postgres_fdw should always issue TRUNCATE with
CASCADE. Why do we need to allow RESTRICT to be specified for a foreign table
even though it's an abstraction of an external data source?

Please assume the internal heap data is managed by PostgreSQL core, and
external data source is managed by postgres_fdw (or other FDW driver).
TRUNCATE command requires these object managers to eliminate the data
on behalf of the foreign tables picked up.

Even though the object manager tries to eliminate the managed data, it may be
restricted by some reason; FK restrictions in case of PostgreSQL internal data.
In this case, CASCADE/RESTRICT option suggests the object manager how
to handle the target data.

The ONLY clause controls whose data shall be eliminated.
On the other hand, CASCADE/RESTRICT and CONTINUE/RESTART control
how data shall be eliminated. It is a fundamental difference.

I object to unconditionally pushing ONLY to the remote. As Kaigai-san said,
it works in an apparently wrong way when a user wants to truncate only
the specified foreign table in an inheritance tree, and there's no way to
avoid the behavior.

I also don't think it is right to push down CASCADE/RESTRICT. These
options are meant to propagate truncation to *local* referrer tables
from the *foreign* table, not to the remote referrer tables from the
original table on the remote. If a user wants to allow that behavior, it
should be specified by foreign table options. (That is bothersome when
someone wants to specify the behavior on the fly.)

alter foreign table ft1 options (add truncate_cascade 'true');

Also, CONTINUE/RESTART IDENTITY should not work, since foreign tables
don't have an identity sequence. However, we might be able to push down
these options, since they affect only the target table.

I would accept that behavior if TRUNCATE were "TRUNCATE FOREIGN
TABLE", which explicitly targets a foreign table. But I'm not sure it
is possible to add such syntax in a reasonable way.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#94Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#91)
Re: TRUNCATE on foreign table

On 2021/04/14 12:54, Bharath Rupireddy wrote:

IMHO, we can push all the TRUNCATE options (ONLY, RESTRICTED, CASCADE,
RESTART/CONTINUE IDENTITY), because it doesn't have any major
challenge(implementation wise) unlike pushing some clauses in
SELECT/UPDATE/DELETE and we already do this on the master. It doesn't
look good and may confuse users, if we push some options and restrict
others. We should have an explicit note in the documentation saying we
push all these options to the remote server. We can leave it to the
user to write TRUNCATE for foreign tables with the appropriate
options. If somebody complains about a problem that they will face
with this behavior, we can revisit.

That's one of the options. But I'm afraid it's hard to drop (revisit)
the feature once it has been released. So if there is no explicit
use case for that, basically I'd like to drop it before release,
just as we agreed to drop the unused TRUNCATE_REL_CONTEXT_CASCADING.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#95Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kyotaro Horiguchi (#93)
Re: TRUNCATE on foreign table

On 2021/04/14 13:41, Kyotaro Horiguchi wrote:

At Wed, 14 Apr 2021 13:17:55 +0900, Kohei KaiGai <kaigai@heterodb.com> wrote in

Please assume the internal heap data is managed by PostgreSQL core, and
external data source is managed by postgres_fdw (or other FDW driver).
TRUNCATE command requires these object managers to eliminate the data
on behalf of the foreign tables picked up.

Even though the object manager tries to eliminate the managed data, it may be
restricted by some reason; FK restrictions in case of PostgreSQL internal data.
In this case, CASCADE/RESTRICT option suggests the object manager how
to handle the target data.

The ONLY clause controls whose data shall be eliminated.
On the other hand, CASCADE/RESTRICT and CONTINUE/RESTART control
how data shall be eliminated. It is a fundamental difference.

I have a different view on this classification. IMO ONLY and RESTRICT/CASCADE
should be categorized into the same group, because both options specify
whether to truncate dependent tables or not. If we treat a foreign table as
an abstraction of an external data source, ISTM that we should not take care
of table dependency in the remote server. IOW, we should truncate the entire
external data source, i.e., postgres_fdw should push neither ONLY nor
RESTRICT down to the remote server.

I object to unconditionally push ONLY to remote. As Kaigai-san said
that it works an apparent wrong way when a user wants to truncate only
the specified foreign table in a inheritance tree and there's no way to
avoid the behavior.

I also don't think it is right to push down CASCADE/RESTRICT. The
options suggest to propagate truncation to *local* referrer tables
from the *foreign* table, not to the remote referrer tables from the
original table on remote.

Agreed.

If a user want to allow that behavior it
should be specified by foreign table options. (It is bothersome when
someone wants to specify the behavior on-the-fly.)

alter foreign table ft1 options (add truncate_cascade 'true');

Maybe. I think this is the item for v15 or later.

Also, CONTINUE/RESTART IDENTITY should not work since foreign tables
don't have an identity-sequence. However, this we might be able to
push down the options since it affects only the target table.

+1

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#96Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#94)
Re: TRUNCATE on foreign table

On Thu, Apr 15, 2021 at 8:19 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

On 2021/04/14 12:54, Bharath Rupireddy wrote:

IMHO, we can push all the TRUNCATE options (ONLY, RESTRICTED, CASCADE,
RESTART/CONTINUE IDENTITY), because it doesn't have any major
challenge(implementation wise) unlike pushing some clauses in
SELECT/UPDATE/DELETE and we already do this on the master. It doesn't
look good and may confuse users, if we push some options and restrict
others. We should have an explicit note in the documentation saying we
push all these options to the remote server. We can leave it to the
user to write TRUNCATE for foreign tables with the appropriate
options. If somebody complains about a problem that they will face
with this behavior, we can revisit.

That's one of the options. But I'm afraid it's hard to drop (revisit)
the feature once it has been released. So if there is no explicit
use case for that, basically I'd like to drop that before release
like we agree to drop unused TRUNCATE_REL_CONTEXT_CASCADING.

Thanks. Looks like the decision is going in the direction of
restricting those options, I will withdraw my point.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#97Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#96)
1 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/16 9:15, Bharath Rupireddy wrote:

On Thu, Apr 15, 2021 at 8:19 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

On 2021/04/14 12:54, Bharath Rupireddy wrote:

IMHO, we can push all the TRUNCATE options (ONLY, RESTRICTED, CASCADE,
RESTART/CONTINUE IDENTITY), because it doesn't have any major
challenge(implementation wise) unlike pushing some clauses in
SELECT/UPDATE/DELETE and we already do this on the master. It doesn't
look good and may confuse users, if we push some options and restrict
others. We should have an explicit note in the documentation saying we
push all these options to the remote server. We can leave it to the
user to write TRUNCATE for foreign tables with the appropriate
options. If somebody complains about a problem that they will face
with this behavior, we can revisit.

That's one of the options. But I'm afraid it's hard to drop (revisit)
the feature once it has been released. So if there is no explicit
use case for that, basically I'd like to drop that before release
like we agree to drop unused TRUNCATE_REL_CONTEXT_CASCADING.

Thanks. Looks like the decision is going in the direction of
restricting those options, I will withdraw my point.

We are still discussing whether the RESTRICT option should be pushed down to
a foreign data wrapper. But ISTM that we have at least reached consensus on
dropping the extra information for each foreign table. So what about applying
the attached patch and removing the extra information first?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

truncate_foreign_table_dont_pass_only_clause.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_dont_pass_only_clause.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc4c3620d..7a798530e3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2179,24 +2179,19 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 void
 deparseTruncateSql(StringInfo buf,
 				   List *rels,
-				   List *rels_extra,
 				   DropBehavior behavior,
 				   bool restart_seqs)
 {
-	ListCell   *lc1,
-			   *lc2;
+	ListCell   *cell;
 
 	appendStringInfoString(buf, "TRUNCATE ");
 
-	forboth(lc1, rels, lc2, rels_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = lfirst(cell);
 
-		if (lc1 != list_head(rels))
+		if (cell != list_head(rels))
 			appendStringInfoString(buf, ", ");
-		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
-			appendStringInfoString(buf, "ONLY ");
 
 		deparseRelation(buf, rel);
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5070d93239..1a3f5cb4ad 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8218,13 +8218,13 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -8388,22 +8388,22 @@ SELECT sum(id) FROM tru_ftable;   -- 95
   95
 (1 row)
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
- sum 
------
-  60
+TRUNCATE ONLY tru_ftable;		-- truncate both parent and child
+SELECT count(*) FROM tru_ftable;   -- 0
+ count 
+-------
+     0
 (1 row)
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+SELECT sum(id) FROM tru_ftable;		-- 115
  sum 
 -----
- 175
+ 115
 (1 row)
 
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
  count 
 -------
      0
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index c590f374c6..c521cba3fc 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -401,7 +401,6 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
 static void postgresExecForeignTruncate(List *rels,
-										List *rels_extra,
 										DropBehavior behavior,
 										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
@@ -2881,7 +2880,6 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
  */
 static void
 postgresExecForeignTruncate(List *rels,
-							List *rels_extra,
 							DropBehavior behavior,
 							bool restart_seqs)
 {
@@ -2964,7 +2962,7 @@ postgresExecForeignTruncate(List *rels,
 
 	/* Construct the TRUNCATE command string */
 	initStringInfo(&sql);
-	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+	deparseTruncateSql(&sql, rels, behavior, restart_seqs);
 
 	/* Issue the TRUNCATE command to remote server */
 	do_sql_command(conn, sql.data);
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 5d44b75314..9591c0f6c2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -209,7 +209,6 @@ extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
-							   List *rels_extra,
 							   DropBehavior behavior,
 							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 74590089bd..97c156a472 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2355,7 +2355,6 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
@@ -2363,6 +2362,7 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -2439,13 +2439,13 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
 INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
 SELECT sum(id) FROM tru_ftable;   -- 95
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
+TRUNCATE ONLY tru_ftable;		-- truncate both parent and child
+SELECT count(*) FROM tru_ftable;   -- 0
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+SELECT sum(id) FROM tru_ftable;		-- 115
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
 
 -- cleanup
 DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 98882ddab8..3fe373ef00 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1071,28 +1071,16 @@ EndDirectModify(ForeignScanState *node);
     <para>
 <programlisting>
 void
-ExecForeignTruncate(List *rels, List *rels_extra,
-                    DropBehavior behavior, bool restart_seqs);
+ExecForeignTruncate(List *rels,
+                    DropBehavior behavior,
+                    bool restart_seqs);
 </programlisting>
 
      Truncate a set of foreign tables specified in <literal>rels</literal>.
      This function is called when <xref linkend="sql-truncate"/> is executed
      on foreign tables.  <literal>rels</literal> is the list of
      <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
-     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
-     in <command>TRUNCATE</command>,
-     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
-     which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
+     a foreign table to truncate.
     </para>
 
     <para>
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 096a6f2891..3d6e7edbae 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -323,7 +323,6 @@ typedef struct ForeignTruncateInfo
 {
 	Oid			serverid;
 	List	   *rels;
-	List	   *rels_extra;
 } ForeignTruncateInfo;
 
 /*
@@ -1620,7 +1619,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1654,9 +1652,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
-		relids_extra = lappend_int(relids_extra, (recurse ?
-												  TRUNCATE_REL_CONTEXT_NORMAL :
-												  TRUNCATE_REL_CONTEXT_ONLY));
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1704,8 +1699,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1718,7 +1711,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1739,15 +1732,13 @@ ExecuteTruncate(TruncateStmt *stmt)
  *
  * explicit_rels is the list of Relations to truncate that the command
  * specified.  relids is the list of Oids corresponding to explicit_rels.
- * relids_extra is the list of integer values that deliver extra information
- * about relations in explicit_rels.  relids_logged is the list of Oids
- * (a subset of relids) that require WAL-logging.  This is all a bit redundant,
- * but the existing callers have this information handy in this form.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging.  This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
  */
 void
 ExecuteTruncateGuts(List *explicit_rels,
 					List *relids,
-					List *relids_extra,
 					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
@@ -1760,8 +1751,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
-	ListCell   *lc1,
-			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1799,8 +1788,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1901,11 +1888,9 @@ ExecuteTruncateGuts(List *explicit_rels,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	Assert(list_length(rels) == list_length(relids_extra));
-	forboth(lc1, rels, lc2, relids_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = (Relation) lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = (Relation) lfirst(cell);
 
 		/*
 		 * Save OID of partitioned tables for later; nothing else to do for
@@ -1952,7 +1937,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			{
 				ft_info->serverid = serverid;
 				ft_info->rels = NIL;
-				ft_info->rels_extra = NIL;
 			}
 
 			/*
@@ -1960,7 +1944,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			 * foreign table belongs to.
 			 */
 			ft_info->rels = lappend(ft_info->rels, rel);
-			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
 			continue;
 		}
 
@@ -2044,7 +2027,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 				Assert(routine->ExecForeignTruncate != NULL);
 
 				routine->ExecForeignTruncate(ft_info->rels,
-											 ft_info->rels_extra,
 											 behavior,
 											 restart_seqs);
 			}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index fb3ba5c415..4dd869aaba 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,7 +1795,6 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1825,7 +1824,6 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1864,7 +1862,6 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1879,7 +1876,6 @@ apply_handle_truncate(StringInfo s)
 	 */
 	ExecuteTruncateGuts(rels,
 						relids,
-						relids_extra,
 						relids_logged,
 						DROP_RESTRICT,
 						restart_seqs);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b808a07e46..14f4b4882f 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -21,16 +21,6 @@
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
-/*
- * These values indicate how a relation was specified as the target to
- * truncate in TRUNCATE command.
- */
-#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
-#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
-#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
-												 * due to dependency (e.g.,
-												 * partition table) */
-
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
 
@@ -68,7 +58,6 @@ extern void CheckTableNotInUse(Relation rel, const char *stmt);
 extern void ExecuteTruncate(TruncateStmt *stmt);
 extern void ExecuteTruncateGuts(List *explicit_rels,
 								List *relids,
-								List *relids_extra,
 								List *relids_logged,
 								DropBehavior behavior,
 								bool restart_seqs);
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 4ebbca6de9..4f17becbb8 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -161,7 +161,6 @@ typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
 typedef void (*ExecForeignTruncate_function) (List *rels,
-											  List *rels_extra,
 											  DropBehavior behavior,
 											  bool restart_seqs);
 
#98Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Fujii Masao (#97)
1 attachment(s)
Re: TRUNCATE on foreign table

At Fri, 16 Apr 2021 11:54:16 +0900, Fujii Masao <masao.fujii@oss.nttdata.com> wrote in

On 2021/04/16 9:15, Bharath Rupireddy wrote:

On Thu, Apr 15, 2021 at 8:19 PM Fujii Masao <masao.fujii@oss.nttdata.com>
wrote:

On 2021/04/14 12:54, Bharath Rupireddy wrote:

IMHO, we can push all the TRUNCATE options (ONLY, RESTRICT, CASCADE,
RESTART/CONTINUE IDENTITY), because doing so doesn't pose any major
challenge (implementation-wise), unlike pushing some clauses in
SELECT/UPDATE/DELETE and we already do this on the master. It doesn't
look good and may confuse users, if we push some options and restrict
others. We should have an explicit note in the documentation saying we
push all these options to the remote server. We can leave it to the
user to write TRUNCATE for foreign tables with the appropriate
options. If somebody complains about a problem that they will face
with this behavior, we can revisit.

That's one of the options. But I'm afraid it's hard to drop (revisit)
the feature once it has been released. So if there is no explicit
use case for that, basically I'd like to drop that before release
like we agree to drop unused TRUNCATE_REL_CONTEXT_CASCADING.

Thanks. Looks like the decision is going in the direction of
restricting those options, I will withdraw my point.

We are still discussing whether RESTRICT option should be pushed down to
a foreign data wrapper. But ISTM at least we could reach the consensus about
the drop of extra information for each foreign table. So what about applying
the attached patch and remove the extra information at first?

I'm fine with that direction. Thanks for the patch.

The change is straight-forward and looks fine, except the following
part.

==== contrib/postgres_fdw/sql/postgres_fdw.sql: 2436 -- after patching
2436> -- in case when remote table has inherited children
2437> CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
2438> INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
2439> INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
2440> SELECT sum(id) FROM tru_ftable; -- 95
2441>
2442> TRUNCATE ONLY tru_ftable; -- truncate both parent and child
2443> SELECT count(*) FROM tru_ftable; -- 0
2444>
2445> INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
2446> SELECT sum(id) FROM tru_ftable; -- 115
2447> TRUNCATE tru_ftable; -- truncate both of parent and child
2448> SELECT count(*) FROM tru_ftable; -- 0

L2445-L2448 doesn't work as described since L2445 inserts tuples only
to the parent.

And there's a slight difference for no reason between the comment at
2442 and 2447.

(The attached is a fix on top of the proposed patch.)

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachments:

truncate_foreign_table_dont_pass_only_clause_fix.patch.txttext/plain; charset=us-asciiDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 1a3f5cb4ad..d32f291089 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8388,7 +8388,7 @@ SELECT sum(id) FROM tru_ftable;   -- 95
   95
 (1 row)
 
-TRUNCATE ONLY tru_ftable;		-- truncate both parent and child
+TRUNCATE ONLY tru_ftable;		-- truncate both of parent and child
 SELECT count(*) FROM tru_ftable;   -- 0
  count 
 -------
@@ -8396,10 +8396,11 @@ SELECT count(*) FROM tru_ftable;   -- 0
 (1 row)
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 115
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
  sum 
 -----
- 115
+ 255
 (1 row)
 
 TRUNCATE tru_ftable;			-- truncate both of parent and child
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 97c156a472..65643e120d 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2439,11 +2439,12 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
 INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
 SELECT sum(id) FROM tru_ftable;   -- 95
 
-TRUNCATE ONLY tru_ftable;		-- truncate both parent and child
+TRUNCATE ONLY tru_ftable;		-- truncate both of parent and child
 SELECT count(*) FROM tru_ftable;   -- 0
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 115
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
 TRUNCATE tru_ftable;			-- truncate both of parent and child
 SELECT count(*) FROM tru_ftable;    -- 0
 
#99Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#97)
Re: TRUNCATE on foreign table

On Fri, Apr 16, 2021 at 8:24 AM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

We are still discussing whether RESTRICT option should be pushed down to
a foreign data wrapper. But ISTM at least we could reach the consensus about
the drop of extra information for each foreign table. So what about applying
the attached patch and remove the extra information at first?

Thanks for the patch, here are some comments:

1) Maybe new empty lines would be better so that the code doesn't look
cluttered:
relids = lappend_oid(relids, myrelid); --> a new line after this.
/* Log this relation only if needed for logical decoding */
if (RelationIsLogicallyLogged(rel))

relids = lappend_oid(relids, childrelid); --> a new line after this.
/* Log this relation only if needed for logical decoding */

relids = lappend_oid(relids, relid); --> a new line after this.
/* Log this relation only if needed for logical decoding */
if (RelationIsLogicallyLogged(rel))

2) Instead of
on foreign tables. <literal>rels</literal> is the list of
<structname>Relation</structname> data structure that indicates
a foreign table to truncate.

I think it is better with:
on foreign tables. <literal>rels</literal> is the list of
<structname>Relation</structname> data structures, where each
entry indicates a foreign table to truncate.

3) How about adding an extra para (after the para below in
postgres_fdw.sgml) on WHY we don't push "ONLY" to foreign tables while
truncating? We could extend the same para to cover other options if
we choose not to push them either.
<command>DELETE</command>, or <command>TRUNCATE</command>.
(Of course, the remote user you have specified in your user mapping must
have privileges to do these things.)

4) Isn't it better to mention the "ONLY" option is not pushed to remote
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;

TRUNCATE ONLY tru_ftable; -- truncate both parent and child
SELECT count(*) FROM tru_ftable; -- 0

5) I may be missing something here: why, even though ONLY is ignored
in the truncate command below, is the sum 126? Shouldn't it truncate
both tru_ftable_parent and tru_ftable_child?
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;
SELECT sum(id) FROM tru_ftable_parent; -- 126

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#100Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Kyotaro Horiguchi (#98)
Re: TRUNCATE on foreign table

On 2021/04/16 14:20, Kyotaro Horiguchi wrote:

At Fri, 16 Apr 2021 11:54:16 +0900, Fujii Masao <masao.fujii@oss.nttdata.com> wrote in

On 2021/04/16 9:15, Bharath Rupireddy wrote:

On Thu, Apr 15, 2021 at 8:19 PM Fujii Masao <masao.fujii@oss.nttdata.com>
wrote:

On 2021/04/14 12:54, Bharath Rupireddy wrote:

IMHO, we can push all the TRUNCATE options (ONLY, RESTRICT, CASCADE,
RESTART/CONTINUE IDENTITY), because doing so doesn't pose any major
challenge (implementation-wise), unlike pushing some clauses in
SELECT/UPDATE/DELETE and we already do this on the master. It doesn't
look good and may confuse users, if we push some options and restrict
others. We should have an explicit note in the documentation saying we
push all these options to the remote server. We can leave it to the
user to write TRUNCATE for foreign tables with the appropriate
options. If somebody complains about a problem that they will face
with this behavior, we can revisit.

That's one of the options. But I'm afraid it's hard to drop (revisit)
the feature once it has been released. So if there is no explicit
use case for that, basically I'd like to drop that before release
like we agree to drop unused TRUNCATE_REL_CONTEXT_CASCADING.

Thanks. Looks like the decision is going in the direction of
restricting those options, I will withdraw my point.

We are still discussing whether RESTRICT option should be pushed down to
a foreign data wrapper. But ISTM at least we could reach the consensus about
the drop of extra information for each foreign table. So what about applying
the attached patch and remove the extra information at first?

I'm fine with that direction. Thanks for the patch.

The change is straight-forward and looks fine, except the following
part.

==== contrib/postgres_fdw/sql/postgres_fdw.sql: 2436 -- after patching
2436> -- in case when remote table has inherited children
2437> CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
2438> INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
2439> INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
2440> SELECT sum(id) FROM tru_ftable; -- 95
2441>
2442> TRUNCATE ONLY tru_ftable; -- truncate both parent and child
2443> SELECT count(*) FROM tru_ftable; -- 0
2444>
2445> INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
2446> SELECT sum(id) FROM tru_ftable; -- 115
2447> TRUNCATE tru_ftable; -- truncate both of parent and child
2448> SELECT count(*) FROM tru_ftable; -- 0

L2445-L2448 doesn't work as described since L2445 inserts tuples only
to the parent.

And there's a slight difference for no reason between the comment at
2442 and 2447.

Agreed. Thanks!

(The attached is a fix on top of the proposed patch.)

I will include this patch into the main patch.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#101Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#99)
2 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/16 15:13, Bharath Rupireddy wrote:

On Fri, Apr 16, 2021 at 8:24 AM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

We are still discussing whether RESTRICT option should be pushed down to
a foreign data wrapper. But ISTM at least we could reach the consensus about
the drop of extra information for each foreign table. So what about applying
the attached patch and remove the extra information at first?

Thanks for the patch, here are some comments:

Thanks for the review!

1) Maybe new empty lines would be better so that the code doesn't look
cluttered:
relids = lappend_oid(relids, myrelid); --> a new line after this.
/* Log this relation only if needed for logical decoding */
if (RelationIsLogicallyLogged(rel))

relids = lappend_oid(relids, childrelid); --> a new line after this.
/* Log this relation only if needed for logical decoding */

relids = lappend_oid(relids, relid); --> a new line after this.
/* Log this relation only if needed for logical decoding */
if (RelationIsLogicallyLogged(rel))

Applied. Attached is the updated version of the patch
(truncate_foreign_table_dont_pass_only_clause_v2.patch).
This patch includes the patch that Horiguchi-san posted upthread.
I'm thinking of committing this patch first.

2) Instead of
on foreign tables. <literal>rels</literal> is the list of
<structname>Relation</structname> data structure that indicates
a foreign table to truncate.

I think it is better with:
on foreign tables. <literal>rels</literal> is the list of
<structname>Relation</structname> data structures, where each
entry indicates a foreign table to truncate.

Justin posted the patch that improves the documents including
this description. I think that we should revisit that patch.
Attached is the updated version of that patch.
(truncate_foreign_table_docs_v1.patch)

3) How about adding an extra para(after below para in
postgres_fdw.sgml) on WHY we don't push "ONLY" to foreign tables while
truncating? We could add to the same para for other options if at all
we don't choose to push them.
<command>DELETE</command>, or <command>TRUNCATE</command>.
(Of course, the remote user you have specified in your user mapping must
have privileges to do these things.)

I agree that we should document the behavior that the ONLY option is
always ignored for foreign tables. But I'm not sure we can document WHY,
because I could not find the past discussion about why the ONLY option is
ignored on SELECT, etc. Maybe it's enough to document the behavior?

4) Isn't it better to mention the "ONLY" option is not pushed to remote
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;

TRUNCATE ONLY tru_ftable; -- truncate both parent and child
SELECT count(*) FROM tru_ftable; -- 0

5) I may be missing something here: why, even though ONLY is ignored
in the truncate command below, is the sum 126? Shouldn't it truncate
both tru_ftable_parent and tru_ftable_child?
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;
SELECT sum(id) FROM tru_ftable_parent; -- 126

Because the TRUNCATE ONLY command doesn't truncate the tru_ftable_child table
that inherits tru_ftable_parent. No?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

truncate_foreign_table_dont_pass_only_clause_v2.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_dont_pass_only_clause_v2.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc4c3620d..7a798530e3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2179,24 +2179,19 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 void
 deparseTruncateSql(StringInfo buf,
 				   List *rels,
-				   List *rels_extra,
 				   DropBehavior behavior,
 				   bool restart_seqs)
 {
-	ListCell   *lc1,
-			   *lc2;
+	ListCell   *cell;
 
 	appendStringInfoString(buf, "TRUNCATE ");
 
-	forboth(lc1, rels, lc2, rels_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = lfirst(cell);
 
-		if (lc1 != list_head(rels))
+		if (cell != list_head(rels))
 			appendStringInfoString(buf, ", ");
-		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
-			appendStringInfoString(buf, "ONLY ");
 
 		deparseRelation(buf, rel);
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5070d93239..d32f291089 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8218,13 +8218,13 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -8388,22 +8388,23 @@ SELECT sum(id) FROM tru_ftable;   -- 95
   95
 (1 row)
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
- sum 
------
-  60
+TRUNCATE ONLY tru_ftable;		-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;   -- 0
+ count 
+-------
+     0
 (1 row)
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
  sum 
 -----
- 175
+ 255
 (1 row)
 
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
  count 
 -------
      0
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index c590f374c6..c521cba3fc 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -401,7 +401,6 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
 static void postgresExecForeignTruncate(List *rels,
-										List *rels_extra,
 										DropBehavior behavior,
 										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
@@ -2881,7 +2880,6 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
  */
 static void
 postgresExecForeignTruncate(List *rels,
-							List *rels_extra,
 							DropBehavior behavior,
 							bool restart_seqs)
 {
@@ -2964,7 +2962,7 @@ postgresExecForeignTruncate(List *rels,
 
 	/* Construct the TRUNCATE command string */
 	initStringInfo(&sql);
-	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+	deparseTruncateSql(&sql, rels, behavior, restart_seqs);
 
 	/* Issue the TRUNCATE command to remote server */
 	do_sql_command(conn, sql.data);
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 5d44b75314..9591c0f6c2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -209,7 +209,6 @@ extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
-							   List *rels_extra,
 							   DropBehavior behavior,
 							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 74590089bd..65643e120d 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2355,7 +2355,6 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
@@ -2363,6 +2362,7 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -2439,13 +2439,14 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
 INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
 SELECT sum(id) FROM tru_ftable;   -- 95
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
+TRUNCATE ONLY tru_ftable;		-- truncate both of parent and child
+SELECT count(*) FROM tru_ftable;   -- 0
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
 
 -- cleanup
 DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 98882ddab8..3fe373ef00 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1071,28 +1071,16 @@ EndDirectModify(ForeignScanState *node);
     <para>
 <programlisting>
 void
-ExecForeignTruncate(List *rels, List *rels_extra,
-                    DropBehavior behavior, bool restart_seqs);
+ExecForeignTruncate(List *rels,
+                    DropBehavior behavior,
+                    bool restart_seqs);
 </programlisting>
 
      Truncate a set of foreign tables specified in <literal>rels</literal>.
      This function is called when <xref linkend="sql-truncate"/> is executed
      on foreign tables.  <literal>rels</literal> is the list of
      <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
-     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
-     in <command>TRUNCATE</command>,
-     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
-     which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
+     a foreign table to truncate.
     </para>
 
     <para>
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 096a6f2891..6e320f96ad 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -323,7 +323,6 @@ typedef struct ForeignTruncateInfo
 {
 	Oid			serverid;
 	List	   *rels;
-	List	   *rels_extra;
 } ForeignTruncateInfo;
 
 /*
@@ -1620,7 +1619,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1654,9 +1652,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
-		relids_extra = lappend_int(relids_extra, (recurse ?
-												  TRUNCATE_REL_CONTEXT_NORMAL :
-												  TRUNCATE_REL_CONTEXT_ONLY));
+
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1704,8 +1700,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1718,7 +1713,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1739,15 +1734,13 @@ ExecuteTruncate(TruncateStmt *stmt)
  *
  * explicit_rels is the list of Relations to truncate that the command
  * specified.  relids is the list of Oids corresponding to explicit_rels.
- * relids_extra is the list of integer values that deliver extra information
- * about relations in explicit_rels.  relids_logged is the list of Oids
- * (a subset of relids) that require WAL-logging.  This is all a bit redundant,
- * but the existing callers have this information handy in this form.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging.  This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
  */
 void
 ExecuteTruncateGuts(List *explicit_rels,
 					List *relids,
-					List *relids_extra,
 					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
@@ -1760,8 +1753,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
-	ListCell   *lc1,
-			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1799,8 +1790,7 @@ ExecuteTruncateGuts(List *explicit_rels,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1901,11 +1891,9 @@ ExecuteTruncateGuts(List *explicit_rels,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	Assert(list_length(rels) == list_length(relids_extra));
-	forboth(lc1, rels, lc2, relids_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = (Relation) lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = (Relation) lfirst(cell);
 
 		/*
 		 * Save OID of partitioned tables for later; nothing else to do for
@@ -1952,7 +1940,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			{
 				ft_info->serverid = serverid;
 				ft_info->rels = NIL;
-				ft_info->rels_extra = NIL;
 			}
 
 			/*
@@ -1960,7 +1947,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			 * foreign table belongs to.
 			 */
 			ft_info->rels = lappend(ft_info->rels, rel);
-			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
 			continue;
 		}
 
@@ -2044,7 +2030,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 				Assert(routine->ExecForeignTruncate != NULL);
 
 				routine->ExecForeignTruncate(ft_info->rels,
-											 ft_info->rels_extra,
 											 behavior,
 											 restart_seqs);
 			}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index fb3ba5c415..4dd869aaba 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1795,7 +1795,6 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1825,7 +1824,6 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1864,7 +1862,6 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1879,7 +1876,6 @@ apply_handle_truncate(StringInfo s)
 	 */
 	ExecuteTruncateGuts(rels,
 						relids,
-						relids_extra,
 						relids_logged,
 						DROP_RESTRICT,
 						restart_seqs);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b808a07e46..14f4b4882f 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -21,16 +21,6 @@
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
-/*
- * These values indicate how a relation was specified as the target to
- * truncate in TRUNCATE command.
- */
-#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
-#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
-#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
-												 * due to dependency (e.g.,
-												 * partition table) */
-
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
 
@@ -68,7 +58,6 @@ extern void CheckTableNotInUse(Relation rel, const char *stmt);
 extern void ExecuteTruncate(TruncateStmt *stmt);
 extern void ExecuteTruncateGuts(List *explicit_rels,
 								List *relids,
-								List *relids_extra,
 								List *relids_logged,
 								DropBehavior behavior,
 								bool restart_seqs);
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 4ebbca6de9..4f17becbb8 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -161,7 +161,6 @@ typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
 typedef void (*ExecForeignTruncate_function) (List *rels,
-											  List *rels_extra,
 											  DropBehavior behavior,
 											  bool restart_seqs);
 
truncate_foreign_table_docs_v1.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_docs_v1.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 3fe373ef00..68737aee27 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,35 +1076,32 @@ ExecForeignTruncate(List *rels,
                     bool restart_seqs);
 </programlisting>
 
-     Truncate a set of foreign tables specified in <literal>rels</literal>.
-     This function is called when <xref linkend="sql-truncate"/> is executed
-     on foreign tables.  <literal>rels</literal> is the list of
-     <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.
+     Truncate foreign tables.  This function is called when
+     <xref linkend="sql-truncate"/> is executed on a foreign table.
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures for each foreign table to truncated.
     </para>
 
     <para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal>, which indicates that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.
     </para>
     
     <para>
-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> option, otherwise
+     <literal>CONTINUE IDENTITY</literal> option.
     </para>
 
     <para>
-     <command>TRUNCATE</command> invokes
-     <function>ExecForeignTruncate</function> once per foreign server
-     that foreign tables to truncate belong to.  This means that all foreign
-     tables included in <literal>rels</literal> must belong to the same
-     server.
+     <function>ExecForeignTruncate</function> is invoked once per
+     foreign server for which foreign tables are to be truncated.
+     This means that all foreign tables included in <literal>rels</literal>
+     must belong to the same server.
     </para>
 
     <para>
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..31b96e9d2e 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -452,11 +452,17 @@ OPTIONS (ADD password_required 'false');
      <listitem>
       <para>
        This option controls whether <filename>postgres_fdw</filename> allows
-       foreign tables to be truncated using <command>TRUNCATE</command>
+       foreign tables to be truncated using the <command>TRUNCATE</command>
        command. It can be specified for a foreign table or a foreign server.
        A table-level option overrides a server-level option.
        The default is <literal>true</literal>.
       </para>
+
+      <para>
+       Of course, if the remote table is not in fact truncatable, an error
+       would occur anyway.  Use of this option primarily allows the error to
+       be thrown locally without querying the remote server.
+      </para>
      </listitem>
     </varlistentry>
    </variablelist>
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index acf3633be4..9d846f88c9 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
 
   <para>
    <command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
    see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
#102Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#101)
Re: TRUNCATE on foreign table

On Wed, Apr 21, 2021 at 8:31 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

Applied. Attached is the updated version of the patch
(truncate_foreign_table_dont_pass_only_clause_v2.patch).
This patch includes the patch that Horiguchi-san posted upthread.
I'm thinking of committing this patch first.

+1.

2) Instead of
on foreign tables. <literal>rels</literal> is the list of
<structname>Relation</structname> data structure that indicates
a foreign table to truncate.

I think it is better with:
on foreign tables. <literal>rels</literal> is the list of
<structname>Relation</structname> data structures, where each
entry indicates a foreign table to truncate.

Justin posted the patch that improves the documents including
this description. I think that we should revisit that patch.
Attached is the updated version of that patch.
(truncate_foreign_table_docs_v1.patch)

One comment on truncate_foreign_table_docs_v1.patch:
1) I think it is "to be truncated"
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures for each foreign table to truncated.
How about a slightly changed phrasing like below?
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures of foreign tables to truncate.

Other than above, the patch LGTM.

3) How about adding an extra para(after below para in
postgres_fdw.sgml) on WHY we don't push "ONLY" to foreign tables while
truncating? We could add to the same para for other options if at all
we don't choose to push them.
<command>DELETE</command>, or <command>TRUNCATE</command>.
(Of course, the remote user you have specified in your user mapping must
have privileges to do these things.)

I agree to document the behavior that ONLY option is always ignored
for foreign tables. But I'm not sure if we can document WHY.
Because I could not find the past discussion about why ONLY option is
ignored on SELECT, etc... Maybe it's enough to document the behavior?

+1 to specifying in the documentation that the ONLY option is always
ignored. But can we explain the WHY part within deparseTruncateSql, so
that it is there for developer reference? I feel it's better if this
change goes with truncate_foreign_table_dont_pass_only_clause_v2.patch

4) Isn't it better to mention the "ONLY" option is not pushed to remote
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;

TRUNCATE ONLY tru_ftable; -- truncate both parent and child
SELECT count(*) FROM tru_ftable; -- 0

5) I may be missing something here: why, even though ONLY is ignored
in the below truncate command, is the sum 126? Shouldn't it truncate
both tru_ftable_parent and tru_ftable_child?
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;
SELECT sum(id) FROM tru_ftable_parent; -- 126

Because the TRUNCATE ONLY command doesn't truncate the tru_ftable_child table
that inherits tru_ftable_parent. No?

I get it. tru_ftable_child is a local child so ONLY doesn't truncate it.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#103Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#102)
2 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/22 9:39, Bharath Rupireddy wrote:

One comment on truncate_foreign_table_docs_v1.patch:
1) I think it is "to be truncated"
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures for each foreign table to truncated.

Fixed. Thanks!

How about a slightly changed phrasing like below?
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures of foreign tables to truncate.

Either works at least for me. If you think that this phrasing is
more precise or better, I'm ok with that and will update the patch again.

Other than above, the patch LGTM.

3) How about adding an extra para(after below para in
postgres_fdw.sgml) on WHY we don't push "ONLY" to foreign tables while
truncating? We could add to the same para for other options if at all
we don't choose to push them.
<command>DELETE</command>, or <command>TRUNCATE</command>.
(Of course, the remote user you have specified in your user mapping must
have privileges to do these things.)

I agree to document the behavior that ONLY option is always ignored
for foreign tables. But I'm not sure if we can document WHY.
Because I could not find the past discussion about why ONLY option is
ignored on SELECT, etc... Maybe it's enough to document the behavior?

+1 to specifying in the documentation that the ONLY option is always
ignored.

Added.

But can we explain the WHY part within deparseTruncateSql, so that it
is there for developer reference? I feel it's better if this
change goes with truncate_foreign_table_dont_pass_only_clause_v2.patch

I added this information into fdwhandler.sgml because the developers
usually read fdwhandler.sgml.

4) Isn't it better to mention the "ONLY" option is not pushed to remote
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;

TRUNCATE ONLY tru_ftable; -- truncate both parent and child
SELECT count(*) FROM tru_ftable; -- 0

I added the comment.

Could you review the attached patches?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

truncate_foreign_table_docs_v2.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_docs_v2.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 553524553b..69aa66e73e 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,27 +1076,25 @@ ExecForeignTruncate(List *rels,
                     bool restart_seqs);
 </programlisting>
 
-     Truncate a set of foreign tables specified in <literal>rels</literal>.
-     This function is called when <xref linkend="sql-truncate"/> is executed
-     on foreign tables.  <literal>rels</literal> is the list of
-     <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.
+     Truncate foreign tables.  This function is called when
+     <xref linkend="sql-truncate"/> is executed on a foreign table.
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures for each foreign table to be truncated.
     </para>
 
     <para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal>, which indicates that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.
     </para>
     
     <para>
-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> option, otherwise
+     <literal>CONTINUE IDENTITY</literal> option.
     </para>
 
     <para>
@@ -1109,11 +1107,10 @@ ExecForeignTruncate(List *rels,
     </para>
 
     <para>
-     <command>TRUNCATE</command> invokes
-     <function>ExecForeignTruncate</function> once per foreign server
-     that foreign tables to truncate belong to.  This means that all foreign
-     tables included in <literal>rels</literal> must belong to the same
-     server.
+     <function>ExecForeignTruncate</function> is invoked once per
+     foreign server for which foreign tables are to be truncated.
+     This means that all foreign tables included in <literal>rels</literal>
+     must belong to the same server.
     </para>
 
     <para>
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index d03731b7d4..fe1102ceb4 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -459,11 +459,17 @@ OPTIONS (ADD password_required 'false');
      <listitem>
       <para>
        This option controls whether <filename>postgres_fdw</filename> allows
-       foreign tables to be truncated using <command>TRUNCATE</command>
+       foreign tables to be truncated using the <command>TRUNCATE</command>
        command. It can be specified for a foreign table or a foreign server.
        A table-level option overrides a server-level option.
        The default is <literal>true</literal>.
       </para>
+
+      <para>
+       Of course, if the remote table is not in fact truncatable, an error
+       would occur anyway.  Use of this option primarily allows the error to
+       be thrown locally without querying the remote server.
+      </para>
      </listitem>
     </varlistentry>
    </variablelist>
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index acf3633be4..9d846f88c9 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
 
   <para>
    <command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
    see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
truncate_foreign_table_dont_pass_only_clause_v3.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_dont_pass_only_clause_v3.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc4c3620d..7a798530e3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2179,24 +2179,19 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 void
 deparseTruncateSql(StringInfo buf,
 				   List *rels,
-				   List *rels_extra,
 				   DropBehavior behavior,
 				   bool restart_seqs)
 {
-	ListCell   *lc1,
-			   *lc2;
+	ListCell   *cell;
 
 	appendStringInfoString(buf, "TRUNCATE ");
 
-	forboth(lc1, rels, lc2, rels_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = lfirst(cell);
 
-		if (lc1 != list_head(rels))
+		if (cell != list_head(rels))
 			appendStringInfoString(buf, ", ");
-		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
-			appendStringInfoString(buf, "ONLY ");
 
 		deparseRelation(buf, rel);
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5070d93239..8e1cc69508 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8218,13 +8218,13 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -8364,6 +8364,8 @@ SELECT count(*) from tru_pk_ftable; -- 0
 (1 row)
 
 -- truncate with ONLY clause
+-- Since ONLY is specified, the table tru_ftable_child that inherits
+-- tru_ftable_parent locally is not truncated.
 TRUNCATE ONLY tru_ftable_parent;
 SELECT sum(id) FROM tru_ftable_parent;  -- 126
  sum 
@@ -8388,22 +8390,26 @@ SELECT sum(id) FROM tru_ftable;   -- 95
   95
 (1 row)
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
- sum 
------
-  60
+-- Both parent and child tables in the foreign server are truncated
+-- even though ONLY is specified because ONLY has no effect
+-- when truncating a foreign table.
+TRUNCATE ONLY tru_ftable;
+SELECT count(*) FROM tru_ftable;   -- 0
+ count 
+-------
+     0
 (1 row)
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
  sum 
 -----
- 175
+ 255
 (1 row)
 
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
  count 
 -------
      0
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index c590f374c6..c521cba3fc 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -401,7 +401,6 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
 static void postgresExecForeignTruncate(List *rels,
-										List *rels_extra,
 										DropBehavior behavior,
 										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
@@ -2881,7 +2880,6 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
  */
 static void
 postgresExecForeignTruncate(List *rels,
-							List *rels_extra,
 							DropBehavior behavior,
 							bool restart_seqs)
 {
@@ -2964,7 +2962,7 @@ postgresExecForeignTruncate(List *rels,
 
 	/* Construct the TRUNCATE command string */
 	initStringInfo(&sql);
-	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+	deparseTruncateSql(&sql, rels, behavior, restart_seqs);
 
 	/* Issue the TRUNCATE command to remote server */
 	do_sql_command(conn, sql.data);
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 5d44b75314..9591c0f6c2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -209,7 +209,6 @@ extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
-							   List *rels_extra,
 							   DropBehavior behavior,
 							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 74590089bd..dcd36a9753 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2355,7 +2355,6 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
@@ -2363,6 +2362,7 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -2428,6 +2428,8 @@ SELECT count(*) from tru_ftable; -- 0
 SELECT count(*) from tru_pk_ftable; -- 0
 
 -- truncate with ONLY clause
+-- Since ONLY is specified, the table tru_ftable_child that inherits
+-- tru_ftable_parent locally is not truncated.
 TRUNCATE ONLY tru_ftable_parent;
 SELECT sum(id) FROM tru_ftable_parent;  -- 126
 TRUNCATE tru_ftable_parent;
@@ -2439,13 +2441,17 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
 INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
 SELECT sum(id) FROM tru_ftable;   -- 95
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
+-- Both parent and child tables in the foreign server are truncated
+-- even though ONLY is specified because ONLY has no effect
+-- when truncating a foreign table.
+TRUNCATE ONLY tru_ftable;
+SELECT count(*) FROM tru_ftable;   -- 0
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
 
 -- cleanup
 DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 98882ddab8..553524553b 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1071,28 +1071,16 @@ EndDirectModify(ForeignScanState *node);
     <para>
 <programlisting>
 void
-ExecForeignTruncate(List *rels, List *rels_extra,
-                    DropBehavior behavior, bool restart_seqs);
+ExecForeignTruncate(List *rels,
+                    DropBehavior behavior,
+                    bool restart_seqs);
 </programlisting>
 
      Truncate a set of foreign tables specified in <literal>rels</literal>.
      This function is called when <xref linkend="sql-truncate"/> is executed
      on foreign tables.  <literal>rels</literal> is the list of
      <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
-     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
-     in <command>TRUNCATE</command>,
-     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
-     which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
+     a foreign table to truncate.
     </para>
 
     <para>
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List *rels_extra,
      if <literal>CONTINUE IDENTITY</literal> option is specified.
     </para>
 
+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to
+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on
+     a foreign table.
+    </para>
+
     <para>
      <command>TRUNCATE</command> invokes
      <function>ExecForeignTruncate</function> once per foreign server
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..d03731b7d4 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
   have privileges to do these things.)
  </para>
 
+ <para>
+  Note that <literal>ONLY</literal> option specified in
+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifyung the remote table.
+ </para>
+
  <para>
   Note that <filename>postgres_fdw</filename> currently lacks support for
   <command>INSERT</command> statements with an <literal>ON CONFLICT DO
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 97cc9fd6ec..bb0b1f65a9 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -323,7 +323,6 @@ typedef struct ForeignTruncateInfo
 {
 	Oid			serverid;
 	List	   *rels;
-	List	   *rels_extra;
 } ForeignTruncateInfo;
 
 /*
@@ -1620,7 +1619,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1654,9 +1652,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
-		relids_extra = lappend_int(relids_extra, (recurse ?
-												  TRUNCATE_REL_CONTEXT_NORMAL :
-												  TRUNCATE_REL_CONTEXT_ONLY));
+
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1704,8 +1700,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1718,7 +1713,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1739,15 +1734,13 @@ ExecuteTruncate(TruncateStmt *stmt)
  *
  * explicit_rels is the list of Relations to truncate that the command
  * specified.  relids is the list of Oids corresponding to explicit_rels.
- * relids_extra is the list of integer values that deliver extra information
- * about relations in explicit_rels.  relids_logged is the list of Oids
- * (a subset of relids) that require WAL-logging.  This is all a bit redundant,
- * but the existing callers have this information handy in this form.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging.  This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
  */
 void
 ExecuteTruncateGuts(List *explicit_rels,
 					List *relids,
-					List *relids_extra,
 					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
@@ -1760,8 +1753,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
-	ListCell   *lc1,
-			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1799,8 +1790,7 @@ ExecuteTruncateGuts(List *explicit_rels,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1901,11 +1891,9 @@ ExecuteTruncateGuts(List *explicit_rels,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	Assert(list_length(rels) == list_length(relids_extra));
-	forboth(lc1, rels, lc2, relids_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = (Relation) lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = (Relation) lfirst(cell);
 
 		/*
 		 * Save OID of partitioned tables for later; nothing else to do for
@@ -1952,7 +1940,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			{
 				ft_info->serverid = serverid;
 				ft_info->rels = NIL;
-				ft_info->rels_extra = NIL;
 			}
 
 			/*
@@ -1960,7 +1947,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			 * foreign table belongs to.
 			 */
 			ft_info->rels = lappend(ft_info->rels, rel);
-			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
 			continue;
 		}
 
@@ -2044,7 +2030,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 				Assert(routine->ExecForeignTruncate != NULL);
 
 				routine->ExecForeignTruncate(ft_info->rels,
-											 ft_info->rels_extra,
 											 behavior,
 											 restart_seqs);
 			}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index d09703f7ac..d9f157172b 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1816,7 +1816,6 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1846,7 +1845,6 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1885,7 +1883,6 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1900,7 +1897,6 @@ apply_handle_truncate(StringInfo s)
 	 */
 	ExecuteTruncateGuts(rels,
 						relids,
-						relids_extra,
 						relids_logged,
 						DROP_RESTRICT,
 						restart_seqs);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b808a07e46..14f4b4882f 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -21,16 +21,6 @@
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
-/*
- * These values indicate how a relation was specified as the target to
- * truncate in TRUNCATE command.
- */
-#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
-#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
-#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
-												 * due to dependency (e.g.,
-												 * partition table) */
-
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
 
@@ -68,7 +58,6 @@ extern void CheckTableNotInUse(Relation rel, const char *stmt);
 extern void ExecuteTruncate(TruncateStmt *stmt);
 extern void ExecuteTruncateGuts(List *explicit_rels,
 								List *relids,
-								List *relids_extra,
 								List *relids_logged,
 								DropBehavior behavior,
 								bool restart_seqs);
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 4ebbca6de9..4f17becbb8 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -161,7 +161,6 @@ typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
 typedef void (*ExecForeignTruncate_function) (List *rels,
-											  List *rels_extra,
 											  DropBehavior behavior,
 											  bool restart_seqs);
 
#104Justin Pryzby
pryzby@telsasoft.com
In reply to: Fujii Masao (#103)
Re: TRUNCATE on foreign table

On Thu, Apr 22, 2021 at 03:36:25PM +0900, Fujii Masao wrote:

diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 553524553b..69aa66e73e 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,27 +1076,25 @@ ExecForeignTruncate(List *rels,
bool restart_seqs);
<para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal>, which indicates that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.

Now that I reread this, I would change "which indicates" to "indicating".

-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> option, otherwise
+     <literal>CONTINUE IDENTITY</literal> option.

should it say "specified" instead of requested ?
Or should it say "requested the RESTART IDENTITY behavior" ?

Also, I think it should say "..otherwise, the CONTINUE IDENTITY behavior was
requested".

+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
<para>
<command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

what does "for instance" mean here? I think it should be removed.

+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List *rels_extra,
if <literal>CONTINUE IDENTITY</literal> option is specified.
</para>
+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to
+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on

There's an extra space before DELETE

diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..d03731b7d4 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
have privileges to do these things.)
</para>
+ <para>
+  Note that <literal>ONLY</literal> option specified in

add "the" to say: "the ONLY"

+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifyung the remote table.

modifying

--
Justin

#105Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#103)
Re: TRUNCATE on foreign table

On Thu, Apr 22, 2021 at 12:06 PM Fujii Masao
<masao.fujii@oss.nttdata.com> wrote:

On 2021/04/22 9:39, Bharath Rupireddy wrote:

One comment on truncate_foreign_table_docs_v1.patch:
1) I think it is "to be truncated"
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures for each foreign table to truncated.

Fixed. Thanks!

How about a slightly changed phrasing like below?
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures of foreign tables to truncate.

Either works at least for me. If you think that this phrasing is
more precise or better, I'm ok with that and will update the patch again.

IMO, "rels is a list of Relation data structures of foreign tables to
truncate." looks better.

3) How about adding an extra para(after below para in
postgres_fdw.sgml) on WHY we don't push "ONLY" to foreign tables while
truncating? We could add to the same para for other options if at all
we don't choose to push them.
<command>DELETE</command>, or <command>TRUNCATE</command>.
(Of course, the remote user you have specified in your user mapping must
have privileges to do these things.)

I agree that we should document the behavior that the ONLY option is always
ignored for foreign tables. But I'm not sure if we can document WHY,
because I could not find the past discussion about why the ONLY option is
ignored on SELECT, etc. Maybe it's enough to document the behavior?

+1 to specifying in the documentation that the ONLY option is always
ignored.

Added.

But can we specify the WHY part within deparseTruncateSql, so that it
will be there for developer reference? I feel it's better if this
change goes with truncate_foreign_table_dont_pass_only_clause_v2.patch

I added this information into fdwhandler.sgml because the developers
usually read fdwhandler.sgml.

Thanks!

+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to

I think it is not "information about", no? We just don't pass the ONLY
option; instead, we skip it. IMO, we can say "Note that the ONLY option
specified with a foreign table in the original TRUNCATE command is
skipped and not passed to ExecForeignTruncate."

+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on
+     a foreign table.

How about "This behaviour is similar to the callback functions of
SELECT, UPDATE, DELETE on a foreign table"?

4) Isn't it better to mention the "ONLY" option is not pushed to remote
-- truncate with ONLY clause
TRUNCATE ONLY tru_ftable_parent;

TRUNCATE ONLY tru_ftable; -- truncate both parent and child
SELECT count(*) FROM tru_ftable; -- 0

I added the comment.

LGTM.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#106Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Justin Pryzby (#104)
Re: TRUNCATE on foreign table

On Thu, Apr 22, 2021 at 2:26 PM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Thu, Apr 22, 2021 at 03:36:25PM +0900, Fujii Masao wrote:

diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 553524553b..69aa66e73e 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,27 +1076,25 @@ ExecForeignTruncate(List *rels,
bool restart_seqs);
<para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal>, which indicates that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.

Now that I reread this, I would change "which indicates" to "indicating".

+1.

-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> option, otherwise
+     <literal>CONTINUE IDENTITY</literal> option.

should it say "specified" instead of requested ?
Or should it say "requested the RESTART IDENTITY behavior" ?

Also, I think it should say "..otherwise, the CONTINUE IDENTITY behavior was
requested".

The original TRUNCATE document uses this - "When RESTART IDENTITY is specified"

IMO the following looks better: "If restart_seqs is true, RESTART
IDENTITY was specified in the original TRUNCATE command, otherwise
CONTINUE IDENTITY was specified."

+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
<para>
<command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

what does "for instance" mean here? I think it should be removed.

+1.

+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List *rels_extra,
if <literal>CONTINUE IDENTITY</literal> option is specified.
</para>
+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to
+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on

There's an extra space before DELETE

Good catch! Extra space after "and" and before "<command>".

diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..d03731b7d4 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
have privileges to do these things.)
</para>
+ <para>
+  Note that <literal>ONLY</literal> option specified in

add "the" to say: "the ONLY"

+1.

+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifyung the remote table.

modifying

Good catch!

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#107Justin Pryzby
pryzby@telsasoft.com
In reply to: Bharath Rupireddy (#106)
Re: TRUNCATE on foreign table

On Thu, Apr 22, 2021 at 05:09:02PM +0530, Bharath Rupireddy wrote:

should it say "specified" instead of requested ?
Or should it say "requested the RESTART IDENTITY behavior" ?

Also, I think it should say "..otherwise, the CONTINUE IDENTITY behavior was
requested".

The original TRUNCATE document uses this - "When RESTART IDENTITY is specified"

IMO the following looks better: "If restart_seqs is true, RESTART
IDENTITY was specified in the original TRUNCATE command, otherwise
CONTINUE IDENTITY was specified."

This suggests that one of the two options was "specified", but the user may
not have specified either, which is why we used the "behavior" language - if
neither is "specified" then the default behavior is what was "requested".

--
Justin

#108Zhihong Yu
zyu@yugabyte.com
In reply to: Bharath Rupireddy (#106)
Re: TRUNCATE on foreign table

On Thu, Apr 22, 2021 at 4:39 AM Bharath Rupireddy <
bharath.rupireddyforpostgres@gmail.com> wrote:

On Thu, Apr 22, 2021 at 2:26 PM Justin Pryzby <pryzby@telsasoft.com>
wrote:

On Thu, Apr 22, 2021 at 03:36:25PM +0900, Fujii Masao wrote:

diff --git a/doc/src/sgml/fdwhandler.sgml

b/doc/src/sgml/fdwhandler.sgml

index 553524553b..69aa66e73e 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,27 +1076,25 @@ ExecForeignTruncate(List *rels,
bool restart_seqs);
<para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values

<literal>DROP_RESTRICT</literal>,

-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either

<literal>DROP_RESTRICT</literal>

+     or <literal>DROP_CASCADE</literal>, which indicates that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option

was

+     requested in the original <command>TRUNCATE</command> command,
+     respectively.

Now that I reread this, I would change "which indicates" to "indicating".

+1.

- <literal>restart_seqs</literal> is set to <literal>true</literal>
- if <literal>RESTART IDENTITY</literal> option is specified in
- <command>TRUNCATE</command> command. It is

<literal>false</literal>

-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> option, otherwise
+     <literal>CONTINUE IDENTITY</literal> option.

should it say "specified" instead of requested ?
Or should it say "requested the RESTART IDENTITY behavior" ?

Also, I think it should say "..otherwise, the CONTINUE IDENTITY behavior

was

requested".

The original TRUNCATE document uses this - "When RESTART IDENTITY is
specified"

IMO the following looks better: "If restart_seqs is true, RESTART
IDENTITY was specified in the original TRUNCATE command, otherwise
CONTINUE IDENTITY was specified."

+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable

class="parameter">name</replaceable> [

<para>
<command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

what does "for instance" mean here? I think it should be removed.

+1.

+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List

*rels_extra,

if <literal>CONTINUE IDENTITY</literal> option is specified.
</para>

+    <para>
+     Note that information about <literal>ONLY</literal> options

specified

+ in the original <command>TRUNCATE</command> command is not

passed to

+ <function>ExecForeignTruncate</function>. This is the same

behavior as

+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on

There's an extra space before DELETE

Good catch! Extra space after "and" and before "<command>".

diff --git a/doc/src/sgml/postgres-fdw.sgml

b/doc/src/sgml/postgres-fdw.sgml

index 5320accf6f..d03731b7d4 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
have privileges to do these things.)
</para>
+ <para>
+  Note that <literal>ONLY</literal> option specified in

add "the" to say: "the ONLY"

+1.

Since 'the only option' is a legitimate English phrase, I think the following
would be clearer:

Note that the option <literal>ONLY</literal> ...

Cheers

Show quoted text
+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifyung the remote table.

modifying

Good catch!

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#109Justin Pryzby
pryzby@telsasoft.com
In reply to: Zhihong Yu (#108)
Re: TRUNCATE on foreign table

On Thu, Apr 22, 2021 at 07:41:06AM -0700, Zhihong Yu wrote:

+ Note that <literal>ONLY</literal> option specified in

add "the" to say: "the ONLY"

+1.

Since 'the only option' is a legitimate English phrase, I think the following
would be clearer:

Note that the option <literal>ONLY</literal> ...

I think "the ONLY option" is better, clearer, and more consistent with the
rest of the documentation.

There are only ~5 places where we say "the option >OPTION<":
| git grep 'the option <' doc/src/sgml/

And at least 150 places where we say "The >OPTION< option" (I'm sure there are
some more which are split across lines).
| git grep -E 'the <([^>]*)>[^<]*</\1> option' doc/src/sgml/ |wc -l

--
Justin

#110Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Justin Pryzby (#104)
Re: TRUNCATE on foreign table

On 2021/04/22 17:56, Justin Pryzby wrote:

On Thu, Apr 22, 2021 at 03:36:25PM +0900, Fujii Masao wrote:

diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 553524553b..69aa66e73e 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,27 +1076,25 @@ ExecForeignTruncate(List *rels,
bool restart_seqs);
<para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal>, which indicates that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.

Now that I reread this, I would change "which indicates" to "indicating".

Fixed. Thanks for reviewing the patch!
I will post the updated version of the patch later.

-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> option, otherwise
+     <literal>CONTINUE IDENTITY</literal> option.

should it say "specified" instead of requested ?
Or should it say "requested the RESTART IDENTITY behavior" ?

Also, I think it should say "..otherwise, the CONTINUE IDENTITY behavior was
requested".

Fixed.

+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
<para>
<command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

what does "for instance" mean here? I think it should be removed.

Removed.

+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List *rels_extra,
if <literal>CONTINUE IDENTITY</literal> option is specified.
</para>
+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to
+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on

There's an extra space before DELETE

Fixed.

diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..d03731b7d4 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
have privileges to do these things.)
</para>
+ <para>
+  Note that <literal>ONLY</literal> option specified in

add "the" to say: "the ONLY"

Fixed.

+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifyung the remote table.

modifying

Fixed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#111Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#105)
2 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/22 20:27, Bharath Rupireddy wrote:

On Thu, Apr 22, 2021 at 12:06 PM Fujii Masao
<masao.fujii@oss.nttdata.com> wrote:

On 2021/04/22 9:39, Bharath Rupireddy wrote:

One comment on truncate_foreign_table_docs_v1.patch:
1) I think it is "to be truncated"
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures for each foreign table to truncated.

Fixed. Thanks!

How about a slightly changed phrasing like below?
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures of foreign tables to truncate.

Either works at least for me. If you think that this phrasing is
more precise or better, I'm ok with that and will update the patch again.

IMO, "rels is a list of Relation data structures of foreign tables to
truncate." looks better.

Fixed.

Thanks for reviewing the patches.
Attached are the updated versions of the patches.
These patches include the fixes pointed out by Justin.

3) How about adding an extra paragraph (after the paragraph below in
postgres_fdw.sgml) on WHY we don't push "ONLY" to foreign tables while
truncating? We could use the same paragraph to cover any other options
that we choose not to push.
<command>DELETE</command>, or <command>TRUNCATE</command>.
(Of course, the remote user you have specified in your user mapping must
have privileges to do these things.)

I agree that we should document the behavior that the ONLY option is always
ignored for foreign tables. But I'm not sure if we can document WHY,
because I could not find the past discussion about why the ONLY option is
ignored on SELECT, etc. Maybe it's enough to document the behavior?

+1 to specifying in the documentation that the ONLY option is always
ignored.

Added.

But can we specify the WHY part within deparseTruncateSql, so that it
will be there for developer reference? I feel it's better if this
change goes with truncate_foreign_table_dont_pass_only_clause_v2.patch.

I added this information into fdwhandler.sgml because the developers
usually read fdwhandler.sgml.

Thanks!

+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to

I think it is not "information about", no? We just don't pass the ONLY
option; instead, we skip it. IMO, we can say "Note that the ONLY option
specified with a foreign table in the original TRUNCATE command is
skipped and not passed to ExecForeignTruncate."

Probably I still fail to understand your point.
But if "information about" is confusing, I'm ok to
remove that. Fixed.

+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and  <command>DELETE</command> on
+     a foreign table.

How about "This behaviour is similar to the callback functions of
SELECT, UPDATE, DELETE on a foreign table"?

Fixed.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

truncate_foreign_table_docs_v3.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_docs_v3.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 0e2cd3628c..70d89393d9 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,44 +1076,41 @@ ExecForeignTruncate(List *rels,
                     bool restart_seqs);
 </programlisting>
 
-     Truncate a set of foreign tables specified in <literal>rels</literal>.
-     This function is called when <xref linkend="sql-truncate"/> is executed
-     on foreign tables.  <literal>rels</literal> is the list of
-     <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.
+     Truncate foreign tables.  This function is called when
+     <xref linkend="sql-truncate"/> is executed on a foreign table.
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures of foreign tables to truncate.
     </para>
 
     <para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal> indicating that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.
     </para>
     
     <para>
-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> behavior, otherwise the
+     <literal>CONTINUE IDENTITY</literal> behavior was requested.
     </para>
 
     <para>
-     Note that information about <literal>ONLY</literal> options specified
+     Note that the <literal>ONLY</literal> options specified
      in the original <command>TRUNCATE</command> command is not passed to
-     <function>ExecForeignTruncate</function>.  This is the same behavior as
-     for the callback functions for <command>SELECT</command>,
+     <function>ExecForeignTruncate</function>.  This behavior is similar to
+     the callback functions of <command>SELECT</command>,
      <command>UPDATE</command> and <command>DELETE</command> on
      a foreign table.
     </para>
 
     <para>
-     <command>TRUNCATE</command> invokes
-     <function>ExecForeignTruncate</function> once per foreign server
-     that foreign tables to truncate belong to.  This means that all foreign
-     tables included in <literal>rels</literal> must belong to the same
-     server.
+     <function>ExecForeignTruncate</function> is invoked once per
+     foreign server for which foreign tables are to be truncated.
+     This means that all foreign tables included in <literal>rels</literal>
+     must belong to the same server.
     </para>
 
     <para>
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index b0806c1274..839126c4ef 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -459,11 +459,17 @@ OPTIONS (ADD password_required 'false');
      <listitem>
       <para>
        This option controls whether <filename>postgres_fdw</filename> allows
-       foreign tables to be truncated using <command>TRUNCATE</command>
+       foreign tables to be truncated using the <command>TRUNCATE</command>
        command. It can be specified for a foreign table or a foreign server.
        A table-level option overrides a server-level option.
        The default is <literal>true</literal>.
       </para>
+
+      <para>
+       Of course, if the remote table is not in fact truncatable, an error
+       would occur anyway.  Use of this option primarily allows the error to
+       be thrown locally without querying the remote server.
+      </para>
      </listitem>
     </varlistentry>
    </variablelist>
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index acf3633be4..9af42dd008 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
 
   <para>
    <command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper,
    see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
truncate_foreign_table_dont_pass_only_clause_v4.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_dont_pass_only_clause_v4.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc4c3620d..7a798530e3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2179,24 +2179,19 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 void
 deparseTruncateSql(StringInfo buf,
 				   List *rels,
-				   List *rels_extra,
 				   DropBehavior behavior,
 				   bool restart_seqs)
 {
-	ListCell   *lc1,
-			   *lc2;
+	ListCell   *cell;
 
 	appendStringInfoString(buf, "TRUNCATE ");
 
-	forboth(lc1, rels, lc2, rels_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = lfirst(cell);
 
-		if (lc1 != list_head(rels))
+		if (cell != list_head(rels))
 			appendStringInfoString(buf, ", ");
-		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
-			appendStringInfoString(buf, "ONLY ");
 
 		deparseRelation(buf, rel);
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5070d93239..8e1cc69508 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8218,13 +8218,13 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -8364,6 +8364,8 @@ SELECT count(*) from tru_pk_ftable; -- 0
 (1 row)
 
 -- truncate with ONLY clause
+-- Since ONLY is specified, the table tru_ftable_child that inherits
+-- tru_ftable_parent locally is not truncated.
 TRUNCATE ONLY tru_ftable_parent;
 SELECT sum(id) FROM tru_ftable_parent;  -- 126
  sum 
@@ -8388,22 +8390,26 @@ SELECT sum(id) FROM tru_ftable;   -- 95
   95
 (1 row)
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
- sum 
------
-  60
+-- Both parent and child tables in the foreign server are truncated
+-- even though ONLY is specified because ONLY has no effect
+-- when truncating a foreign table.
+TRUNCATE ONLY tru_ftable;
+SELECT count(*) FROM tru_ftable;   -- 0
+ count 
+-------
+     0
 (1 row)
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
  sum 
 -----
- 175
+ 255
 (1 row)
 
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
  count 
 -------
      0
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index e201b5404e..8bcdc8d616 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -401,7 +401,6 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
 static void postgresExecForeignTruncate(List *rels,
-										List *rels_extra,
 										DropBehavior behavior,
 										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
@@ -2881,7 +2880,6 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
  */
 static void
 postgresExecForeignTruncate(List *rels,
-							List *rels_extra,
 							DropBehavior behavior,
 							bool restart_seqs)
 {
@@ -2964,7 +2962,7 @@ postgresExecForeignTruncate(List *rels,
 
 	/* Construct the TRUNCATE command string */
 	initStringInfo(&sql);
-	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+	deparseTruncateSql(&sql, rels, behavior, restart_seqs);
 
 	/* Issue the TRUNCATE command to remote server */
 	do_sql_command(conn, sql.data);
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 5d44b75314..9591c0f6c2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -209,7 +209,6 @@ extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
-							   List *rels_extra,
 							   DropBehavior behavior,
 							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 74590089bd..dcd36a9753 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2355,7 +2355,6 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
@@ -2363,6 +2362,7 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -2428,6 +2428,8 @@ SELECT count(*) from tru_ftable; -- 0
 SELECT count(*) from tru_pk_ftable; -- 0
 
 -- truncate with ONLY clause
+-- Since ONLY is specified, the table tru_ftable_child that inherits
+-- tru_ftable_parent locally is not truncated.
 TRUNCATE ONLY tru_ftable_parent;
 SELECT sum(id) FROM tru_ftable_parent;  -- 126
 TRUNCATE tru_ftable_parent;
@@ -2439,13 +2441,17 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
 INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
 SELECT sum(id) FROM tru_ftable;   -- 95
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
+-- Both parent and child tables in the foreign server are truncated
+-- even though ONLY is specified because ONLY has no effect
+-- when truncating a foreign table.
+TRUNCATE ONLY tru_ftable;
+SELECT count(*) FROM tru_ftable;   -- 0
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
 
 -- cleanup
 DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 9f9274ce55..0e2cd3628c 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1071,28 +1071,16 @@ EndDirectModify(ForeignScanState *node);
     <para>
 <programlisting>
 void
-ExecForeignTruncate(List *rels, List *rels_extra,
-                    DropBehavior behavior, bool restart_seqs);
+ExecForeignTruncate(List *rels,
+                    DropBehavior behavior,
+                    bool restart_seqs);
 </programlisting>
 
      Truncate a set of foreign tables specified in <literal>rels</literal>.
      This function is called when <xref linkend="sql-truncate"/> is executed
      on foreign tables.  <literal>rels</literal> is the list of
      <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
-     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
-     in <command>TRUNCATE</command>,
-     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
-     which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
+     a foreign table to truncate.
     </para>
 
     <para>
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List *rels_extra,
      if <literal>CONTINUE IDENTITY</literal> option is specified.
     </para>
 
+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to
+     <function>ExecForeignTruncate</function>.  This is the same behavior as
+     for the callback functions for <command>SELECT</command>,
+     <command>UPDATE</command> and <command>DELETE</command> on
+     a foreign table.
+    </para>
+
     <para>
      <command>TRUNCATE</command> invokes
      <function>ExecForeignTruncate</function> once per foreign server
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..b0806c1274 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
   have privileges to do these things.)
  </para>
 
+ <para>
+  Note that the <literal>ONLY</literal> option specified in
+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifying the remote table.
+ </para>
+
  <para>
   Note that <filename>postgres_fdw</filename> currently lacks support for
   <command>INSERT</command> statements with an <literal>ON CONFLICT DO
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 7d00f4eb25..8e717ada28 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -323,7 +323,6 @@ typedef struct ForeignTruncateInfo
 {
 	Oid			serverid;
 	List	   *rels;
-	List	   *rels_extra;
 } ForeignTruncateInfo;
 
 /*
@@ -1620,7 +1619,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1654,9 +1652,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
-		relids_extra = lappend_int(relids_extra, (recurse ?
-												  TRUNCATE_REL_CONTEXT_NORMAL :
-												  TRUNCATE_REL_CONTEXT_ONLY));
+
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1704,8 +1700,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1718,7 +1713,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1739,15 +1734,13 @@ ExecuteTruncate(TruncateStmt *stmt)
  *
  * explicit_rels is the list of Relations to truncate that the command
  * specified.  relids is the list of Oids corresponding to explicit_rels.
- * relids_extra is the list of integer values that deliver extra information
- * about relations in explicit_rels.  relids_logged is the list of Oids
- * (a subset of relids) that require WAL-logging.  This is all a bit redundant,
- * but the existing callers have this information handy in this form.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging.  This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
  */
 void
 ExecuteTruncateGuts(List *explicit_rels,
 					List *relids,
-					List *relids_extra,
 					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
@@ -1760,8 +1753,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
-	ListCell   *lc1,
-			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1799,8 +1790,7 @@ ExecuteTruncateGuts(List *explicit_rels,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1901,11 +1891,9 @@ ExecuteTruncateGuts(List *explicit_rels,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	Assert(list_length(rels) == list_length(relids_extra));
-	forboth(lc1, rels, lc2, relids_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = (Relation) lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = (Relation) lfirst(cell);
 
 		/*
 		 * Save OID of partitioned tables for later; nothing else to do for
@@ -1952,7 +1940,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			{
 				ft_info->serverid = serverid;
 				ft_info->rels = NIL;
-				ft_info->rels_extra = NIL;
 			}
 
 			/*
@@ -1960,7 +1947,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			 * foreign table belongs to.
 			 */
 			ft_info->rels = lappend(ft_info->rels, rel);
-			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
 			continue;
 		}
 
@@ -2044,7 +2030,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 				Assert(routine->ExecForeignTruncate != NULL);
 
 				routine->ExecForeignTruncate(ft_info->rels,
-											 ft_info->rels_extra,
 											 behavior,
 											 restart_seqs);
 			}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index d09703f7ac..d9f157172b 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1816,7 +1816,6 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1846,7 +1845,6 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1885,7 +1883,6 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1900,7 +1897,6 @@ apply_handle_truncate(StringInfo s)
 	 */
 	ExecuteTruncateGuts(rels,
 						relids,
-						relids_extra,
 						relids_logged,
 						DROP_RESTRICT,
 						restart_seqs);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b808a07e46..14f4b4882f 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -21,16 +21,6 @@
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
-/*
- * These values indicate how a relation was specified as the target to
- * truncate in TRUNCATE command.
- */
-#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
-#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
-#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
-												 * due to dependency (e.g.,
-												 * partition table) */
-
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
 
@@ -68,7 +58,6 @@ extern void CheckTableNotInUse(Relation rel, const char *stmt);
 extern void ExecuteTruncate(TruncateStmt *stmt);
 extern void ExecuteTruncateGuts(List *explicit_rels,
 								List *relids,
-								List *relids_extra,
 								List *relids_logged,
 								DropBehavior behavior,
 								bool restart_seqs);
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 4ebbca6de9..4f17becbb8 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -161,7 +161,6 @@ typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
 typedef void (*ExecForeignTruncate_function) (List *rels,
-											  List *rels_extra,
 											  DropBehavior behavior,
 											  bool restart_seqs);
 
#112Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#111)
Re: TRUNCATE on foreign table

On Fri, Apr 23, 2021 at 1:39 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to

I think it is not "information about", no? We just don't pass the ONLY
option; instead, we skip it. IMO, we can say "Note that the ONLY option
specified with a foreign table in the original TRUNCATE command is
skipped and not passed to ExecForeignTruncate."

Probably I still fail to understand your point.
But if "information about" is confusing, I'm ok to
remove that. Fixed.

A small typo in the docs patch: It is "are not passed to", instead of
"is not passed to" since we used plural "options". "Note that the ONLY
options specified in the original TRUNCATE command are not passed to"

+ Note that the <literal>ONLY</literal> options specified
in the original <command>TRUNCATE</command> command is not passed to

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#113Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#112)
2 attachment(s)
Re: TRUNCATE on foreign table

On 2021/04/23 19:56, Bharath Rupireddy wrote:

On Fri, Apr 23, 2021 at 1:39 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

+    <para>
+     Note that information about <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command is not passed to

I think it is not "information about", no? We just don't pass the ONLY
option; instead, we skip it. IMO, we can say "Note that the ONLY option
specified with a foreign table in the original TRUNCATE command is
skipped and not passed to ExecForeignTruncate."

Probably I still fail to understand your point.
But if "information about" is confusing, I'm ok to
remove that. Fixed.

A small typo in the docs patch: It is "are not passed to", instead of
"is not passed to" since we used plural "options". "Note that the ONLY
options specified in the original TRUNCATE command are not passed to"

+ Note that the <literal>ONLY</literal> options specified
in the original <command>TRUNCATE</command> command is not passed to

Thanks for the review! I fixed this.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

Attachments:

truncate_foreign_table_docs_v4.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_docs_v4.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index e08441ec8b..8aa7edfe4a 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1076,27 +1076,25 @@ ExecForeignTruncate(List *rels,
                     bool restart_seqs);
 </programlisting>
 
-     Truncate a set of foreign tables specified in <literal>rels</literal>.
-     This function is called when <xref linkend="sql-truncate"/> is executed
-     on foreign tables.  <literal>rels</literal> is the list of
-     <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.
+     Truncate foreign tables.  This function is called when
+     <xref linkend="sql-truncate"/> is executed on a foreign table.
+     <literal>rels</literal> is a list of <structname>Relation</structname>
+     data structures of foreign tables to truncate.
     </para>
 
     <para>
-     <literal>behavior</literal> defines how foreign tables should
-     be truncated, using as possible values <literal>DROP_RESTRICT</literal>,
-     which means that <literal>RESTRICT</literal> option is specified,
-     and <literal>DROP_CASCADE</literal>, which means that
-     <literal>CASCADE</literal> option is specified, in
-     <command>TRUNCATE</command> command.
+     <literal>behavior</literal> is either <literal>DROP_RESTRICT</literal>
+     or <literal>DROP_CASCADE</literal> indicating that the
+     <literal>RESTRICT</literal> or <literal>CASCADE</literal> option was
+     requested in the original <command>TRUNCATE</command> command,
+     respectively.
     </para>
     
     <para>
-     <literal>restart_seqs</literal> is set to <literal>true</literal>
-     if <literal>RESTART IDENTITY</literal> option is specified in
-     <command>TRUNCATE</command> command.  It is <literal>false</literal>
-     if <literal>CONTINUE IDENTITY</literal> option is specified.
+     If <literal>restart_seqs</literal> is <literal>true</literal>,
+     the original <command>TRUNCATE</command> command requested the
+     <literal>RESTART IDENTITY</literal> behavior, otherwise the
+     <literal>CONTINUE IDENTITY</literal> behavior was requested.
     </para>
 
     <para>
@@ -1109,11 +1107,10 @@ ExecForeignTruncate(List *rels,
     </para>
 
     <para>
-     <command>TRUNCATE</command> invokes
-     <function>ExecForeignTruncate</function> once per foreign server
-     that foreign tables to truncate belong to.  This means that all foreign
-     tables included in <literal>rels</literal> must belong to the same
-     server.
+     <function>ExecForeignTruncate</function> is invoked once per
+     foreign server for which foreign tables are to be truncated.
+     This means that all foreign tables included in <literal>rels</literal>
+     must belong to the same server.
     </para>
 
     <para>
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index b0806c1274..839126c4ef 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -459,11 +459,17 @@ OPTIONS (ADD password_required 'false');
      <listitem>
       <para>
        This option controls whether <filename>postgres_fdw</filename> allows
-       foreign tables to be truncated using <command>TRUNCATE</command>
+       foreign tables to be truncated using the <command>TRUNCATE</command>
        command. It can be specified for a foreign table or a foreign server.
        A table-level option overrides a server-level option.
        The default is <literal>true</literal>.
       </para>
+
+      <para>
+       Of course, if the remote table is not in fact truncatable, an error
+       would occur anyway.  Use of this option primarily allows the error to
+       be thrown locally without querying the remote server.
+      </para>
      </listitem>
     </varlistentry>
    </variablelist>
diff --git a/doc/src/sgml/ref/truncate.sgml b/doc/src/sgml/ref/truncate.sgml
index acf3633be4..9af42dd008 100644
--- a/doc/src/sgml/ref/truncate.sgml
+++ b/doc/src/sgml/ref/truncate.sgml
@@ -173,7 +173,7 @@ TRUNCATE [ TABLE ] [ ONLY ] <replaceable class="parameter">name</replaceable> [
 
   <para>
    <command>TRUNCATE</command> can be used for foreign tables if
-   the foreign data wrapper supports, for instance,
+   supported by the foreign data wrapper,
    see <xref linkend="postgres-fdw"/>.
   </para>
  </refsect1>
truncate_foreign_table_dont_pass_only_clause_v5.patchtext/plain; charset=UTF-8; name=truncate_foreign_table_dont_pass_only_clause_v5.patch; x-mac-creator=0; x-mac-type=0Download
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc4c3620d..7a798530e3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2179,24 +2179,19 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 void
 deparseTruncateSql(StringInfo buf,
 				   List *rels,
-				   List *rels_extra,
 				   DropBehavior behavior,
 				   bool restart_seqs)
 {
-	ListCell   *lc1,
-			   *lc2;
+	ListCell   *cell;
 
 	appendStringInfoString(buf, "TRUNCATE ");
 
-	forboth(lc1, rels, lc2, rels_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = lfirst(cell);
 
-		if (lc1 != list_head(rels))
+		if (cell != list_head(rels))
 			appendStringInfoString(buf, ", ");
-		if (extra & TRUNCATE_REL_CONTEXT_ONLY)
-			appendStringInfoString(buf, "ONLY ");
 
 		deparseRelation(buf, rel);
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5070d93239..8e1cc69508 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8218,13 +8218,13 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -8364,6 +8364,8 @@ SELECT count(*) from tru_pk_ftable; -- 0
 (1 row)
 
 -- truncate with ONLY clause
+-- Since ONLY is specified, the table tru_ftable_child that inherits
+-- tru_ftable_parent locally is not truncated.
 TRUNCATE ONLY tru_ftable_parent;
 SELECT sum(id) FROM tru_ftable_parent;  -- 126
  sum 
@@ -8388,22 +8390,26 @@ SELECT sum(id) FROM tru_ftable;   -- 95
   95
 (1 row)
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
- sum 
------
-  60
+-- Both parent and child tables in the foreign server are truncated
+-- even though ONLY is specified because ONLY has no effect
+-- when truncating a foreign table.
+TRUNCATE ONLY tru_ftable;
+SELECT count(*) FROM tru_ftable;   -- 0
+ count 
+-------
+     0
 (1 row)
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
  sum 
 -----
- 175
+ 255
 (1 row)
 
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
  count 
 -------
      0
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index e201b5404e..8bcdc8d616 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -401,7 +401,6 @@ static void postgresExplainForeignModify(ModifyTableState *mtstate,
 static void postgresExplainDirectModify(ForeignScanState *node,
 										ExplainState *es);
 static void postgresExecForeignTruncate(List *rels,
-										List *rels_extra,
 										DropBehavior behavior,
 										bool restart_seqs);
 static bool postgresAnalyzeForeignTable(Relation relation,
@@ -2881,7 +2880,6 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
  */
 static void
 postgresExecForeignTruncate(List *rels,
-							List *rels_extra,
 							DropBehavior behavior,
 							bool restart_seqs)
 {
@@ -2964,7 +2962,7 @@ postgresExecForeignTruncate(List *rels,
 
 	/* Construct the TRUNCATE command string */
 	initStringInfo(&sql);
-	deparseTruncateSql(&sql, rels, rels_extra, behavior, restart_seqs);
+	deparseTruncateSql(&sql, rels, behavior, restart_seqs);
 
 	/* Issue the TRUNCATE command to remote server */
 	do_sql_command(conn, sql.data);
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 5d44b75314..9591c0f6c2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -209,7 +209,6 @@ extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
-							   List *rels_extra,
 							   DropBehavior behavior,
 							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 74590089bd..dcd36a9753 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2355,7 +2355,6 @@ drop table loc3;
 -- test for TRUNCATE
 -- ===================================================================
 CREATE TABLE tru_rtable0 (id int primary key);
-CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable (id int)
        SERVER loopback OPTIONS (table_name 'tru_rtable0');
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
@@ -2363,6 +2362,7 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
 CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
 CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
                             FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE tru_rtable1 (id int primary key);
 CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
                                     FOR VALUES WITH (MODULUS 2, REMAINDER 1)
        SERVER loopback OPTIONS (table_name 'tru_rtable1');
@@ -2428,6 +2428,8 @@ SELECT count(*) from tru_ftable; -- 0
 SELECT count(*) from tru_pk_ftable; -- 0
 
 -- truncate with ONLY clause
+-- Since ONLY is specified, the table tru_ftable_child that inherits
+-- tru_ftable_parent locally is not truncated.
 TRUNCATE ONLY tru_ftable_parent;
 SELECT sum(id) FROM tru_ftable_parent;  -- 126
 TRUNCATE tru_ftable_parent;
@@ -2439,13 +2441,17 @@ INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
 INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
 SELECT sum(id) FROM tru_ftable;   -- 95
 
-TRUNCATE ONLY tru_ftable;		-- truncate only parent portion
-SELECT sum(id) FROM tru_ftable;   -- 60
+-- Both parent and child tables in the foreign server are truncated
+-- even though ONLY is specified because ONLY has no effect
+-- when truncating a foreign table.
+TRUNCATE ONLY tru_ftable;
+SELECT count(*) FROM tru_ftable;   -- 0
 
 INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
-SELECT sum(id) FROM tru_ftable;		-- 175
+INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
+SELECT sum(id) FROM tru_ftable;		-- 255
 TRUNCATE tru_ftable;			-- truncate both of parent and child
-SELECT count(*) FROM tru_ftable;    -- empty
+SELECT count(*) FROM tru_ftable;    -- 0
 
 -- cleanup
 DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 9f9274ce55..e08441ec8b 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1071,28 +1071,16 @@ EndDirectModify(ForeignScanState *node);
     <para>
 <programlisting>
 void
-ExecForeignTruncate(List *rels, List *rels_extra,
-                    DropBehavior behavior, bool restart_seqs);
+ExecForeignTruncate(List *rels,
+                    DropBehavior behavior,
+                    bool restart_seqs);
 </programlisting>
 
      Truncate a set of foreign tables specified in <literal>rels</literal>.
      This function is called when <xref linkend="sql-truncate"/> is executed
      on foreign tables.  <literal>rels</literal> is the list of
      <structname>Relation</structname> data structure that indicates
-     a foreign table to truncate.  <literal>rels_extra</literal> the list of
-     <literal>int</literal> value, which delivers extra information about
-     a foreign table to truncate.  Possible values are
-     <literal>TRUNCATE_REL_CONTEXT_NORMAL</literal>, which means that
-     the foreign table is specified WITHOUT <literal>ONLY</literal> clause
-     in <command>TRUNCATE</command>,
-     <literal>TRUNCATE_REL_CONTEXT_ONLY</literal>, which means that
-     the foreign table is specified WITH <literal>ONLY</literal> clause,
-     and <literal>TRUNCATE_REL_CONTEXT_CASCADING</literal>,
-     which means that the foreign table is not specified explicitly,
-     but will be truncated due to dependency (for example, partition table).
-     There is one-to-one mapping between <literal>rels</literal> and
-     <literal>rels_extra</literal>.  The number of entries is the same
-     between the two lists.
+     a foreign table to truncate.
     </para>
 
     <para>
@@ -1111,6 +1099,15 @@ ExecForeignTruncate(List *rels, List *rels_extra,
      if <literal>CONTINUE IDENTITY</literal> option is specified.
     </para>
 
+    <para>
+     Note that the <literal>ONLY</literal> options specified
+     in the original <command>TRUNCATE</command> command are not passed to
+     <function>ExecForeignTruncate</function>.  This behavior is similar to
+     the callback functions of <command>SELECT</command>,
+     <command>UPDATE</command> and <command>DELETE</command> on
+     a foreign table.
+    </para>
+
     <para>
      <command>TRUNCATE</command> invokes
      <function>ExecForeignTruncate</function> once per foreign server
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 5320accf6f..b0806c1274 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -69,6 +69,13 @@
   have privileges to do these things.)
  </para>
 
+ <para>
+  Note that the <literal>ONLY</literal> option specified in
+  <command>SELECT</command>, <command>UPDATE</command>,
+  <command>DELETE</command> or <command>TRUNCATE</command>
+  has no effect when accessing or modifying the remote table.
+ </para>
+
  <para>
   Note that <filename>postgres_fdw</filename> currently lacks support for
   <command>INSERT</command> statements with an <literal>ON CONFLICT DO
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 7d00f4eb25..8e717ada28 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -323,7 +323,6 @@ typedef struct ForeignTruncateInfo
 {
 	Oid			serverid;
 	List	   *rels;
-	List	   *rels_extra;
 } ForeignTruncateInfo;
 
 /*
@@ -1620,7 +1619,6 @@ ExecuteTruncate(TruncateStmt *stmt)
 {
 	List	   *rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *cell;
 
@@ -1654,9 +1652,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 		rels = lappend(rels, rel);
 		relids = lappend_oid(relids, myrelid);
-		relids_extra = lappend_int(relids_extra, (recurse ?
-												  TRUNCATE_REL_CONTEXT_NORMAL :
-												  TRUNCATE_REL_CONTEXT_ONLY));
+
 		/* Log this relation only if needed for logical decoding */
 		if (RelationIsLogicallyLogged(rel))
 			relids_logged = lappend_oid(relids_logged, myrelid);
@@ -1704,8 +1700,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1718,7 +1713,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 					 errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
 	}
 
-	ExecuteTruncateGuts(rels, relids, relids_extra, relids_logged,
+	ExecuteTruncateGuts(rels, relids, relids_logged,
 						stmt->behavior, stmt->restart_seqs);
 
 	/* And close the rels */
@@ -1739,15 +1734,13 @@ ExecuteTruncate(TruncateStmt *stmt)
  *
  * explicit_rels is the list of Relations to truncate that the command
  * specified.  relids is the list of Oids corresponding to explicit_rels.
- * relids_extra is the list of integer values that deliver extra information
- * about relations in explicit_rels.  relids_logged is the list of Oids
- * (a subset of relids) that require WAL-logging.  This is all a bit redundant,
- * but the existing callers have this information handy in this form.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging.  This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
  */
 void
 ExecuteTruncateGuts(List *explicit_rels,
 					List *relids,
-					List *relids_extra,
 					List *relids_logged,
 					DropBehavior behavior, bool restart_seqs)
 {
@@ -1760,8 +1753,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 	ResultRelInfo *resultRelInfo;
 	SubTransactionId mySubid;
 	ListCell   *cell;
-	ListCell   *lc1,
-			*lc2;
 	Oid		   *logrelids;
 
 	/*
@@ -1799,8 +1790,7 @@ ExecuteTruncateGuts(List *explicit_rels,
 				truncate_check_activity(rel);
 				rels = lappend(rels, rel);
 				relids = lappend_oid(relids, relid);
-				relids_extra = lappend_int(relids_extra,
-										   TRUNCATE_REL_CONTEXT_CASCADING);
+
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(rel))
 					relids_logged = lappend_oid(relids_logged, relid);
@@ -1901,11 +1891,9 @@ ExecuteTruncateGuts(List *explicit_rels,
 	 */
 	mySubid = GetCurrentSubTransactionId();
 
-	Assert(list_length(rels) == list_length(relids_extra));
-	forboth(lc1, rels, lc2, relids_extra)
+	foreach(cell, rels)
 	{
-		Relation	rel = (Relation) lfirst(lc1);
-		int			extra = lfirst_int(lc2);
+		Relation	rel = (Relation) lfirst(cell);
 
 		/*
 		 * Save OID of partitioned tables for later; nothing else to do for
@@ -1952,7 +1940,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			{
 				ft_info->serverid = serverid;
 				ft_info->rels = NIL;
-				ft_info->rels_extra = NIL;
 			}
 
 			/*
@@ -1960,7 +1947,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 			 * foreign table belongs to.
 			 */
 			ft_info->rels = lappend(ft_info->rels, rel);
-			ft_info->rels_extra = lappend_int(ft_info->rels_extra, extra);
 			continue;
 		}
 
@@ -2044,7 +2030,6 @@ ExecuteTruncateGuts(List *explicit_rels,
 				Assert(routine->ExecForeignTruncate != NULL);
 
 				routine->ExecForeignTruncate(ft_info->rels,
-											 ft_info->rels_extra,
 											 behavior,
 											 restart_seqs);
 			}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index d09703f7ac..d9f157172b 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1816,7 +1816,6 @@ apply_handle_truncate(StringInfo s)
 	List	   *rels = NIL;
 	List	   *part_rels = NIL;
 	List	   *relids = NIL;
-	List	   *relids_extra = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
 
@@ -1846,7 +1845,6 @@ apply_handle_truncate(StringInfo s)
 		remote_rels = lappend(remote_rels, rel);
 		rels = lappend(rels, rel->localrel);
 		relids = lappend_oid(relids, rel->localreloid);
-		relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_NORMAL);
 		if (RelationIsLogicallyLogged(rel->localrel))
 			relids_logged = lappend_oid(relids_logged, rel->localreloid);
 
@@ -1885,7 +1883,6 @@ apply_handle_truncate(StringInfo s)
 				rels = lappend(rels, childrel);
 				part_rels = lappend(part_rels, childrel);
 				relids = lappend_oid(relids, childrelid);
-				relids_extra = lappend_int(relids_extra, TRUNCATE_REL_CONTEXT_CASCADING);
 				/* Log this relation only if needed for logical decoding */
 				if (RelationIsLogicallyLogged(childrel))
 					relids_logged = lappend_oid(relids_logged, childrelid);
@@ -1900,7 +1897,6 @@ apply_handle_truncate(StringInfo s)
 	 */
 	ExecuteTruncateGuts(rels,
 						relids,
-						relids_extra,
 						relids_logged,
 						DROP_RESTRICT,
 						restart_seqs);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index b808a07e46..14f4b4882f 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -21,16 +21,6 @@
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
-/*
- * These values indicate how a relation was specified as the target to
- * truncate in TRUNCATE command.
- */
-#define TRUNCATE_REL_CONTEXT_NORMAL       1 /* specified without ONLY clause */
-#define TRUNCATE_REL_CONTEXT_ONLY         2 /* specified with ONLY clause */
-#define TRUNCATE_REL_CONTEXT_CASCADING     3	/* not specified but truncated
-												 * due to dependency (e.g.,
-												 * partition table) */
-
 struct AlterTableUtilityContext;	/* avoid including tcop/utility.h here */
 
 
@@ -68,7 +58,6 @@ extern void CheckTableNotInUse(Relation rel, const char *stmt);
 extern void ExecuteTruncate(TruncateStmt *stmt);
 extern void ExecuteTruncateGuts(List *explicit_rels,
 								List *relids,
-								List *relids_extra,
 								List *relids_logged,
 								DropBehavior behavior,
 								bool restart_seqs);
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 4ebbca6de9..4f17becbb8 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -161,7 +161,6 @@ typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
 											   Oid serverOid);
 
 typedef void (*ExecForeignTruncate_function) (List *rels,
-											  List *rels_extra,
 											  DropBehavior behavior,
 											  bool restart_seqs);
 
#114Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#113)
Re: TRUNCATE on foreign table

On Fri, Apr 23, 2021 at 9:50 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

Thanks for the review! I fixed this.

Thanks for the updated patches.

In the docs v4 patch, I think we can combine the two lines below into a single line:
+ supported by the foreign data wrapper,
see <xref linkend="postgres-fdw"/>.

Other than the above minor change, both patches look good to me, I
have no further comments.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#115Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#114)
Re: TRUNCATE on foreign table

On 2021/04/26 13:52, Bharath Rupireddy wrote:

On Fri, Apr 23, 2021 at 9:50 PM Fujii Masao <masao.fujii@oss.nttdata.com> wrote:

Thanks for the review! I fixed this.

Thanks for the updated patches.

In the docs v4 patch, I think we can combine the two lines below into a single line:
+ supported by the foreign data wrapper,
see <xref linkend="postgres-fdw"/>.

You mean "supported by the foreign data wrapper <xref linkend="postgres-fdw"/>"?

I was thinking that it's better to separate them because postgres_fdw
is just an example of the foreign data wrapper supporting TRUNCATE.
This makes me think again; isn't it better to add "for example" or
"for instance" after "data wrapper"? That is,

<command>TRUNCATE</command> can be used for foreign tables if
supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

Other than the above minor change, both patches look good to me, I
have no further comments.

Thanks! I pushed the patch truncate_foreign_table_dont_pass_only_clause_xx.patch first.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#116Bharath Rupireddy
bharath.rupireddyforpostgres@gmail.com
In reply to: Fujii Masao (#115)
Re: TRUNCATE on foreign table

On Tue, Apr 27, 2021 at 11:19 AM Fujii Masao
<masao.fujii@oss.nttdata.com> wrote:

In the docs v4 patch, I think we can combine the two lines below into a single line:
+ supported by the foreign data wrapper,
see <xref linkend="postgres-fdw"/>.

You mean "supported by the foreign data wrapper <xref linkend="postgres-fdw"/>"?

I was thinking that it's better to separate them because postgres_fdw
is just an example of the foreign data wrapper supporting TRUNCATE.
This makes me think again; isn't it better to add "for example" or
"for instance" after "data wrapper"? That is,

<command>TRUNCATE</command> can be used for foreign tables if
supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

+1.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com

#117Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Bharath Rupireddy (#116)
Re: TRUNCATE on foreign table

On 2021/04/27 15:02, Bharath Rupireddy wrote:

On Tue, Apr 27, 2021 at 11:19 AM Fujii Masao
<masao.fujii@oss.nttdata.com> wrote:

In the docs v4 patch, I think we can combine the two lines below into a single line:
+ supported by the foreign data wrapper,
see <xref linkend="postgres-fdw"/>.

You mean "supported by the foreign data wrapper <xref linkend="postgres-fdw"/>"?

I was thinking that it's better to separate them because postgres_fdw
is just an example of the foreign data wrapper supporting TRUNCATE.
This makes me think again; isn't it better to add "for example" or
"for instance" after "data wrapper"? That is,

<command>TRUNCATE</command> can be used for foreign tables if
supported by the foreign data wrapper, for instance,
see <xref linkend="postgres-fdw"/>.

+1.

Pushed. Thanks!

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION