From 8549f29f8a94674ee75fbeb6c699ef1750f51df2 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Mon, 18 Jul 2022 09:23:23 +0200
Subject: [PATCH 2/2] rework postgres_fdw analyze sampling

---
 contrib/postgres_fdw/deparse.c                | 167 +++++-------------
 .../postgres_fdw/expected/postgres_fdw.out    |  44 ++++-
 contrib/postgres_fdw/option.c                 |  24 ++-
 contrib/postgres_fdw/postgres_fdw.c           | 131 ++++++++++----
 contrib/postgres_fdw/postgres_fdw.h           |  20 ++-
 contrib/postgres_fdw/sql/postgres_fdw.sql     |  54 ++++++
 doc/src/sgml/postgres-fdw.sgml                |  22 +++
 7 files changed, 289 insertions(+), 173 deletions(-)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 8454f489161..e9d7869397e 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2298,7 +2298,7 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
 }
 
 /*
- * Construct SELECT statement to acquire numbe of rows of given relation.
+ * Construct SELECT statement to acquire the number of rows of a relation.
  *
  * Note: Maybe this should compare relpages and current relation size
  * and adjust reltuples accordingly?
@@ -2322,74 +2322,31 @@ deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
  *
  * SELECT command is appended to buf, and list of columns retrieved
  * is returned to *retrieved_attrs.
- */
-void
-deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
-{
-	Oid			relid = RelationGetRelid(rel);
-	TupleDesc	tupdesc = RelationGetDescr(rel);
-	int			i;
-	char	   *colname;
-	List	   *options;
-	ListCell   *lc;
-	bool		first = true;
-
-	*retrieved_attrs = NIL;
-
-	appendStringInfoString(buf, "SELECT ");
-	for (i = 0; i < tupdesc->natts; i++)
-	{
-		/* Ignore dropped columns. */
-		if (TupleDescAttr(tupdesc, i)->attisdropped)
-			continue;
-
-		if (!first)
-			appendStringInfoString(buf, ", ");
-		first = false;
-
-		/* Use attribute name or column_name option. */
-		colname = NameStr(TupleDescAttr(tupdesc, i)->attname);
-		options = GetForeignColumnOptions(relid, i + 1);
-
-		foreach(lc, options)
-		{
-			DefElem    *def = (DefElem *) lfirst(lc);
-
-			if (strcmp(def->defname, "column_name") == 0)
-			{
-				colname = defGetString(def);
-				break;
-			}
-		}
-
-		appendStringInfoString(buf, quote_identifier(colname));
-
-		*retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
-	}
-
-	/* Don't generate bad syntax for zero-column relation. */
-	if (first)
-		appendStringInfoString(buf, "NULL");
-
-	/*
-	 * Construct FROM clause
-	 */
-	appendStringInfoString(buf, " FROM ");
-	deparseRelation(buf, rel);
-}
-
-/*
- * Construct SELECT statement to acquire sample rows of given relation,
- * by sampling a fraction of the table using TABLESAMPLE SYSTEM.
  *
- * SELECT command is appended to buf, and list of columns retrieved
- * is returned to *retrieved_attrs.
+ * XXX We allow customizing the sampling method, but we only support methods
+ * we can decide based on server version. Allowing custom TSM modules (for
+ * example tsm_system_rows) might be useful, but it would require detecting
+ * which extensions are installed, to allow automatic fall-back. Moreover, the
+ * methods use different parameters (not sampling rate). So we don't do this
+ * for now, leaving it for future improvements.
+ *
+ * XXX Using remote random() to sample rows has advantages & disadvantages.
+ * The advantages are that this works on all PostgreSQL versions (unlike
+ * TABLESAMPLE), and that it does the sampling on the remote side (unlike
+ * the old approach, which transfers everything and then discards most data).
+ * We could also do "ORDER BY random() LIMIT x", which would always pick
+ * the expected number of rows, but it requires sorting so it's a bit more
+ * expensive.
+ *
+ * The disadvantage is that we still have to read all rows and evaluate the
+ * random(), while TABLESAMPLE skips most of the pages entirely.
  *
- * Note: We could allow selecting system/bernoulli, and maybe even the
- * optional TSM modules (especially tsm_system_rows would help).
+ * XXX What if we need only a subset of columns, e.g. ANALYZE t(a,b)?
  */
 void
-deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac)
+deparseAnalyzeSql(StringInfo buf, Relation rel,
+				  PgFdwSamplingMethod sample_method, double sample_frac,
+				  List **retrieved_attrs)
 {
 	Oid			relid = RelationGetRelid(rel);
 	TupleDesc	tupdesc = RelationGetDescr(rel);
@@ -2437,77 +2394,35 @@ deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel, List **retrieved_attr
 		appendStringInfoString(buf, "NULL");
 
 	/*
-	 * Construct FROM clause
+	 * Construct FROM clause, and perhaps WHERE clause too, depending on the
+	 * selected sampling method.
 	 */
 	appendStringInfoString(buf, " FROM ");
 	deparseRelation(buf, rel);
-	appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac));
-}
 
-/*
- * Construct SELECT statement to acquire sample rows of given relation,
- * by sampling a fraction of the table using TABLESAMPLE SYSTEM.
- *
- * SELECT command is appended to buf, and list of columns retrieved
- * is returned to *retrieved_attrs.
- *
- * Note: We could allow selecting system/bernoulli, and maybe even the
- * optional TSM modules (especially tsm_system_rows would help).
- */
-void
-deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac)
-{
-	Oid			relid = RelationGetRelid(rel);
-	TupleDesc	tupdesc = RelationGetDescr(rel);
-	int			i;
-	char	   *colname;
-	List	   *options;
-	ListCell   *lc;
-	bool		first = true;
-
-	*retrieved_attrs = NIL;
-
-	appendStringInfoString(buf, "SELECT ");
-	for (i = 0; i < tupdesc->natts; i++)
+	switch (sample_method)
 	{
-		/* Ignore dropped columns. */
-		if (TupleDescAttr(tupdesc, i)->attisdropped)
-			continue;
-
-		if (!first)
-			appendStringInfoString(buf, ", ");
-		first = false;
-
-		/* Use attribute name or column_name option. */
-		colname = NameStr(TupleDescAttr(tupdesc, i)->attname);
-		options = GetForeignColumnOptions(relid, i + 1);
+		case ANALYZE_SAMPLE_OFF:
+			/* nothing to do here */
+			break;
 
-		foreach(lc, options)
-		{
-			DefElem    *def = (DefElem *) lfirst(lc);
+		case ANALYZE_SAMPLE_RANDOM:
+			appendStringInfo(buf, " WHERE pg_catalog.random() < %f", sample_frac);
+			break;
 
-			if (strcmp(def->defname, "column_name") == 0)
-			{
-				colname = defGetString(def);
-				break;
-			}
-		}
+		case ANALYZE_SAMPLE_SYSTEM:
+			appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac));
+			break;
 
-		appendStringInfoString(buf, quote_identifier(colname));
+		case ANALYZE_SAMPLE_BERNOULLI:
+			appendStringInfo(buf, " TABLESAMPLE BERNOULLI(%f)", (100.0 * sample_frac));
+			break;
 
-		*retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
+		case ANALYZE_SAMPLE_AUTO:
+			/* should have been resolved into actual method */
+			elog(ERROR, "unexpected sampling method");
+			break;
 	}
-
-	/* Don't generate bad syntax for zero-column relation. */
-	if (first)
-		appendStringInfoString(buf, "NULL");
-
-	/*
-	 * Construct FROM clause
-	 */
-	appendStringInfoString(buf, " FROM ");
-	deparseRelation(buf, rel);
-	appendStringInfo(buf, " WHERE random() < %f", sample_frac);
 }
 
 /*
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5f2ef88cf38..e838d31815e 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -9532,7 +9532,7 @@ DO $d$
     END;
 $d$;
 ERROR:  invalid option "password"
-HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, parallel_commit, keep_connections
+HINT:  Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, parallel_commit, keep_connections, analyze_sampling
 CONTEXT:  SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
 PL/pgSQL function inline_code_block line 3 at EXECUTE
 -- If we add a password for our user mapping instead, we should get a different
@@ -11296,3 +11296,45 @@ SELECT * FROM prem2;
 
 ALTER SERVER loopback OPTIONS (DROP parallel_commit);
 ALTER SERVER loopback2 OPTIONS (DROP parallel_commit);
+-- ===================================================================
+-- test for ANALYZE
+-- ===================================================================
+CREATE TABLE analyze_rtable1 (id int primary key, a text, b bigint);
+CREATE TABLE analyze_rtable2 (id int primary key, a text, b bigint);
+CREATE TABLE analyze_ptable (id int, a text, b bigint) PARTITION BY HASH(id);
+CREATE FOREIGN TABLE analyze_ftable__p1 PARTITION OF analyze_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 0)
+       SERVER loopback OPTIONS (table_name 'analyze_rtable1');
+CREATE FOREIGN TABLE analyze_ftable__p2 PARTITION OF analyze_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'analyze_rtable2');
+INSERT INTO analyze_ptable (SELECT x FROM generate_series(1,5000) x);
+-- analyze the 'local' tables to update relpages/reltuples
+ANALYZE analyze_rtable1, analyze_rtable2;
+-- now analyze the remote tables directly - this expects to scan everything,
+-- so should not do any sampling
+ANALYZE analyze_ftable__p1;
+ANALYZE analyze_ftable__p2;
+-- now analyze the parent - this should scan everything too, because 30k
+-- sample requires everything to be scanned
+ANALYZE analyze_ptable;
+-- now lower the target to 10, which requires only 3k rows sample, so about
+-- 1500 rows from each partition, so sampling will kick in, by default with
+-- the 'bernoulli' tablesample method
+SET default_statistics_target = 10;
+ANALYZE analyze_ptable;
+-- now alter the method for remote server to 'system'
+ALTER SERVER loopback OPTIONS (analyze_sampling 'system');
+ANALYZE analyze_ptable;
+-- now alter the method for remote table to 'random', to not use tablesample
+-- but the 'legacy' sampling, and disable sampling for the other partition
+ALTER FOREIGN TABLE analyze_ftable__p1 OPTIONS (ADD analyze_sampling 'random');
+ALTER FOREIGN TABLE analyze_ftable__p2 OPTIONS (ADD analyze_sampling 'off');
+ANALYZE analyze_ptable;
+-- now add more data, so that each partition exceeds the statistics target
+INSERT INTO analyze_ptable (SELECT x FROM generate_series(5001, 10000) x);
+ANALYZE analyze_rtable1, analyze_rtable2;
+ANALYZE analyze_ptable;
+-- cleanup
+DROP FOREIGN TABLE analyze_ftable__p1, analyze_ftable__p2;
+DROP TABLE analyze_ptable, analyze_rtable1, analyze_rtable2;
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 3749d4701a7..7c4a7d8d4b7 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -122,8 +122,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
 			strcmp(def->defname, "parallel_commit") == 0 ||
-			strcmp(def->defname, "keep_connections") == 0 ||
-			strcmp(def->defname, "sample") == 0)
+			strcmp(def->defname, "keep_connections") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -208,6 +207,23 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 						 errmsg("sslcert and sslkey are superuser-only"),
 						 errhint("User mappings with the sslcert or sslkey options set may only be created or modified by the superuser")));
 		}
+		else if (strcmp(def->defname, "analyze_sampling") == 0)
+		{
+			char	   *value;
+
+			value = defGetString(def);
+
+			/* we recognize off/auto/random/system/bernoulli */
+			if (strcmp(value, "off") != 0 &&
+				strcmp(value, "auto") != 0 &&
+				strcmp(value, "random") != 0 &&
+				strcmp(value, "system") != 0 &&
+				strcmp(value, "bernoulli") != 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid value for string option \"%s\": %s",
+								def->defname, value)));
+		}
 	}
 
 	PG_RETURN_VOID();
@@ -256,8 +272,8 @@ InitPgFdwOptions(void)
 		{"password_required", UserMappingRelationId, false},
 
 		/* sampling is available on both server and table */
-		{"sample", ForeignServerRelationId, false},
-		{"sample", ForeignTableRelationId, false},
+		{"analyze_sampling", ForeignServerRelationId, false},
+		{"analyze_sampling", ForeignTableRelationId, false},
 
 		/*
 		 * sslcert and sslkey are in fact libpq options, but we repeat them
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 3adf518b676..ea21c5c5119 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -5056,9 +5056,8 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	ListCell   *lc;
 	int			server_version_num;
 
-	/* sampling enabled by default */
-	bool		do_sample = true;
-	bool		use_tablesample = true;
+	/* analyze sampling enabled by default, if available */
+	PgFdwSamplingMethod	method = ANALYZE_SAMPLE_AUTO;
 	double		sample_frac = -1.0;
 	double		reltuples;
 
@@ -5091,57 +5090,120 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	/* We'll need server version, so fetch it now. */
 	server_version_num = PQserverVersion(conn);
 
-	/* disable tablesample on old remote servers */
-	if (server_version_num < 95000)
-		use_tablesample = false;
-
 	/*
-	 * Should we use TABLESAMPLE to collect the remote sample?
+	 * Should we try to do the sampling for ANALYZE on the remote server?
 	 */
 	foreach(lc, server->options)
 	{
 		DefElem    *def = (DefElem *) lfirst(lc);
 
-		if (strcmp(def->defname, "sample") == 0)
+		if (strcmp(def->defname, "analyze_sampling") == 0)
 		{
-			do_sample = defGetBoolean(def);
+			char *value = defGetString(def);
+
+			if (strcmp(value, "off") == 0)
+				method = ANALYZE_SAMPLE_OFF;
+			else if (strcmp(value, "auto") == 0)
+				method = ANALYZE_SAMPLE_AUTO;
+			else if (strcmp(value, "random") == 0)
+				method = ANALYZE_SAMPLE_RANDOM;
+			else if (strcmp(value, "system") == 0)
+				method = ANALYZE_SAMPLE_SYSTEM;
+			else if (strcmp(value, "bernoulli") == 0)
+				method = ANALYZE_SAMPLE_BERNOULLI;
+
 			break;
 		}
 	}
+
 	foreach(lc, table->options)
 	{
 		DefElem    *def = (DefElem *) lfirst(lc);
 
-		if (strcmp(def->defname, "sample") == 0)
+		if (strcmp(def->defname, "analyze_sampling") == 0)
 		{
-			do_sample = defGetBoolean(def);
+			char *value = defGetString(def);
+
+			if (strcmp(value, "off") == 0)
+				method = ANALYZE_SAMPLE_OFF;
+			else if (strcmp(value, "auto") == 0)
+				method = ANALYZE_SAMPLE_AUTO;
+			else if (strcmp(value, "random") == 0)
+				method = ANALYZE_SAMPLE_RANDOM;
+			else if (strcmp(value, "system") == 0)
+				method = ANALYZE_SAMPLE_SYSTEM;
+			else if (strcmp(value, "bernoulli") == 0)
+				method = ANALYZE_SAMPLE_BERNOULLI;
+
 			break;
 		}
 	}
 
-	if (do_sample)
+	/*
+	 * Error out if the user explicitly requested one of the TABLESAMPLE
+	 * methods, but the remote server does not support it.
+	 */
+	if ((server_version_num < 95000) &&
+		(method == ANALYZE_SAMPLE_SYSTEM ||
+		 method == ANALYZE_SAMPLE_BERNOULLI))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("remote server does not support TABLESAMPLE feature")));
+
+	/*
+	 * For "auto" method, pick the one we believe is best. For servers with
+	 * TABLESAMPLE support we pick BERNOULLI, for old servers we fall-back to
+	 * random() to at least reduce network transfer.
+	 */
+	if (method == ANALYZE_SAMPLE_AUTO)
+	{
+		if (server_version_num < 95000)
+			method = ANALYZE_SAMPLE_RANDOM;
+		else
+			method = ANALYZE_SAMPLE_BERNOULLI;
+	}
+
+	/*
+	 * If we've decided to do remote sampling, calculate the sampling rate. We
+	 * need to get the number of tuples from the remote server, so we skip the
+	 * network round-trip if not needed.
+	 */
+	if (method != ANALYZE_SAMPLE_OFF)
 	{
 		reltuples = postgresCountTuplesForForeignTable(relation);
 
+		/*
+		 * No rows or we expect to sample everything - disable sampling after
+		 * all (and make sure we don't divide by 0 in sample_frac calculation.)
+		 */
 		if ((reltuples <= 0) || (targrows >= reltuples))
-			do_sample = false;
+			method = ANALYZE_SAMPLE_OFF;
 
-		sample_frac = targrows / reltuples;
+		/* Make sure we don't divide by 0 when calculating the rate. */
+		sample_frac = targrows / Max(1.0, reltuples);
 
-		/* Let's sample a bit more, we'll reduce the sample locally. */
-		sample_frac *= 1.25;
+		/*
+		 * Let's sample a bit more (10%), we'll reduce the sample locally.
+		 *
+		 * XXX Not sure this is really necessary. If we don't trust the remote
+		 * sampling to sample the right number of rows, we should not use it.
+		 */
+		sample_frac *= 1.1;
 
-		/* Sanity checks. */
+		/*
+		 * Ensure the sampling rate is between 0.0 and 1.0, even after the
+		 * 10% adjustment above.
+		 */
 		sample_frac = Min(1.0, Max(0.0, sample_frac));
 
 		/*
-		 * When sampling too large fraction, just read everything.
-		 *
-		 * XXX It's not clear where exactly the threshold is, with slow
-		 * network it may be cheaper to sample even 90%.
+		 * If we expect sampling to eliminate very few rows, just disable it
+		 * and read the whole remote table. We decide based on the number of
+		 * rows we expect to "eliminate" by sampling. If that saves fewer than
+		 * 100 rows, we disable sampling.
 		 */
-		if (sample_frac > 0.5)
-			do_sample = false;
+		if (reltuples * (1 - sample_frac) < 100.0)
+			method = ANALYZE_SAMPLE_OFF;
 	}
 
 	/*
@@ -5151,14 +5213,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	initStringInfo(&sql);
 	appendStringInfo(&sql, "DECLARE c%u CURSOR FOR ", cursor_number);
 
-	if (do_sample && use_tablesample)
-		deparseAnalyzeTableSampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac);
-	else if (do_sample)
-		deparseAnalyzeLegacySampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac);
-	else
-		deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs);
-
-	elog(WARNING, "SQL: %s", sql.data);
+	deparseAnalyzeSql(&sql, relation, method, sample_frac, &astate.retrieved_attrs);
 
 	/* In what follows, do not risk leaking any PGresults. */
 	PG_TRY();
@@ -5251,11 +5306,15 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	/* We assume that we have no dead tuple. */
 	*totaldeadrows = 0.0;
 
-	/* We've retrieved all living tuples from foreign server. */
-	if (do_sample)
-		*totalrows = reltuples;
-	else
+	/*
+	 * Without ANALYZE sampling, we've retrieved all living tuples from foreign
+	 * server, so just use that. Otherwise we have the reltuples estimate we
+	 * got from the remote side.
+	 */
+	if (method == ANALYZE_SAMPLE_OFF)
 		*totalrows = astate.samplerows;
+	else
+		*totalrows = reltuples;
 
 	/*
 	 * Emit some interesting relation info
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index b0d9cf42982..1c2a6045a94 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -134,6 +134,18 @@ typedef struct PgFdwConnState
 	AsyncRequest *pendingAreq;	/* pending async request */
 } PgFdwConnState;
 
+/*
+ * Method used by ANALYZE to sample remote rows.
+ */
+typedef enum PgFdwSamplingMethod
+{
+	ANALYZE_SAMPLE_OFF,			/* no remote sampling */
+	ANALYZE_SAMPLE_AUTO,		/* choose by server version */
+	ANALYZE_SAMPLE_RANDOM,		/* remote random() */
+	ANALYZE_SAMPLE_SYSTEM,		/* TABLESAMPLE system */
+	ANALYZE_SAMPLE_BERNOULLI	/* TABLESAMPLE bernoulli */
+} PgFdwSamplingMethod;
+
 /* in postgres_fdw.c */
 extern int	set_transmission_modes(void);
 extern void reset_transmission_modes(int nestlevel);
@@ -213,13 +225,9 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeTuplesSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
+							  PgFdwSamplingMethod sample_method,
+							  double sample_frac,
 							  List **retrieved_attrs);
-extern void deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel,
-										 List **retrieved_attrs,
-										 double sample_frac);
-extern void deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel,
-										  List **retrieved_attrs,
-										  double sample_frac);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
 							   DropBehavior behavior,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index ae1fc8f58be..9c1daecc1a2 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3636,3 +3636,57 @@ SELECT * FROM prem2;
 
 ALTER SERVER loopback OPTIONS (DROP parallel_commit);
 ALTER SERVER loopback2 OPTIONS (DROP parallel_commit);
+
+
+-- ===================================================================
+-- test for ANALYZE
+-- ===================================================================
+CREATE TABLE analyze_rtable1 (id int primary key, a text, b bigint);
+CREATE TABLE analyze_rtable2 (id int primary key, a text, b bigint);
+
+CREATE TABLE analyze_ptable (id int, a text, b bigint) PARTITION BY HASH(id);
+CREATE FOREIGN TABLE analyze_ftable__p1 PARTITION OF analyze_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 0)
+       SERVER loopback OPTIONS (table_name 'analyze_rtable1');
+CREATE FOREIGN TABLE analyze_ftable__p2 PARTITION OF analyze_ptable
+                                    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
+       SERVER loopback OPTIONS (table_name 'analyze_rtable2');
+INSERT INTO analyze_ptable (SELECT x FROM generate_series(1,5000) x);
+
+-- analyze the 'local' tables to update relpages/reltuples
+ANALYZE analyze_rtable1, analyze_rtable2;
+
+-- now analyze the remote tables directly - this expects to scan everything,
+-- so should not do any sampling
+ANALYZE analyze_ftable__p1;
+ANALYZE analyze_ftable__p2;
+
+-- now analyze the parent - this should scan everything too, because 30k
+-- sample requires everything to be scanned
+ANALYZE analyze_ptable;
+
+-- now lower the target to 10, which requires only 3k rows sample, so about
+-- 1500 rows from each partition, so sampling will kick in, by default with
+-- the 'bernoulli' tablesample method
+SET default_statistics_target = 10;
+ANALYZE analyze_ptable;
+
+-- now alter the method for remote server to 'system'
+ALTER SERVER loopback OPTIONS (analyze_sampling 'system');
+ANALYZE analyze_ptable;
+
+-- now alter the method for remote table to 'random', to not use tablesample
+-- but the 'legacy' sampling, and disable sampling for the other partition
+ALTER FOREIGN TABLE analyze_ftable__p1 OPTIONS (ADD analyze_sampling 'random');
+ALTER FOREIGN TABLE analyze_ftable__p2 OPTIONS (ADD analyze_sampling 'off');
+ANALYZE analyze_ptable;
+
+-- now add more data, so that each partition exceeds the statistics target
+INSERT INTO analyze_ptable (SELECT x FROM generate_series(5001, 10000) x);
+
+ANALYZE analyze_rtable1, analyze_rtable2;
+ANALYZE analyze_ptable;
+
+-- cleanup
+DROP FOREIGN TABLE analyze_ftable__p1, analyze_ftable__p2;
+DROP TABLE analyze_ptable, analyze_rtable1, analyze_rtable2;
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index bfd344cdc0e..d44c8cdd71e 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -305,6 +305,28 @@ OPTIONS (ADD password_required 'false');
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>analyze_sampling</literal> (<type>text</type>)</term>
+     <listitem>
+      <para>
+       This option determines if <command>ANALYZE</command> on a foreign
+       table samples the data on the remote node, or reads and transfers
+       all data and performs the sampling locally. The supported values
+       are <literal>off</literal>, <literal>random</literal>,
+       <literal>system</literal>, <literal>bernoulli</literal> and
+       <literal>auto</literal>. <literal>off</literal> disables remote
+       sampling, so all data are transferred and sampled locally.
+       <literal>random</literal> performs remote sampling using the
+       <literal>random()</literal> function, while <literal>system</literal>
+       and <literal>bernoulli</literal> rely on built-in <literal>TABLESAMPLE</literal>
+       methods. <literal>random</literal> works on all server versions,
+       while <literal>TABLESAMPLE</literal> is supported only since 9.5.
+       <literal>auto</literal> checks the server version and picks the
+       best remote sampling method automatically.
+      </para>
+     </listitem>
+    </varlistentry>
+
    </variablelist>
 
    <para>
-- 
2.34.3

