diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml new file mode 100644 index 2c75b80..a78748b *** a/doc/src/sgml/fdwhandler.sgml --- b/doc/src/sgml/fdwhandler.sgml *************** AcquireSampleRowsFunc (Relation relation *** 312,317 **** --- 312,320 ---- the table into the output parameters totalrows and totaldeadrows. (Set totaldeadrows to zero if the FDW does not have any concept of dead rows.) + To avoid excessively large sample rows, any variable-length value + may be truncated to a length of WIDTH_THRESHOLD+1, since values wider + than WIDTH_THRESHOLD are ignored during analyze processing anyway. diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index ff27164..5b21f3c *** a/src/backend/commands/analyze.c --- b/src/backend/commands/analyze.c *************** ind_fetch_func(VacAttrStatsP stats, int *** 1782,1798 **** */ - /* - * To avoid consuming too much memory during analysis and/or too much space - * in the resulting pg_statistic rows, we ignore varlena datums that are wider - * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV - * and distinct-value calculations since a wide value is unlikely to be - * duplicated at all, much less be a most-common value. For the same reason, - * ignoring wide values will not affect our estimates of histogram bin - * boundaries very much. 
- */ - #define WIDTH_THRESHOLD 1024 - #define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0) #define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0) --- 1782,1787 ---- diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h new file mode 100644 index 3c95dec..2d53a11 *** a/src/include/commands/vacuum.h --- b/src/include/commands/vacuum.h *************** extern void lazy_vacuum_rel(Relation one *** 167,172 **** --- 167,187 ---- BufferAccessStrategy bstrategy); /* in commands/analyze.c */ + + /* + * To avoid consuming too much memory during analysis and/or too much space + * in the resulting pg_statistic rows, we ignore varlena datums that are wider + * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV + * and distinct-value calculations since a wide value is unlikely to be + * duplicated at all, much less be a most-common value. For the same reason, + * ignoring wide values will not affect our estimates of histogram bin + * boundaries very much. + * To avoid excessive memory consumption, foreign data wrappers can truncate + * any varlena datum in samples returned by an AcquireSampleRowsFunc to + * WIDTH_THRESHOLD+1. + */ + #define WIDTH_THRESHOLD 1024 + extern void analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy); extern bool std_typanalyze(VacAttrStats *stats);