From 7466ff936bd0829a622125eb351c6df57c4c4f3d Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 3 Jan 2024 01:39:42 +0100 Subject: [PATCH v1 2/7] pg_node_tree: reset node->location before catalog's serialization We don't store original query texts, so any lingering "location" value can only be useful in forensic debugging. In normal operation, however, a non-default value will show up as measurable overhead in serialization, so we reset it to its default (-1), saving several 10s of kB. --- src/backend/catalog/heap.c | 11 +++- src/backend/catalog/index.c | 2 + src/backend/catalog/pg_attrdef.c | 5 +- src/backend/catalog/pg_proc.c | 6 ++ src/backend/catalog/pg_publication.c | 4 ++ src/backend/commands/policy.c | 7 ++ src/backend/commands/statscmds.c | 1 + src/backend/commands/trigger.c | 2 + src/backend/commands/typecmds.c | 6 +- src/backend/nodes/nodeFuncs.c | 95 ++++++++++++++++++++++++++++ src/backend/rewrite/rewriteDefine.c | 8 +++ src/include/nodes/nodeFuncs.h | 2 + 12 files changed, 146 insertions(+), 3 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index b93894889d..419d4ab30e 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -2062,8 +2062,10 @@ StoreRelCheck(Relation rel, const char *ccname, Node *expr, Oid constrOid; /* - * Flatten expression to string form for storage. + * Remove references into the (to be dropped) query string, and + * flatten expression to string form for storage. */ + reset_querytext_references(expr, NULL); ccbin = nodeToString(expr); /* @@ -3676,6 +3678,7 @@ StorePartitionKey(Relation rel, { char *exprString; + reset_querytext_references((Node *) partexprs, NULL); exprString = nodeToString(partexprs); partexprDatum = CStringGetTextDatum(exprString); pfree(exprString); @@ -3834,6 +3837,12 @@ StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound) memset(new_val, 0, sizeof(new_val)); memset(new_null, false, sizeof(new_null)); memset(new_repl, false, sizeof(new_repl)); + + /* + * We don't keep the original query text around, so remove any + * references to it to reduce its stored size. + */ + reset_querytext_references((Node *) bound, NULL); new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound)); new_null[Anum_pg_class_relpartbound - 1] = false; new_repl[Anum_pg_class_relpartbound - 1] = true; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 7b186c0220..e01ff9cf7f 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -588,6 +588,7 @@ UpdateIndexRelation(Oid indexoid, { char *exprsString; + reset_querytext_references((Node *) indexInfo->ii_Expressions, NULL); exprsString = nodeToString(indexInfo->ii_Expressions); exprsDatum = CStringGetTextDatum(exprsString); pfree(exprsString); @@ -603,6 +604,7 @@ UpdateIndexRelation(Oid indexoid, { char *predString; + reset_querytext_references((Node *) indexInfo->ii_Predicate, NULL); predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate)); predDatum = CStringGetTextDatum(predString); pfree(predString); diff --git a/src/backend/catalog/pg_attrdef.c b/src/backend/catalog/pg_attrdef.c index ade0b6d8e6..49cb869e3c 100644 --- a/src/backend/catalog/pg_attrdef.c +++ b/src/backend/catalog/pg_attrdef.c @@ -23,6 +23,7 @@ #include "catalog/objectaccess.h" #include "catalog/pg_attrdef.h" #include "executor/executor.h" +#include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/builtins.h" @@ -62,8 +63,10 @@ StoreAttrDefault(Relation rel, AttrNumber attnum, adrel = table_open(AttrDefaultRelationId, RowExclusiveLock); /* - * Flatten expression to string form for storage. + * Reset any references to the original query text (which isn't stored), + * and flatten the expression to string form for storage. */ + reset_querytext_references(expr, NULL); adbin = nodeToString(expr); /* diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c index b5fd364003..ca0d1f6174 100644 --- a/src/backend/catalog/pg_proc.c +++ b/src/backend/catalog/pg_proc.c @@ -331,7 +331,10 @@ ProcedureCreate(const char *procedureName, else nulls[Anum_pg_proc_proargnames - 1] = true; if (parameterDefaults != NIL) + { + reset_querytext_references((Node *) parameterDefaults, NULL); values[Anum_pg_proc_proargdefaults - 1] = CStringGetTextDatum(nodeToString(parameterDefaults)); + } else nulls[Anum_pg_proc_proargdefaults - 1] = true; if (trftypes != PointerGetDatum(NULL)) @@ -344,7 +347,10 @@ ProcedureCreate(const char *procedureName, else nulls[Anum_pg_proc_probin - 1] = true; if (prosqlbody) + { + reset_querytext_references(prosqlbody, NULL); values[Anum_pg_proc_prosqlbody - 1] = CStringGetTextDatum(nodeToString(prosqlbody)); + } else nulls[Anum_pg_proc_prosqlbody - 1] = true; if (proconfig != PointerGetDatum(NULL)) diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index c488b6370b..d4b772e543 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -36,6 +36,7 @@ #include "commands/publicationcmds.h" #include "funcapi.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/catcache.h" @@ -422,7 +423,10 @@ publication_add_relation(Oid pubid, PublicationRelInfo *pri, /* Add qualifications, if available */ if (pri->whereClause != NULL) + { + reset_querytext_references(pri->whereClause, NULL); values[Anum_pg_publication_rel_prqual - 1] = CStringGetTextDatum(nodeToString(pri->whereClause)); + } else nulls[Anum_pg_publication_rel_prqual - 1] = true; diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c index 76a45e56bf..ce01336ad3 100644 --- a/src/backend/commands/policy.c +++ b/src/backend/commands/policy.c @@ -30,6 +30,7 @@ #include "commands/policy.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "nodes/pg_list.h" #include "parser/parse_clause.h" #include "parser/parse_collate.h" @@ -701,13 +702,19 @@ CreatePolicy(CreatePolicyStmt *stmt) /* Add qual if present. */ if (qual) + { + reset_querytext_references(qual, NULL); values[Anum_pg_policy_polqual - 1] = CStringGetTextDatum(nodeToString(qual)); + } else isnull[Anum_pg_policy_polqual - 1] = true; /* Add WITH CHECK qual if present */ if (with_check_qual) + { + reset_querytext_references(with_check_qual, NULL); values[Anum_pg_policy_polwithcheck - 1] = CStringGetTextDatum(nodeToString(with_check_qual)); + } else isnull[Anum_pg_policy_polwithcheck - 1] = true; diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index 20be564df2..c9e85f0af8 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -477,6 +477,7 @@ CreateStatistics(CreateStatsStmt *stmt) { char *exprsString; + reset_querytext_references((Node *) stxexprs, NULL); exprsString = nodeToString(stxexprs); exprsDatum = CStringGetTextDatum(exprsString); pfree(exprsString); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 52177759ab..b1ffa8b17f 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -39,6 +39,7 @@ #include "miscadmin.h" #include "nodes/bitmapset.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "parser/parse_clause.h" #include "parser/parse_collate.h" @@ -674,6 +675,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, /* we'll need the rtable for recordDependencyOnExpr */ whenRtable = pstate->p_rtable; + reset_querytext_references(whenClause, NULL); qual = nodeToString(whenClause); free_parsestate(pstate); diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index aaf9728697..b76f1d1c9f 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -58,6 +58,7 @@ #include "executor/executor.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "parser/parse_coerce.h" #include "parser/parse_collate.h" @@ -924,6 +925,7 @@ DefineDomain(CreateDomainStmt *stmt) defaultValue = deparse_expression(defaultExpr, NIL, false, false); + reset_querytext_references(defaultExpr, NULL); defaultValueBin = nodeToString(defaultExpr); } } @@ -3506,8 +3508,10 @@ domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, errmsg("cannot use table references in domain check constraint"))); /* - * Convert to string form for storage. + * Remove location references into the original query string (which won't + * be stored) and convert to string form for storage. */ + reset_querytext_references(expr, NULL); ccbin = nodeToString(expr); /* diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index c03f4f23e2..44a922464b 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -1903,6 +1903,101 @@ check_functions_in_node(Node *node, check_function_callback checker, return false; } +/* + * Reset all query text related fields to their original value. + * + * This is used for reducing the size of nodeToText, like in pg_rewrite. + */ +bool +reset_querytext_references(Node *node, void *context) +{ + const int query_recurse_flags = 0; + ListCell *temp; + +#define RESET_FOR(_type_) \ + case T_##_type_: \ + { \ + castNode(_type_, node)->location = -1; \ + break; \ + } + + if (node == NULL) + return false; + + if (IsA(node, Query)) + { + Query *query = castNode(Query, node); + query->stmt_location = -1; + query->stmt_len = 0; + + return query_tree_walker(query, reset_querytext_references, context, + query_recurse_flags); + } + + switch (nodeTag(node)) + { + RESET_FOR(RangeVar); + RESET_FOR(TableFunc); + RESET_FOR(Var); + RESET_FOR(Const); + RESET_FOR(Param); + RESET_FOR(Aggref); + RESET_FOR(GroupingFunc); + RESET_FOR(WindowFunc); + RESET_FOR(FuncExpr); + RESET_FOR(NamedArgExpr); + RESET_FOR(OpExpr); + RESET_FOR(DistinctExpr); + RESET_FOR(NullIfExpr); + RESET_FOR(ScalarArrayOpExpr); + RESET_FOR(BoolExpr); + RESET_FOR(SubLink); + RESET_FOR(RelabelType); + RESET_FOR(CoerceViaIO); + RESET_FOR(ArrayCoerceExpr); + RESET_FOR(ConvertRowtypeExpr); + RESET_FOR(CollateExpr); + RESET_FOR(CaseWhen); + RESET_FOR(ArrayExpr); + RESET_FOR(RowExpr); + RESET_FOR(CoalesceExpr); + RESET_FOR(MinMaxExpr); + RESET_FOR(SQLValueFunction); + RESET_FOR(XmlExpr); + RESET_FOR(JsonFormat); + RESET_FOR(JsonConstructorExpr); + RESET_FOR(JsonIsPredicate); + RESET_FOR(NullTest); + RESET_FOR(BooleanTest); + RESET_FOR(CoerceToDomain); + RESET_FOR(CoerceToDomainValue); + RESET_FOR(SetToDefault); + case T_CaseExpr: + { + /* + * The expression_tree_walker does not call us for CaseWhen nodes, + * but instead directly wires us through to its inner Nodes. + * To correctly reset the location fields, we manually iterate + * to get those fields reset. + */ + CaseExpr *caseExpr = castNode(CaseExpr, node); + caseExpr->location = -1; + foreach(temp, caseExpr->args) + { + CaseWhen *when = lfirst_node(CaseWhen, temp); + when->location = -1; + } + break; + } + default: + break; + } + + return expression_tree_walker(node, reset_querytext_references, + (void *) context); +#undef RESET_FOR +} + /* * Standard expression-tree walking support diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index e36fc72e1e..57650935ac 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -474,6 +474,14 @@ DefineQueryRewrite(const char *rulename, /* discard rule if it's null action and not INSTEAD; it's a no-op */ if (action != NIL || is_instead) { + /* + * Clear location info from the action and event_qual data. We don't + * store the original query, so keeping track of this information is + * meaningless and hinders serialization/compression efforts. + */ + reset_querytext_references(event_qual, NULL); + reset_querytext_references((Node *) action, NULL); + ruleId = InsertRule(rulename, event_type, event_relid, diff --git a/src/include/nodes/nodeFuncs.h b/src/include/nodes/nodeFuncs.h index 20921b45b9..f513b2ba37 100644 --- a/src/include/nodes/nodeFuncs.h +++ b/src/include/nodes/nodeFuncs.h @@ -139,6 +139,8 @@ get_notclausearg(const void *notclause) extern bool check_functions_in_node(Node *node, check_function_callback checker, void *context); +extern bool reset_querytext_references(Node *node, void *context); + /* * The following functions are usually passed walker or mutator callbacks * that are declared like "bool walker(Node *node, my_struct *context)" -- 2.40.1