From d7daa505f0e76a88f455c8cc9a4cbe1297f4621f Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Sat, 10 Feb 2024 00:57:19 +0100 Subject: [PATCH v4 6/7] nodeToString: Apply RLE on Bitmapset and numeric List types This reduces the size of full serialized queries in pg_rewrite by several %, reducing overhead in the system. --- src/backend/nodes/outfuncs.c | 96 +++++++++++++++++++++++++++++++---- src/backend/nodes/read.c | 53 +++++++++++++++++-- src/backend/nodes/readfuncs.c | 16 +++++- 3 files changed, 149 insertions(+), 16 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1a17eafd57..f96295b3e5 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -378,23 +378,64 @@ writeNodeArray(StringInfo str, const Node *const *arr, int len, /* * Print a List. + * + * Notes: + * NodeList is formatted as ( ...) + * + * OidList is formatted as (o int +int int int ...), with +int indicating N + * successive identical values. + * + * IntList and XidList are formatted as (i/x int +int int int ...), with +int + * indicating N successively increasing values. (i 9 +3) is thus equivalent + * to (i 9 10 11 12). */ static void _outList(StringInfo str, const List *node, bool omitLocation) { const ListCell *lc; + union LCSurrogate { + int32 i; + Oid o; + TransactionId x; + } previous = { .i = 0}; + int run = 0; + int run_delta; + const char *fmt; appendStringInfoChar(str, '('); if (IsA(node, IntList)) + { appendStringInfoChar(str, 'i'); + fmt = " %d"; + run_delta = 1; + } else if (IsA(node, OidList)) + { appendStringInfoChar(str, 'o'); + fmt = " %u"; + run_delta = 0; + } else if (IsA(node, XidList)) + { appendStringInfoChar(str, 'x'); + fmt = " %u"; + run_delta = 1; + } + else if (IsA(node, List)) + { + /* silence the compiler about uninitialized variables */ + fmt = ""; + run_delta = 0; + } + else + elog(ERROR, "unrecognized list node type: %d", + (int) node->type); foreach(lc, node) { + union LCSurrogate val = {.i = 0}; + /* * For the sake of backward compatibility, we emit a slightly * different whitespace format for lists of nodes vs. other types of @@ -405,26 +446,41 @@ _outList(StringInfo str, const List *node, bool omitLocation) outNode(str, lfirst(lc), omitLocation); if (lnext(node, lc)) appendStringInfoChar(str, ' '); + continue; } else if (IsA(node, IntList)) - appendStringInfo(str, " %d", lfirst_int(lc)); + val.i = lfirst_int(lc); else if (IsA(node, OidList)) - appendStringInfo(str, " %u", lfirst_oid(lc)); + val.o = lfirst_oid(lc); else if (IsA(node, XidList)) - appendStringInfo(str, " %u", lfirst_xid(lc)); + val.x = lfirst_xid(lc); + + if (val.i == previous.i + run_delta) + run += 1; else - elog(ERROR, "unrecognized list node type: %d", - (int) node->type); + { + if (run > 0) + appendStringInfo(str, " +%d", run); + run = 0; + appendStringInfo(str, fmt, val.i); + } + previous = val; } - appendStringInfoChar(str, ')'); -} + if (run > 0) + appendStringInfo(str, " +%d", run); + + appendStringInfoChar(str, ')');} /* * outBitmapset - * converts a bitmap set of integers * - * Note: the output format is "(b int int ...)", similar to an integer List. + * Note: the output format is "(b int int +int ...)", similar to an + * integer List. Note that consecutive runs of incremental values are + * indicated by [... int +int ...], where the first int is the first set bit, + * and the int that's tagged with the '+' sign that follows is the number of + * subsequent set bits (resulting in a run of N+1 set bits). * * We export this function for use by extensions that define extensible nodes. * That's somewhat historical, though, because calling outNode() will work. @@ -432,13 +488,31 @@ _outList(StringInfo str, const List *node, bool omitLocation) void outBitmapset(StringInfo str, const Bitmapset *bms) { - int x; + int x = -1; + int prev = -2; + int run = 0; appendStringInfoChar(str, '('); appendStringInfoChar(str, 'b'); - x = -1; + while ((x = bms_next_member(bms, x)) >= 0) - appendStringInfo(str, " %d", x); + { + if (x - prev == 1) + run++; + else + { + if (run > 0) + appendStringInfo(str, " +%d", run); + + run = 0; + appendStringInfo(str, " %d", x); + } + prev = x; + } + + if (run > 0) + appendStringInfo(str, " +%d", run); + appendStringInfoChar(str, ')'); } diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index a4cd3a8932..c3681e3615 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -402,6 +402,8 @@ nodeRead(const char *token, int tok_len) elog(ERROR, "unterminated List structure"); if (tok_len == 1 && token[0] == 'i') { + int prev = 0; + /* List of integers */ for (;;) { @@ -417,12 +419,23 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized integer: \"%.*s\"", tok_len, token); - l = lappend_int(l, val); + + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + l = lappend_int(l, ++prev); + } + else + { + prev = val; + l = lappend_int(l, val); + } } result = (Node *) l; } else if (tok_len == 1 && token[0] == 'o') { + Oid prev = 0; /* List of OIDs */ for (;;) { @@ -438,12 +451,23 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized OID: \"%.*s\"", tok_len, token); - l = lappend_oid(l, val); + + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + l = lappend_oid(l, prev); + } + else + { + prev = val; + l = lappend_oid(l, val); + } } result = (Node *) l; } else if (tok_len == 1 && token[0] == 'x') { + TransactionId prev = 0; /* List of TransactionIds */ for (;;) { @@ -459,7 +483,17 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized Xid: \"%.*s\"", tok_len, token); - l = lappend_xid(l, val); + + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + l = lappend_xid(l, ++prev); + } + else + { + prev = val; + l = lappend_xid(l, val); + } } result = (Node *) l; } @@ -467,6 +501,7 @@ nodeRead(const char *token, int tok_len) { /* Bitmapset -- see also _readBitmapset() */ Bitmapset *bms = NULL; + int prev = -2; for (;;) { @@ -482,7 +517,17 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized integer: \"%.*s\"", tok_len, token); - bms = bms_add_member(bms, val); + + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + bms = bms_add_member(bms, ++prev); + } + else + { + bms = bms_add_member(bms, val); + prev = val; + } } result = (Node *) bms; } diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 4cdbad9e7e..eb87d17804 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -298,6 +298,7 @@ static Bitmapset * _readBitmapset(void) { Bitmapset *result = NULL; + int prev = -2; READ_TEMP_LOCALS(); @@ -326,7 +327,20 @@ _readBitmapset(void) val = (int) strtol(token, &endptr, 10); if (endptr != token + length) elog(ERROR, "unrecognized integer: \"%.*s\"", length, token); - result = bms_add_member(result, val); + + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + { + ++prev; + result = bms_add_member(result, prev); + } + } + else + { + result = bms_add_member(result, val); + prev = val; + } } return result; -- 2.40.1