From 3a129c7765a6066ec164d504bcdc2942ace00959 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 3 Jan 2024 23:41:44 +0100 Subject: [PATCH v1 7/7] NodeSupport: Apply RLE and differential encoding on lists and bitmaps This reduces raw serialized size of the initdb pg_rewrite.ev_action dataset by ~1.6%, it's pglz-compressed dataset by 5%/15kB (!), and the size of the template0 database by another 2 blocks, for a total improvement of 400kB/49 blocks. --- src/backend/nodes/outfuncs.c | 89 ++++++++++++++++++++++++++++++++--- src/backend/nodes/read.c | 38 +++++++++++++-- src/backend/nodes/readfuncs.c | 15 +++++- 3 files changed, 132 insertions(+), 10 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 83f7e43242..d44e1db3d5 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -337,18 +337,49 @@ static void _outList(StringInfo str, const List *node) { const ListCell *lc; + union LCSurrogate { + int32 i; + Oid o; + TransactionId x; + } previous = { .i = 0}; + int run = 0; + int run_delta; + const char *fmt; appendStringInfoChar(str, '('); if (IsA(node, IntList)) + { appendStringInfoChar(str, 'i'); + fmt = " %d"; + run_delta = 1; + } else if (IsA(node, OidList)) + { appendStringInfoChar(str, 'o'); + fmt = " %u"; + run_delta = 0; + } else if (IsA(node, XidList)) + { appendStringInfoChar(str, 'x'); + fmt = " %u"; + run_delta = 1; + } + else if (IsA(node, List)) + { + /* silence the compiler about uninitialized variables */ + fmt = ""; + run_delta = 0; + } + else + elog(ERROR, "unrecognized list node type: %d", + (int) node->type); foreach(lc, node) { + union LCSurrogate val = {.i = 0}; + /* * For the sake of backward compatibility, we emit a slightly * different whitespace format for lists of nodes vs. other types of @@ -359,18 +390,30 @@ _outList(StringInfo str, const List *node) outNode(str, lfirst(lc)); if (lnext(node, lc)) appendStringInfoChar(str, ' '); + continue; } else if (IsA(node, IntList)) - appendStringInfo(str, " %d", lfirst_int(lc)); + val.i = lfirst_int(lc); else if (IsA(node, OidList)) - appendStringInfo(str, " %u", lfirst_oid(lc)); + val.o = lfirst_oid(lc); else if (IsA(node, XidList)) - appendStringInfo(str, " %u", lfirst_xid(lc)); + val.x = lfirst_xid(lc); + + if (val.i == previous.i + run_delta) + run += 1; else - elog(ERROR, "unrecognized list node type: %d", - (int) node->type); + { + if (run > 0) + appendStringInfo(str, " +%d", run); + run = 0; + appendStringInfo(str, fmt, val.i); + } + previous = val; } + if (run > 0) + appendStringInfo(str, " +%d", run); + appendStringInfoChar(str, ')'); } @@ -387,12 +430,46 @@ void outBitmapset(StringInfo str, const Bitmapset *bms) { int x; + int prev = 0; + int run = 0; appendStringInfoChar(str, '('); appendStringInfoChar(str, 'b'); x = -1; while ((x = bms_next_member(bms, x)) >= 0) - appendStringInfo(str, " %d", x); + { + int increment = x - prev; + if (increment == 1) + { + run++; + } + else if (run != 0) + { + /* + * If the run is only one value, the '-' sign of run notation is + * larger than just the plain differential value, so only + * write out runs longer than 1. + */ + if (run == 1) + appendStringInfo(str, " %d", 1); + else + appendStringInfo(str, " +%d", run); + + appendStringInfo(str, " %d", increment); + run = 0; + } + else + { + appendStringInfo(str, " %d", increment); + } + prev = x; + } + + if (run == 1) + appendStringInfo(str, " %d", run); + else if (run > 1) + appendStringInfo(str, " +%d", run); + appendStringInfoChar(str, ')'); } diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index 2815b268be..362672c24e 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -402,6 +402,8 @@ nodeRead(const char *token, int tok_len) elog(ERROR, "unterminated List structure"); if (tok_len == 1 && token[0] == 'i') { + int prev = 0; + /* List of integers */ for (;;) { @@ -417,12 +419,23 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized integer: \"%.*s\"", tok_len, token); - l = lappend_int(l, val); + + if (val > 0 && token[0] == '+') + { + for (int i = 0; i < val; i++) + l = lappend_int(l, ++prev); + } + else + { + prev = val; + l = lappend_int(l, val); + } } result = (Node *) l; } else if (tok_len == 1 && token[0] == 'o') { + Oid prev = 0; /* List of OIDs */ for (;;) { @@ -438,12 +451,22 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized OID: \"%.*s\"", tok_len, token); - l = lappend_oid(l, val); + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + l = lappend_oid(l, prev); + } + else + { + prev = val; + l = lappend_oid(l, val); + } } result = (Node *) l; } else if (tok_len == 1 && token[0] == 'x') { + TransactionId prev = 0; /* List of TransactionIds */ for (;;) { @@ -459,7 +482,16 @@ nodeRead(const char *token, int tok_len) if (endptr != token + tok_len) elog(ERROR, "unrecognized Xid: \"%.*s\"", tok_len, token); - l = lappend_xid(l, val); + if (token[0] == '+') + { + for (int i = 0; i < val; i++) + l = lappend_xid(l, ++prev); + } + else + { + prev = val; + l = lappend_xid(l, val); + } } result = (Node *) l; } diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 19c023fb32..ad96ff6646 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -298,6 +298,7 @@ static Bitmapset * _readBitmapset(void) { Bitmapset *result = NULL; + int prev = 0; READ_TEMP_LOCALS(); @@ -326,7 +327,19 @@ _readBitmapset(void) val = (int) strtol(token, &endptr, 10); if (endptr != token + length) elog(ERROR, "unrecognized integer: \"%.*s\"", length, token); - result = bms_add_member(result, val); + if (val > 0 && token[0] == '+') + { + for (int i = 0; i < val; i++) + { + prev++; + result = bms_add_member(result, prev); + } + } + else + { + prev += val; + result = bms_add_member(result, prev); + } } return result; -- 2.40.1