From 3d4683d2682dc3bdcef532489e851ad16ec9262d Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Fri, 15 Sep 2023 22:25:40 +0200 Subject: [PATCH v2] Update BKI syntax, bootstrap performance - NULL values are 56% smaller (from 7 bytes to 3 bytes each) - NULL-only postfixes are not emitted into the BKI file - Inserts are batched into inserts of up to 1000 rows per catalog, reducing overhead of the 'INSERT' statement - The insert tuple descriptor is reused across tuple insertions, reducing insert overhead - OPEN/CLOSE are now implicit and depend on context: no need to manually open/close the relation in the bki. - Removed some wasteful spaces from the generated file. --- doc/src/sgml/bki.sgml | 13 ++--- src/backend/bootstrap/bootparse.y | 73 ++++++++++++++++---------- src/backend/bootstrap/bootscanner.l | 5 +- src/backend/bootstrap/bootstrap.c | 13 +++-- src/backend/catalog/genbki.pl | 80 +++++++++++++++++++++++------ src/include/bootstrap/bootstrap.h | 1 + 6 files changed, 127 insertions(+), 58 deletions(-) diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml index f71644e398..5c057d0f8a 100644 --- a/doc/src/sgml/bki.sgml +++ b/doc/src/sgml/bki.sgml @@ -247,9 +247,9 @@ - Null values are represented by _null_. - (Note that there is no way to create a value that is just that - string.) + Null values are represented by _null_ and + __. (Note that there is no way to create a value that + is just either of these strings.) @@ -1068,11 +1068,8 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat of type oid, int4 and text, respectively, and insert two rows into the table: -create test_table 420 (oid = oid, cola = int4, colb = text) -open test_table -insert ( 421 1 'value 1' ) -insert ( 422 2 _null_ ) -close test_table +create test_table 420 open (oid = oid, cola = int4, colb = text) +insert ( 421 1 'value 1' ), ( 422 2 _null_ ) diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 81a1b7bfec..c911e26018 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -62,6 +62,7 @@ do_end(void) /* Reclaim memory allocated while processing this line */ MemoryContextSwitchTo(CurTransactionContext); MemoryContextReset(per_line_ctx); + tupDesc = NULL; CHECK_FOR_INTERRUPTS(); /* allow SIGINT to kill bootstrap run */ if (isatty(0)) { @@ -91,7 +92,7 @@ static int num_columns_read = 0; %type boot_index_params %type boot_index_param %type boot_ident -%type optbootstrap optsharedrelation boot_column_nullness +%type optbootstrap optsharedrelation optopen boot_column_nullness %type oidspec optrowtypeoid %token ID @@ -110,6 +111,12 @@ static int num_columns_read = 0; TopLevel: Boot_Queries + { + if (boot_reldesc) + { + closerel(NULL); + } + } | ; @@ -119,9 +126,7 @@ Boot_Queries: ; Boot_Query : - Boot_OpenStmt - | Boot_CloseStmt - | Boot_CreateStmt + Boot_CreateStmt | Boot_InsertStmt | Boot_DeclareIndexStmt | Boot_DeclareUniqueIndexStmt @@ -129,26 +134,8 @@ Boot_Query : | Boot_BuildIndsStmt ; -Boot_OpenStmt: - OPEN boot_ident - { - do_start(); - boot_openrel($2); - do_end(); - } - ; - -Boot_CloseStmt: - XCLOSE boot_ident - { - do_start(); - closerel($2); - do_end(); - } - ; - Boot_CreateStmt: - XCREATE boot_ident oidspec optbootstrap optsharedrelation optrowtypeoid LPAREN + XCREATE boot_ident oidspec optbootstrap optsharedrelation optrowtypeoid optopen LPAREN { do_start(); numattr = 0; @@ -192,7 +179,6 @@ Boot_CreateStmt: if (boot_reldesc) { - elog(DEBUG4, "create bootstrap: warning, open relation exists, closing first"); closerel(NULL); } @@ -240,6 +226,12 @@ Boot_CreateStmt: NULL); elog(DEBUG4, "relation created with OID %u", id); } + + if ($7) + { + boot_openrel($2); + } + do_end(); } ; @@ -251,15 +243,37 @@ Boot_InsertStmt: elog(DEBUG4, "inserting row"); num_columns_read = 0; } + Boot_InsertTuples + { + num_columns_read = 0; + do_end(); + } + ; + +Boot_InsertTuples: + Boot_InsertTuple + | Boot_InsertTuples COMMA + { + elog(DEBUG4, "inserting row"); + num_columns_read = 0; + } + Boot_InsertTuple + ; + +Boot_InsertTuple: LPAREN boot_column_val_list RPAREN { + if (boot_reldesc == NULL) + elog(FATAL, "relation not open"); + + while (num_columns_read < numattr) + InsertOneNull(num_columns_read++); + if (num_columns_read != numattr) elog(ERROR, "incorrect number of columns in row (expected %d, got %d)", numattr, num_columns_read); - if (boot_reldesc == NULL) - elog(FATAL, "relation not open"); InsertOneTuple(); - do_end(); + num_columns_read = 0; } ; @@ -427,6 +441,11 @@ optrowtypeoid: | { $$ = InvalidOid; } ; +optopen: + OPEN { $$ = true; } + | { $$ = false; } + ; + boot_column_list: boot_column_def | boot_column_list COMMA boot_column_def diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l index 6a9d4193f2..6564469951 100644 --- a/src/backend/bootstrap/bootscanner.l +++ b/src/backend/bootstrap/bootscanner.l @@ -60,8 +60,8 @@ sid \'([^']|\'\')*\' /* * Keyword tokens return the keyword text (as a constant string) in boot_yylval.kw, * just in case that's needed because we want to treat the keyword as an - * unreserved identifier. Note that _null_ is not treated as a keyword - * for this purpose; it's the one "reserved word" in the bootstrap syntax. + * unreserved identifier. Note that _null_ and __ are not treated as a keyword + * for this purpose; they're the only "reserved words" in the bootstrap syntax. * * Notice that all the keywords are case-sensitive, and for historical * reasons some must be upper case. @@ -85,6 +85,7 @@ rowtype_oid { boot_yylval.kw = "rowtype_oid"; return XROWTYPE_OID; } insert { boot_yylval.kw = "insert"; return INSERT_TUPLE; } _null_ { return NULLVAL; } +__ { return NULLVAL; } "," { return COMMA; } "=" { return EQUALS; } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 4cc2efa95c..90c5907d30 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -64,7 +64,7 @@ Relation boot_reldesc; /* current relation descriptor */ Form_pg_attribute attrtypes[MAXATTR]; /* points to attribute info */ int numattr; /* number of attributes for cur. rel */ - +TupleDesc tupDesc; /* cached tupDesc for cur. rel, or NULL */ /* * Basic information associated with each type. This is used before @@ -506,7 +506,6 @@ DefineAttr(char *name, char *type, int attnum, int nullness) if (boot_reldesc != NULL) { - elog(WARNING, "no open relations allowed with CREATE command"); closerel(NULL); } @@ -612,14 +611,18 @@ void InsertOneTuple(void) { HeapTuple tuple; - TupleDesc tupDesc; int i; elog(DEBUG4, "inserting row with %d columns", numattr); - tupDesc = CreateTupleDesc(numattr, attrtypes); + /* + * tupDesc will be freed when the memory context is reset, + * and will be reset to NULL. + */ + if (tupDesc == NULL) + tupDesc = CreateTupleDesc(numattr, attrtypes); + tuple = heap_form_tuple(tupDesc, values, Nulls); - pfree(tupDesc); /* just free's tupDesc, not the attrtypes */ simple_heap_insert(boot_reldesc, tuple); heap_freetuple(tuple); diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 4a7205472c..1e6893792c 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -28,6 +28,8 @@ my $include_path; my $num_errors = 0; +my $insert_batch_size = 1000; + GetOptions( 'output:s' => \$output_path, 'set-version:s' => \$major_version, @@ -503,9 +505,19 @@ EOM . $catalog->{bootstrap} . $catalog->{rowtype_oid_clause}; + # Open it, unless it's a bootstrap catalog (create bootstrap does this + # automatically) + if (!$catalog->{bootstrap}) + { + if (($catalog_data{$catname} || 0) > 0) + { + print $bki " open"; + } + } + my $first = 1; - print $bki "\n (\n"; + print $bki "\n(\n"; my $schema = $catalog->{columns}; my %attnames; my $attnum = 0; @@ -521,7 +533,7 @@ EOM # Emit column definitions if (!$first) { - print $bki " ,\n"; + print $bki ",\n"; } $first = 0; @@ -539,7 +551,7 @@ EOM # Emit Anum_* constants printf $def "#define Anum_%s_%s %s\n", $catname, $attname, $attnum; } - print $bki "\n )\n"; + print $bki "\n)\n"; # Emit Natts_* constant print $def "\n#define Natts_$catname $attnum\n\n"; @@ -550,19 +562,13 @@ EOM print $def $line; } - # Open it, unless it's a bootstrap catalog (create bootstrap does this - # automatically) - if (!$catalog->{bootstrap}) - { - print $bki "open $catname\n"; - } - # For pg_attribute.h, we generate data entries ourselves. if ($catname eq 'pg_attribute') { gen_pg_attribute($schema); } + my $rows = 0; # Ordinary catalog with a data file foreach my $row (@{ $catalog_data{$catname} }) { @@ -618,7 +624,7 @@ EOM } elsif ($atttype eq '_oid') { - if ($bki_values{$attname} ne '_null_') + if ($bki_values{$attname} ne '_null_' && $bki_values{$attname} ne '__') { $bki_values{$attname} =~ s/[{}]//g; @lookupnames = split /,/, $bki_values{$attname}; @@ -654,7 +660,7 @@ EOM } # Write to postgres.bki - print_bki_insert(\%bki_values, $schema); + print_bki_insert(\%bki_values, $schema, ($rows++ % $insert_batch_size) eq 0); # Emit OID symbol if (defined $bki_values{oid_symbol}) @@ -671,7 +677,6 @@ EOM } } - print $bki "close $catname\n"; printf $def "\n#endif\t\t\t\t\t\t\t/* %s_D_H */\n", uc $catname; # Close and rename definition header @@ -833,6 +838,8 @@ sub gen_pg_attribute push @attnames, $column->{name}; } + my $attrs_processed = 0; + foreach my $table_name (@catnames) { my $table = $catalogs{$table_name}; @@ -862,7 +869,10 @@ sub gen_pg_attribute && ($row{attlen} eq 'NAMEDATALEN' || $row{attlen} > 0)); # If it's bootstrapped, put an entry in postgres.bki. - print_bki_insert(\%row, $schema) if $table->{bootstrap}; + if ($table->{bootstrap}) + { + print_bki_insert(\%row, $schema, ($attrs_processed++ % $insert_batch_size) eq 0); + } # Store schemapg entries for later. morph_row_for_schemapg(\%row, $schema); @@ -892,7 +902,7 @@ sub gen_pg_attribute $row{attstattarget} = '0'; morph_row_for_pgattr(\%row, $schema, $attr, 1); - print_bki_insert(\%row, $schema); + print_bki_insert(\%row, $schema, ($attrs_processed++ % $insert_batch_size) eq 0); } } } @@ -964,6 +974,8 @@ sub print_bki_insert { my $row = shift; my $schema = shift; + my $insert = shift; + my $n_null_skipped = 0; my @bki_values; @@ -977,6 +989,31 @@ sub print_bki_insert # since that represents a NUL char in C code. $bki_value = '' if $bki_value eq '\0'; + # We have many nulls in our catalogs, which would use 6 bytes each if + # stored as _null_. We compress them by printing __ instead. + # While not as readable, the cost of each NULL column's values is + # decreased by 57%. + $bki_value = '__' if $bki_value eq '_null_'; + + # Various catalogs contain trailing NULL values. By defaulting + # missing trailing columns to NULL, we can skip emitting those columns, + # often removing several column values from the generated BKI. + if ($bki_value eq '__') + { + $n_null_skipped += 1; + next; + } + else + { + # We're not in a tail of only nulls, so write out the skipped null + # values. + while ($n_null_skipped > 0) + { + push @bki_values, '__'; + $n_null_skipped -= 1; + } + } + # Handle single quotes by doubling them, because that's what the # bootstrap scanner requires. We do not process backslashes # specially; this allows escape-string-style backslash escapes @@ -991,7 +1028,18 @@ sub print_bki_insert push @bki_values, $bki_value; } - printf $bki "insert ( %s )\n", join(' ', @bki_values); + + # The "insert" part of the statement is quite large at 6 bytes, so we + # combine multiple inserts for the same catalog into one large statement. + # This again saves a few bytes per statement. + if ($insert != 0) + { + printf $bki "insert (%s)\n", join(' ', @bki_values); + } + else + { + printf $bki ", (%s)\n", join(' ', @bki_values); + } return; } diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index e1cb73c5f2..0228195709 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -30,6 +30,7 @@ extern PGDLLIMPORT Relation boot_reldesc; extern PGDLLIMPORT Form_pg_attribute attrtypes[MAXATTR]; extern PGDLLIMPORT int numattr; +extern PGDLLIMPORT TupleDesc tupDesc; extern void BootstrapModeMain(int argc, char *argv[], bool check_only) pg_attribute_noreturn(); -- 2.40.1