From 864866206cbea843e56bbba7c7ba4c7db6ae12dd Mon Sep 17 00:00:00 2001
From: John Naylor <jcnaylor@gmail.com>
Date: Thu, 14 Dec 2017 14:22:25 +0700
Subject: [PATCH 07/10] Update distprep scripts.

Teach genbki.pl and Gen_fmgrtab.pl to read the catalog data (.dat)
files, and arrange for the former to double-quote certain values so
that bootscanner.l can read them.

Introduce Makefile dependencies on the data files, and add the
BKI_ABBREV() macro to genbki.h.
---
 src/backend/catalog/Makefile     |  11 +++-
 src/backend/catalog/genbki.pl    | 114 ++++++++++++++++++++++++++++++---------
 src/backend/utils/Gen_fmgrtab.pl |  34 +++++++-----
 src/backend/utils/Makefile       |   2 +-
 src/include/catalog/genbki.h     |   3 ++
 5 files changed, 124 insertions(+), 40 deletions(-)

diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 30ca509..23858b8 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -49,6 +49,15 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
 	toasting.h indexing.h \
     )
 
+POSTGRES_BKI_DATA = $(addprefix $(top_srcdir)/src/include/catalog/,\
+	pg_aggregate.dat pg_am.dat pg_amop.dat pg_amproc.dat pg_authid.dat \
+	pg_cast.dat pg_class.dat pg_collation.dat pg_database.dat pg_language.dat \
+	pg_namespace.dat pg_opclass.dat pg_operator.dat pg_opfamily.dat \
+	pg_pltemplate.dat pg_proc.dat pg_range.dat pg_tablespace.dat \
+	pg_ts_config.dat pg_ts_config_map.dat pg_ts_dict.dat pg_ts_parser.dat \
+	pg_ts_template.dat pg_type.dat \
+	)
+
 # location of Catalog.pm
 catalogdir = $(top_srcdir)/src/backend/catalog
 
@@ -67,7 +76,7 @@ schemapg.h: postgres.bki ;
 # even in distribution tarballs.  So this is cheating a bit, but it
 # will achieve the goal of updating the version number when it
 # changes.
-postgres.bki: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(top_srcdir)/configure $(top_srcdir)/src/include/catalog/duplicate_oids
+postgres.bki: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure $(top_srcdir)/src/include/catalog/duplicate_oids
 	cd $(top_srcdir)/src/include/catalog && $(PERL) ./duplicate_oids
 	$(PERL) -I $(catalogdir) $< $(pg_includes) --set-version=$(MAJORVERSION) $(POSTGRES_BKI_SRCS)
 
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 1876399..973ffc2 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -93,8 +93,26 @@ my $PG_CATALOG_NAMESPACE =
   Catalog::FindDefinedSymbol('pg_namespace.h', \@include_path,
 							 'PG_CATALOG_NAMESPACE');
 
-# Read all the input header files into internal data structures
-my $catalogs = Catalog::Catalogs(@input_files);
+# Only the .h file names are passed as arguments; for each header we
+# look for a matching .dat file with the same base name.
+my @datfiles;
+foreach my $header (@input_files)
+{
+	# Skip anything that is not a .h file: after a failed match $1 is
+	# stale or undefined, which would yield a bogus data file name.
+	next unless $header =~ /(.+)\.h$/;
+	my $datfile = "$1.dat";
+
+	push @datfiles, $datfile if -e $datfile;
+}
+
+# Read all the files into internal data structures
+my $catalogs     = Catalog::ParseHeader(@input_files);
+my $catalog_data = Catalog::ParseData(@datfiles);
+
+# Note: Since we have now loaded all the data, we can build lookup
+# tables here. However, if we need default values for a catalog, we
+# need to wait until the full tuples have been built.
 
 # Generate postgres.bki, postgres.description, and postgres.shdescription
 
@@ -107,7 +125,7 @@ my @tables_needing_macros;
 my %regprocoids;
 my @types;
 
-# produce output, one catalog at a time
+# Produce output, one catalog at a time.
 foreach my $catname (@{ $catalogs->{names} })
 {
 
@@ -156,17 +174,21 @@ foreach my $catname (@{ $catalogs->{names} })
 		print $bki "open $catname\n";
 	}
 
-	if (defined $catalog->{data})
+	if (defined $catalog_data->{$catname})
 	{
 
-		# Ordinary catalog with DATA line(s)
-		foreach my $row (@{ $catalog->{data} })
+		# Ordinary catalog with a data file
+		foreach my $row (@{ $catalog_data->{$catname} })
 		{
+			my %bki_values = %$row;
 
-			# Split line into tokens without interpreting their meaning.
-			my %bki_values;
-			@bki_values{@attnames} =
-			  Catalog::SplitDataLine($row->{bki_values});
+			# We must do the following operations in the order given.
+			Catalog::ResolveColumnAbbrevs(\%bki_values, $schema);
+			if ($catname eq 'pg_proc')
+			{
+				Catalog::ComputePgProcFields(\%bki_values);
+			}
+			Catalog::AddDefaultValues(\%bki_values, $schema, $catname);
 
 			# Perform required substitutions on fields
 			foreach my $column (@$schema)
@@ -200,7 +222,7 @@ foreach my $catname (@{ $catalogs->{names} })
 				}
 				else
 				{
-					$regprocoids{ $bki_values{proname} } = $row->{oid};
+					$regprocoids{ $bki_values{proname} } = $bki_values{oid};
 				}
 			}
 
@@ -208,33 +230,33 @@ foreach my $catname (@{ $catalogs->{names} })
 			if ($catname eq 'pg_type')
 			{
 				my %type = %bki_values;
-				$type{oid} = $row->{oid};
 				push @types, \%type;
 			}
 
+			# Add quotes where necessary.
+			format_bki_row(\%bki_values, $schema);
+
 			# Write to postgres.bki
-			my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
-			printf $bki "insert %s( %s )\n", $oid,
-			  join(' ', @bki_values{@attnames});
+			bki_insert(\%bki_values, @attnames);
 
 			# Write comments to postgres.description and
 			# postgres.shdescription
-			if (defined $row->{descr})
+			if (defined $bki_values{descr})
 			{
-				printf $descr "%s\t%s\t0\t%s\n", $row->{oid}, $catname,
-				  $row->{descr};
+				printf $descr "%s\t%s\t0\t%s\n", $bki_values{oid}, $catname,
+				  $bki_values{descr};
 			}
-			if (defined $row->{shdescr})
+			if (defined $bki_values{shdescr})
 			{
-				printf $shdescr "%s\t%s\t%s\n", $row->{oid}, $catname,
-				  $row->{shdescr};
+				printf $shdescr "%s\t%s\t%s\n", $bki_values{oid}, $catname,
+				  $bki_values{shdescr};
 			}
 		}
 	}
 	if ($catname eq 'pg_attribute')
 	{
 
-		# For pg_attribute.h, we generate DATA entries ourselves.
+		# For pg_attribute.h, we generate data entries ourselves.
 		# NB: pg_type.h must come before pg_attribute.h in the input list
 		# of catalog names, since we use info from pg_type.h here.
 		foreach my $table_name (@{ $catalogs->{names} })
@@ -317,12 +339,12 @@ foreach my $catname (@{ $catalogs->{names} })
 # (i.e., not contained in a header with a CATALOG() statement) comes here
 
 # Write out declare toast/index statements
-foreach my $declaration (@{ $catalogs->{toasting}->{data} })
+foreach my $declaration (@{ $catalogs->{toasting} })
 {
 	print $bki $declaration;
 }
 
-foreach my $declaration (@{ $catalogs->{indexing}->{data} })
+foreach my $declaration (@{ $catalogs->{indexing} })
 {
 	print $bki $declaration;
 }
@@ -380,6 +402,48 @@ exit 0;
 
 #################### Subroutines ########################
 
+# Add double quotes to the values of a normal bki row where needed.
+# Doing this here lets the catalog data files omit most double quotes,
+# which keeps them readable.  $row is a hash ref that is modified in
+# place; $schema is an array ref of column descriptions (name, type).
+sub format_bki_row
+{
+	my ($row, $schema) = @_;
+
+	foreach my $column (@$schema)
+	{
+		my $attname = $column->{name};
+		my $atttype = $column->{type};
+		my $value   = $row->{$attname};
+
+		# Empty strings and values containing whitespace get quoted.
+		my $must_quote = (length($value) == 0 or $value =~ /\s/);
+
+		# So do strings that have special characters, except for
+		# certain cases (octal escapes, '-' and '-1'). See bootscanner.l
+		$must_quote ||= ($value =~ /\W/
+			and $value !~ /^\\\d{3}$/
+			and $value !~ /^-\d*$/);
+
+		# XXX Not needed, but keeps the .bki diff down to a reasonable
+		# size during review: operator names, and vector and array types.
+		$must_quote ||= ($attname eq 'oprname'
+			or $atttype eq 'oidvector'
+			or $atttype eq 'int2vector'
+			or $atttype =~ /\[\]$/);
+
+		next unless $must_quote;
+
+		# The null marker and values that already carry their own
+		# double quotes are emitted unchanged.
+		next if $value eq '_null_';
+		next if $value =~ /^"([^"])*"$/;
+
+		# Wrap the value in double quotes.
+		$row->{$attname} = q|"| . $value . q|"|;
+	}
+}
+
 
 # Given the schema of pg_attribute, generate an entry for it using information
 # about the attribute it describes.  Any value that is not handled here
@@ -447,7 +511,7 @@ sub emit_pgattr_row
 	Catalog::AddDefaultValues($row, $pgattr_schema, 'pg_attribute');
 }
 
-# Write a pg_attribute entry to postgres.bki
+# Write an entry to postgres.bki
 sub bki_insert
 {
 	my $row        = shift;
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index a51a755..b9a6a11 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -3,7 +3,7 @@
 #
 # Gen_fmgrtab.pl
 #    Perl script that generates fmgroids.h, fmgrprotos.h, and fmgrtab.c
-#    from pg_proc.h
+#    from pg_proc.h and pg_proc.dat
 #
 # Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 # Portions Copyright (c) 1994, Regents of the University of California
@@ -55,35 +55,40 @@ if ($output_path ne '' && substr($output_path, -1) ne '/')
 die "No input files.\n"                                     if !$infile;
 die "No include path; you must specify -I at least once.\n" if !@include_path;
 
+# Only the pg_proc.h path is passed as an argument; derive the matching
+# .dat path from it, refusing input that would leave $1 stale or unset.
+$infile =~ /(.+)\.h$/ or die "Input file \"$infile\" is not a .h file.\n";
+my $datfile = "$1.dat";
+die "No data files.\n" if ! -e $datfile;
+
 my $FirstBootstrapObjectId =
 	Catalog::FindDefinedSymbol('access/transam.h', \@include_path, 'FirstBootstrapObjectId');
 my $INTERNALlanguageId =
 	Catalog::FindDefinedSymbol('catalog/pg_language.h', \@include_path, 'INTERNALlanguageId');
 
-# Read all the data from the include/catalog files.
-my $catalogs = Catalog::Catalogs($infile);
+# Read all the files into internal data structures
+my $catalogs     = Catalog::ParseHeader($infile);
+my $catalog_data = Catalog::ParseData($datfile);
 
 # Collect the raw data from pg_proc.h.
 my @fmgr = ();
-my @attnames;
-foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
-{
-	push @attnames, $column->{name};
-}
 
-my $data = $catalogs->{pg_proc}->{data};
+my $schema  = $catalogs->{pg_proc}->{columns};
+my $data    = $catalog_data->{pg_proc};
 foreach my $row (@$data)
 {
+	my %bki_values = %$row;
 
-	# Split line into tokens without interpreting their meaning.
-	my %bki_values;
-	@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
+	# We must do the following operations in the order given.
+	Catalog::ResolveColumnAbbrevs(\%bki_values, $schema);
+	Catalog::ComputePgProcFields(\%bki_values);
+	Catalog::AddDefaultValues(\%bki_values, $schema, 'pg_proc');
 
 	# Select out just the rows for internal-language procedures.
 	next if $bki_values{prolang} ne $INTERNALlanguageId;
 
 	push @fmgr,
-	  { oid    => $row->{oid},
+	  { oid    => $bki_values{oid},
 		strict => $bki_values{proisstrict},
 		retset => $bki_values{proretset},
 		nargs  => $bki_values{pronargs},
@@ -122,6 +127,7 @@ qq|/*-------------------------------------------------------------------------
  *
  *	It has been GENERATED by $0
  *	from $infile
+ *	and  $datfile
  *
  *-------------------------------------------------------------------------
  */
@@ -157,6 +163,7 @@ qq|/*-------------------------------------------------------------------------
  *
  *	It has been GENERATED by $0
  *	from $infile
+ *	and  $datfile
  *
  *-------------------------------------------------------------------------
  */
@@ -185,6 +192,7 @@ qq|/*-------------------------------------------------------------------------
  *
  *	It has been GENERATED by $0
  *	from $infile
+ *	and  $datfile
  *
  *-------------------------------------------------------------------------
  */
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index efb8b53..8ccfc3b 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -24,7 +24,7 @@ $(SUBDIRS:%=%-recursive): fmgroids.h fmgrprotos.h
 fmgrprotos.h: fmgroids.h ;
 fmgroids.h: fmgrtab.c ;
 
-fmgrtab.c: Gen_fmgrtab.pl $(catalogdir)/Catalog.pm $(top_srcdir)/src/include/catalog/pg_proc.h
+fmgrtab.c: Gen_fmgrtab.pl $(catalogdir)/Catalog.pm $(top_srcdir)/src/include/catalog/pg_proc.h $(top_srcdir)/src/include/catalog/pg_proc.dat
 	$(PERL) -I $(catalogdir) $< -I $(top_srcdir)/src/include/ $(top_srcdir)/src/include/catalog/pg_proc.h
 
 errcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-errcodes.pl
diff --git a/src/include/catalog/genbki.h b/src/include/catalog/genbki.h
index 71fc579..c3ffa29 100644
--- a/src/include/catalog/genbki.h
+++ b/src/include/catalog/genbki.h
@@ -34,6 +34,9 @@
 /* Specifies a default value for a catalog field */
 #define BKI_DEFAULT(value)
 
+/* Specifies an abbreviated label for a column name */
+#define BKI_ABBREV(abb)
+
 /*
  * This is never defined; it's here only for documentation.
  *
-- 
2.7.4

