From 01ffdf483741566fddaa2939ecf83656fe3bd5a4 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnaylor@gmail.com>
Date: Sat, 23 Dec 2017 15:56:58 +0700
Subject: [PATCH v5 05/13] Update catalog scripts to read data files.

Teach genbki.pl, Gen_fmgrtab.pl, duplicate_oids, and unused_oids to read
the data files, and arrange for genbki.pl to double-quote certain values
so bootscanner.l can read them. Introduce Makefile dependencies on the
data files.
---
 doc/src/sgml/bki.sgml              |   5 +-
 src/backend/Makefile               |   2 +-
 src/backend/catalog/Makefile       |   4 +-
 src/backend/catalog/README         |  72 +++++++++++++++-----
 src/backend/catalog/genbki.pl      | 133 ++++++++++++++++++++++++++++---------
 src/backend/utils/Gen_fmgrtab.pl   |  56 ++++++++++------
 src/backend/utils/Makefile         |   4 +-
 src/include/catalog/duplicate_oids |   6 +-
 src/include/catalog/unused_oids    |   6 +-
 9 files changed, 207 insertions(+), 81 deletions(-)
diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 33378b4..a3962c5 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -21,8 +21,9 @@
   input file used by <application>initdb</application> is created as
   part of building and installing <productname>PostgreSQL</productname>
   by a program named <filename>genbki.pl</filename>, which reads some
-  specially formatted C header files in the <filename>src/include/catalog/</filename>
-  directory of the source tree.  The created <acronym>BKI</acronym> file
+  specially formatted C header files and data files in the
+  <filename>src/include/catalog/</filename> directory of the source tree.
+  The created <acronym>BKI</acronym> file
   is called <filename>postgres.bki</filename> and is
   normally installed in the
   <filename>share</filename> subdirectory of the installation tree.
diff --git a/src/backend/Makefile b/src/backend/Makefile
index aab676d..a0655e4 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -142,7 +142,7 @@ utils/errcodes.h: utils/generate-errcodes.pl utils/errcodes.txt
 # see explanation in parser/Makefile
 utils/fmgrprotos.h: utils/fmgroids.h ;
 
-utils/fmgroids.h: utils/Gen_fmgrtab.pl catalog/Catalog.pm $(top_srcdir)/src/include/catalog/pg_proc.h
+utils/fmgroids.h: utils/Gen_fmgrtab.pl catalog/Catalog.pm $(top_srcdir)/src/include/catalog/pg_proc.dat $(top_srcdir)/src/include/access/transam.h
 	$(MAKE) -C utils $(notdir $@)
 
 utils/probes.h: utils/probes.d
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 30ca509..4f3a5ea 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -49,6 +49,8 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
 	toasting.h indexing.h \
     )
 
+POSTGRES_BKI_DATA = $(wildcard $(top_srcdir)/src/include/catalog/*.dat)
+
 # location of Catalog.pm
 catalogdir = $(top_srcdir)/src/backend/catalog
 
@@ -67,7 +69,7 @@ schemapg.h: postgres.bki ;
 # even in distribution tarballs.  So this is cheating a bit, but it
 # will achieve the goal of updating the version number when it
 # changes.
-postgres.bki: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(top_srcdir)/configure $(top_srcdir)/src/include/catalog/duplicate_oids
+postgres.bki: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure $(top_srcdir)/src/include/catalog/duplicate_oids
 	cd $(top_srcdir)/src/include/catalog && $(PERL) ./duplicate_oids
 	$(PERL) -I $(catalogdir) $< $(pg_includes) --set-version=$(MAJORVERSION) $(POSTGRES_BKI_SRCS)
 
diff --git a/src/backend/catalog/README b/src/backend/catalog/README
index 7e0ddf3..7b849a4 100644
--- a/src/backend/catalog/README
+++ b/src/backend/catalog/README
@@ -7,24 +7,57 @@ This directory contains .c files that manipulate the system catalogs;
 src/include/catalog contains the .h files that define the structure
 of the system catalogs.
 
-When the compile-time scripts (Gen_fmgrtab.pl and genbki.pl)
-execute, they grep the DATA statements out of the .h files and munge
-these in order to generate the postgres.bki file.  The .bki file is then
+When the compile-time script genbki.pl executes, it parses the .h files
+and .dat files in order to generate the postgres.* files.  These are then
 used as input to initdb (which is just a wrapper around postgres
 running single-user in bootstrapping mode) in order to generate the
 initial (template) system catalog relation files.
 
+backend/utils/Gen_fmgrtab.pl uses the same mechanism to generate .c and
+.h files used by the function manager.
+
 -----------------------------------------------------------------
 
-People who are going to hose around with the .h files should be aware
-of the following facts:
+The data file format and bootstrap data conventions
 
-- It is very important that the DATA statements be properly formatted
+- As far as the bootstrap code is concerned, it is very important
+that the insert statements in postgres.bki be properly formatted
 (e.g., no broken lines, proper use of white-space and _null_).  The
 scripts are line-oriented and break easily.  In addition, the only
 documentation on the proper format for them is the code in the
-bootstrap/ directory.  Just be careful when adding new DATA
-statements.
+bootstrap/ directory.  Fortunately, the source bootstrap data is much
+more tolerant with respect to formatting, but it still pays to be
+careful when adding new data.
+
+- The .dat files contain Perl data structure literals that are simply
+eval'd to produce in-memory data structures.  As such, the code reading
+them doesn't care about ordering and layout, but in order to maintain
+a standard appearance, src/include/catalog/rewrite_dat.pl should be run
+before submitting catalog data patches.  Each file contains an array of
+hash references, which represent the data entries.  The best examples are
+the existing data files, but an altered subset of pg_database.dat will
+demonstrate the key features:
+
+# pg_database_example.dat
+[
+
+# a comment
+{ oid => '1', oid_symbol => 'TemplateDbOid', shdescr => 'default template',
+  datname => 'Berkely\'s DB', datcollate => '"LC_COLLATE"', datacl => '_null_' },
+
+]
+
+-The layout is: open bracket, one or more sets of curly brackets containing
+comma-separated key-value pairs, close bracket.
+-All values are single-quoted.
+-Single quotes within values must be escaped.
+-If a value is a macro to be expanded by initdb.c, it must have double-
+quotes, since we don't know what kind of characters will be substituted.
+-Nulls are represented as "_null_".
+-Comments must be on their own lines.
+-The fields oid, oid_symbol, descr, and shdescr are on their own line
+within the curly brackets.  This is done automatically during rewriting
+so don't worry about their placement during development.
 
 - Some catalogs require that OIDs be preallocated to tuples because
 of cross-references from other pre-loaded tuples.  For example, pg_type
@@ -38,9 +71,9 @@ catalog that has no OIDs).  In practice we usually preassign OIDs
 for all or none of the pre-loaded tuples in a given catalog, even if only
 some of them are actually cross-referenced.
 
-- We also sometimes preallocate OIDs for catalog tuples whose OIDs must
-be known directly in the C code.  In such cases, put a #define in the
-catalog's .h file, and use the #define symbol in the C code.  Writing
+- We also sometimes preallocate OIDs for catalog tuples whose OIDs must be
+known directly in the C code.  In such cases, put an 'oid_symbol' entry in
+the catalog's data file, and use the #define symbol in the C code.  Writing
 the actual numeric value of any OID in C code is considered very bad form.
 Direct references to pg_proc OIDs are common enough that there's a special
 mechanism to create the necessary #define's automatically: see
@@ -49,19 +82,26 @@ up #define's for the pg_class OIDs of system catalogs and indexes.  For all
 the other system catalogs, you have to manually create any #define's you
 need.
 
-- If you need to find a valid OID for a new predefined tuple,
-use the unused_oids script.  It generates inclusive ranges of
+- If you need to find a valid OID for a new predefined tuple, use the
+script src/include/catalog/unused_oids.  It generates inclusive ranges of
 *unused* OIDs (e.g., the line "45-900" means OIDs 45 through 900 have
 not been allocated yet).  Currently, OIDs 1-9999 are reserved for manual
 assignment; the unused_oids script simply looks through the include/catalog
-headers to see which ones do not appear in "OID =" clauses in DATA lines.
+headers and .dat files to see which ones do not appear.
 (As of Postgres 8.1, it also looks at CATALOG and DECLARE_INDEX lines.)
-You can also use the duplicate_oids script to check for mistakes.
+You can use the duplicate_oids script to check for mistakes.  This script
+is also run at compile time, and will stop the build if a duplicate is
+found.
 
 - The OID counter starts at 10000 at bootstrap.  If a catalog row is in a
 table that requires OIDs, but no OID was preassigned by an "OID =" clause,
 then it will receive an OID of 10000 or above.
 
+-----------------------------------------------------------------
+
+People who are going to hose around with the .h files should be aware
+of the following facts:
+
 - To create a "BOOTSTRAP" table you have to do a lot of extra work: these
 tables are not created through a normal CREATE TABLE operation, but spring
 into existence when first written to during initdb.  Therefore, you must
@@ -98,7 +138,7 @@ catalog tuples that contain NULL attributes except in their
 variable-length portions!  (The bootstrapping code is fairly good about
 marking NOT NULL each of the columns that can legally be referenced via
 C struct declarations ... but those markings won't be enforced against
-DATA commands, so you must get it right in a DATA line.)
+insert commands, so you must get it right in the data files.)
 
 - Modification of the catalogs must be performed with the proper
 updating of catalog indexes!  That is, most catalogs have indexes
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 17e8e23..f941826 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -4,8 +4,8 @@
 # genbki.pl
 #    Perl script that generates postgres.bki, postgres.description,
 #    postgres.shdescription, and schemapg.h from specially formatted
-#    header files.  The .bki files are used to initialize the postgres
-#    template database.
+#    header files and data files.  The BKI files are used to initialize
+#    the postgres template database.
 #
 # Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 # Portions Copyright (c) 1994, Regents of the University of California
@@ -93,8 +93,43 @@ my $PG_CATALOG_NAMESPACE =
   Catalog::FindDefinedSymbol('pg_namespace.h', \@include_path,
 							 'PG_CATALOG_NAMESPACE');
 
-# Read all the input header files into internal data structures
-my $catalogs = Catalog::Catalogs(@input_files);
+# Read all the files into internal data structures. Not all catalogs
+# will have a data file.
+my @catnames;
+my %catalogs;
+my %catalog_data;
+my @toast_decls;
+my @index_decls;
+foreach my $header (@input_files)
+{
+	# Derive the companion data file name from the header name.
+	my ($stem) = $header =~ /(.+)\.h$/
+	  or die "Input files need to be header files.\n";
+	my $datfile = $stem . '.dat';
+
+	# Parse the header for the catalog's name and column definitions.
+	my $catalog = Catalog::ParseHeader($header);
+	my $schema  = $catalog->{columns};
+	my $catname = $catalog->{catname};
+
+	# Only headers that yield a catalog name are registered as catalogs.
+	if (defined $catname)
+	{
+		$catalogs{$catname} = $catalog;
+		push @catnames, $catname;
+	}
+
+	# Load the data file when one exists next to the header.
+	$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 0)
+	  if -e $datfile;
+
+	# Accumulate toast and index declarations across all headers.
+	push @toast_decls, $_
+	  foreach @{ $catalog->{toasting} };
+
+	push @index_decls, $_
+	  foreach @{ $catalog->{indexing} };
+}
 
 # Generate postgres.bki, postgres.description, and postgres.shdescription
 
@@ -108,11 +143,11 @@ my %regprocoids;
 my @types;
 
 # produce output, one catalog at a time
-foreach my $catname (@{ $catalogs->{names} })
+foreach my $catname (@catnames)
 {
 
 	# .bki CREATE command for this catalog
-	my $catalog = $catalogs->{$catname};
+	my $catalog = $catalogs{$catname};
 	print $bki "create $catname $catalog->{relation_oid}"
 	  . $catalog->{shared_relation}
 	  . $catalog->{bootstrap}
@@ -156,17 +191,13 @@ foreach my $catname (@{ $catalogs->{names} })
 		print $bki "open $catname\n";
 	}
 
-	if (defined $catalog->{data})
+	if (defined $catalog_data{$catname})
 	{
 
-		# Ordinary catalog with DATA line(s)
-		foreach my $row (@{ $catalog->{data} })
+		# Ordinary catalog with a data file
+		foreach my $row (@{ $catalog_data{$catname} })
 		{
-
-			# Split line into tokens without interpreting their meaning.
-			my %bki_values;
-			@bki_values{@attnames} =
-			  Catalog::SplitDataLine($row->{bki_values});
+			my %bki_values = %$row;
 
 			# Perform required substitutions on fields
 			foreach my $column (@$schema)
@@ -200,7 +231,7 @@ foreach my $catname (@{ $catalogs->{names} })
 				}
 				else
 				{
-					$regprocoids{ $bki_values{proname} } = $row->{oid};
+					$regprocoids{ $bki_values{proname} } = $bki_values{oid};
 				}
 			}
 
@@ -208,38 +239,38 @@ foreach my $catname (@{ $catalogs->{names} })
 			if ($catname eq 'pg_type')
 			{
 				my %type = %bki_values;
-				$type{oid} = $row->{oid};
 				push @types, \%type;
 			}
 
+			# Add quotes where necessary.
+			quote_bki_values(\%bki_values, $schema);
+
 			# Write to postgres.bki
-			my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
-			printf $bki "insert %s( %s )\n", $oid,
-			  join(' ', @bki_values{@attnames});
+			bki_insert(\%bki_values, @attnames);
 
 			# Write comments to postgres.description and
 			# postgres.shdescription
-			if (defined $row->{descr})
+			if (defined $bki_values{descr})
 			{
 				printf $descr "%s\t%s\t0\t%s\n",
-				  $row->{oid}, $catname, $row->{descr};
+				  $bki_values{oid}, $catname, $bki_values{descr};
 			}
-			if (defined $row->{shdescr})
+			if (defined $bki_values{shdescr})
 			{
 				printf $shdescr "%s\t%s\t%s\n",
-				  $row->{oid}, $catname, $row->{shdescr};
+				  $bki_values{oid}, $catname, $bki_values{shdescr};
 			}
 		}
 	}
 	if ($catname eq 'pg_attribute')
 	{
 
-		# For pg_attribute.h, we generate DATA entries ourselves.
+		# For pg_attribute.h, we generate data entries ourselves.
 		# NB: pg_type.h must come before pg_attribute.h in the input list
 		# of catalog names, since we use info from pg_type.h here.
-		foreach my $table_name (@{ $catalogs->{names} })
+		foreach my $table_name (@catnames)
 		{
-			my $table = $catalogs->{$table_name};
+			my $table = $catalogs{$table_name};
 
 			# Currently, all bootstrapped relations also need schemapg.h
 			# entries, so skip if the relation isn't to be in schemapg.h.
@@ -316,12 +347,12 @@ foreach my $catname (@{ $catalogs->{names} })
 # (i.e., not contained in a header with a CATALOG() statement) comes here
 
 # Write out declare toast/index statements
-foreach my $declaration (@{ $catalogs->{toasting}->{data} })
+foreach my $declaration (@toast_decls)
 {
 	print $bki $declaration;
 }
 
-foreach my $declaration (@{ $catalogs->{indexing}->{data} })
+foreach my $declaration (@index_decls)
 {
 	print $bki $declaration;
 }
@@ -379,6 +410,46 @@ exit 0;
 
 #################### Subroutines ########################
 
+# Add the double quotes that bootscanner.l requires around certain values
+# of a normal bki row, modifying $row in place.  Doing this here keeps most
+# double quotes out of the catalog data files for readability.
+sub quote_bki_values
+{
+	my ($row, $schema) = @_;
+
+	foreach my $column (@$schema)
+	{
+		my $attname = $column->{name};
+		my $atttype = $column->{type};
+		my $value   = $row->{$attname};
+
+		my $needs_quotes =
+		  (
+			length($value) == 0    # Empty string
+			or $value =~ /\s/      # Whitespace
+
+			# Quote strings that have special characters
+			# except for certain cases. See bootscanner.l
+			or (    $value =~ /\W/
+				and $value !~ /^\\\d{3}$/    # octal
+				and $value !~ /^-\d*$/)      # '-' or '-1'
+
+			# XXX Not needed, but keeps the .bki diff down to a reasonable
+			# size during review
+			or $attname eq 'oprname'      # Operator names
+			or $atttype eq 'oidvector'    # Arrays etc.
+			or $atttype eq 'int2vector'
+			or $atttype =~ /\[\]$/
+		  );
+		next unless $needs_quotes;
+
+		# Leave nulls and already double-quoted values untouched.
+		next if $value eq '_null_' or $value =~ /^"([^"])*"$/;
+
+		$row->{$attname} = q|"| . $value . q|"|;
+	}
+}
+
 
 # Given the schema of pg_attribute, generate an entry for it using information
 # about the attribute it describes.  Any value that is not handled here
@@ -452,7 +523,7 @@ sub emit_pgattr_row
 	}
 }
 
-# Write a pg_attribute entry to postgres.bki
+# Write an entry to postgres.bki
 sub bki_insert
 {
 	my $row        = shift;
@@ -522,8 +593,8 @@ Options:
     --set-version    PostgreSQL version number for initdb cross-check
 
 genbki.pl generates BKI files from specially formatted
-header files.  These BKI files are used to initialize the
-postgres template database.
+header files and .dat files.  These BKI files are used
+to initialize the postgres template database.
 
 Report bugs to <pgsql-bugs\@postgresql.org>.
 EOM
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index 14c02f5..2e9b6ad 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -3,7 +3,7 @@
 #
 # Gen_fmgrtab.pl
 #    Perl script that generates fmgroids.h, fmgrprotos.h, and fmgrtab.c
-#    from pg_proc.h
+#    from pg_proc.dat
 #
 # Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 # Portions Copyright (c) 1994, Regents of the University of California
@@ -20,7 +20,7 @@ use strict;
 use warnings;
 
 # Collect arguments
-my $infile;    # pg_proc.h
+my @input_files;
 my $output_path = '';
 my @include_path;
 
@@ -29,7 +29,7 @@ while (@ARGV)
 	my $arg = shift @ARGV;
 	if ($arg !~ /^-/)
 	{
-		$infile = $arg;
+		push @input_files, $arg;
 	}
 	elsif ($arg =~ /^-o/)
 	{
@@ -52,38 +52,50 @@ if ($output_path ne '' && substr($output_path, -1) ne '/')
 }
 
 # Sanity check arguments.
-die "No input files.\n"                                     if !$infile;
+die "No input files.\n"                                     if !@input_files;
 die "No include path; you must specify -I at least once.\n" if !@include_path;
 
+# Read all the input files into internal data structures.
+# Note: We pass data file names as arguments and then look for matching
+# headers to parse the schema from. This is backwards from genbki.pl,
+# but the Makefile dependencies look more sensible this way.
+my %catalogs;
+my %catalog_data;
+foreach my $datfile (@input_files)
+{
+	# The schema comes from the header that sits next to each data file.
+	$datfile =~ /(.+)\.dat$/
+	  or die "Input files need to be data (.dat) files.\n";
+	my $header = "$1.h";
+	die "There is no header file corresponding to $datfile\n"
+	  if ! -e $header;
+
+	# Parse the header first; its column list is needed to read the data.
+	my $catalog = Catalog::ParseHeader($header);
+	my $catname = $catalog->{catname};
+	my $schema  = $catalog->{columns};
+	$catalogs{$catname} = $catalog;
+	$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 0);
+}
+
+# Fetch some values for later.
 my $FirstBootstrapObjectId =
 	Catalog::FindDefinedSymbol('access/transam.h', \@include_path, 'FirstBootstrapObjectId');
 my $INTERNALlanguageId =
 	Catalog::FindDefinedSymbol('catalog/pg_language.h', \@include_path, 'INTERNALlanguageId');
 
-# Read all the data from the include/catalog files.
-my $catalogs = Catalog::Catalogs($infile);
-
-# Collect the raw data from pg_proc.h.
+# Collect certain fields from pg_proc.dat.
 my @fmgr = ();
-my @attnames;
-foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
-{
-	push @attnames, $column->{name};
-}
 
-my $data = $catalogs->{pg_proc}->{data};
-foreach my $row (@$data)
+foreach my $row (@{ $catalog_data{pg_proc} })
 {
-
-	# Split line into tokens without interpreting their meaning.
-	my %bki_values;
-	@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
+	my %bki_values = %$row;
 
 	# Select out just the rows for internal-language procedures.
 	next if $bki_values{prolang} ne $INTERNALlanguageId;
 
 	push @fmgr,
-	  { oid    => $row->{oid},
+	  { oid    => $bki_values{oid},
 		strict => $bki_values{proisstrict},
 		retset => $bki_values{proretset},
 		nargs  => $bki_values{pronargs},
@@ -281,10 +293,10 @@ Catalog::RenameTempFile($tabfile,    $tmpext);
 sub usage
 {
 	die <<EOM;
-Usage: perl -I [directory of Catalog.pm] Gen_fmgrtab.pl [path to pg_proc.h]
+Usage: perl -I [directory of Catalog.pm] Gen_fmgrtab.pl -I [include path] [path to pg_proc.dat]
 
 Gen_fmgrtab.pl generates fmgroids.h, fmgrprotos.h, and fmgrtab.c from
-pg_proc.h
+pg_proc.dat
 
 Report bugs to <pgsql-bugs\@postgresql.org>.
 EOM
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index efb8b53..f71cdc5 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -24,8 +24,8 @@ $(SUBDIRS:%=%-recursive): fmgroids.h fmgrprotos.h
 fmgrprotos.h: fmgroids.h ;
 fmgroids.h: fmgrtab.c ;
 
-fmgrtab.c: Gen_fmgrtab.pl $(catalogdir)/Catalog.pm $(top_srcdir)/src/include/catalog/pg_proc.h
-	$(PERL) -I $(catalogdir) $< -I $(top_srcdir)/src/include/ $(top_srcdir)/src/include/catalog/pg_proc.h
+fmgrtab.c: Gen_fmgrtab.pl $(catalogdir)/Catalog.pm $(top_srcdir)/src/include/catalog/pg_proc.dat $(top_srcdir)/src/include/access/transam.h
+	$(PERL) -I $(catalogdir) $< -I $(top_srcdir)/src/include/ $(top_srcdir)/src/include/catalog/pg_proc.dat
 
 errcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-errcodes.pl
 	$(PERL) $(srcdir)/generate-errcodes.pl $< > $@
diff --git a/src/include/catalog/duplicate_oids b/src/include/catalog/duplicate_oids
index 7342d61..9732f61 100755
--- a/src/include/catalog/duplicate_oids
+++ b/src/include/catalog/duplicate_oids
@@ -5,7 +5,7 @@ use warnings;
 
 BEGIN
 {
-	@ARGV = (glob("pg_*.h"), qw(indexing.h toasting.h));
+	@ARGV = (glob("pg_*.h"), glob("pg_*.dat"), qw(indexing.h toasting.h));
 }
 
 my %oidcounts;
@@ -14,7 +14,7 @@ while (<>)
 {
 	next if /^CATALOG\(.*BKI_BOOTSTRAP/;
 	next
-	  unless /^DATA\(insert *OID *= *(\d+)/
+	  unless /\boid *=> *'(\d+)'/
 		  || /^CATALOG\([^,]*, *(\d+).*BKI_ROWTYPE_OID\((\d+)\)/
 		  || /^CATALOG\([^,]*, *(\d+)/
 		  || /^DECLARE_INDEX\([^,]*, *(\d+)/
@@ -30,7 +30,7 @@ foreach my $oid (sort { $a <=> $b } keys %oidcounts)
 {
 	next unless $oidcounts{$oid} > 1;
 	$found = 1;
-	print "$oid\n";
+	print "***Duplicate OID: $oid\n";
 }
 
 exit $found;
diff --git a/src/include/catalog/unused_oids b/src/include/catalog/unused_oids
index 97769d3..a930560 100755
--- a/src/include/catalog/unused_oids
+++ b/src/include/catalog/unused_oids
@@ -25,11 +25,11 @@ export FIRSTOBJECTID
 
 # this part (down to the uniq step) should match the duplicate_oids script
 # note: we exclude BKI_BOOTSTRAP relations since they are expected to have
-# matching DATA lines in pg_class.h and pg_type.h
+# matching data lines in pg_class.dat and pg_type.dat
 
-cat pg_*.h toasting.h indexing.h | \
+cat pg_*.h pg_*.dat toasting.h indexing.h |
 egrep -v -e '^CATALOG\(.*BKI_BOOTSTRAP' | \
-sed -n	-e 's/^DATA(insert *OID *= *\([0-9][0-9]*\).*$/\1/p' \
+sed -n	-e 's/.*\boid *=> *'\''\([0-9][0-9]*\)'\''.*$/\1/p' \
 	-e 's/^CATALOG([^,]*, *\([0-9][0-9]*\).*BKI_ROWTYPE_OID(\([0-9][0-9]*\)).*$/\1,\2/p' \
 	-e 's/^CATALOG([^,]*, *\([0-9][0-9]*\).*$/\1/p' \
 	-e 's/^DECLARE_INDEX([^,]*, *\([0-9][0-9]*\).*$/\1/p' \
-- 
2.7.4