From aca39f35911ce18a94e8df1a64451d09e7a45479 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnaylor@gmail.com>
Date: Mon, 18 Dec 2017 12:38:23 +0700
Subject: [PATCH v4 04/12] Data conversion infrastructure

convert_header2dat.pl turns DATA()/(SH)DESCR() statements into
serialized Perl data structures in pg_*.dat files, preserving comments
along the way. This is a one-off script, but it is committed to the
repo in case third parties want to convert their own catalog data.

Remove data parsing from the original Catalogs() function and rename it
to ParseHeader() to reflect its new, limited role of extracting the
schema info from a single header. The new data files are handled one at
a time by the function ParseData().

rewrite_dat.pl reads in pg_*.dat files and rewrites them in a standard
format. It writes attributes in order, strips out values that match
defaults or are otherwise computable, preserves comments and folds
consecutive blank lines. The meta-attributes oid and (sh)descr are on
their own line, if present.

Add the ability to label columns in the source data by an abbreviation
rather than the full name, in order to shorten the entries. Add default
values and abbreviations to a few catalog headers. More could be done
here, but this is enough for a first pass.

Compute pg_proc.pronargs and (if possible) pg_proc.prosrc, rather than
storing directly.
---
 src/backend/catalog/Catalog.pm            | 270 +++++++++++++---------
 src/include/catalog/convert_header2dat.pl | 370 ++++++++++++++++++++++++++++++
 src/include/catalog/genbki.h              |   3 +
 src/include/catalog/pg_aggregate.h        |  38 +--
 src/include/catalog/pg_amop.h             |  31 ++-
 src/include/catalog/pg_amproc.h           |  19 +-
 src/include/catalog/pg_authid.h           |  43 +++-
 src/include/catalog/pg_class.h            | 138 ++++++++---
 src/include/catalog/pg_opclass.h          |  17 +-
 src/include/catalog/pg_operator.h         |  56 +++--
 src/include/catalog/pg_opfamily.h         |   8 +-
 src/include/catalog/pg_proc.h             | 116 +++++++---
 src/include/catalog/pg_type.h             |  47 ++--
 src/include/catalog/rewrite_dat.pl        | 265 +++++++++++++++++++++
 14 files changed, 1156 insertions(+), 265 deletions(-)
 create mode 100644 src/include/catalog/convert_header2dat.pl
 create mode 100644 src/include/catalog/rewrite_dat.pl

diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index a7a3fa0..c1ea8c6 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -1,7 +1,7 @@
 #----------------------------------------------------------------------
 #
 # Catalog.pm
-#    Perl module that extracts info from catalog headers into Perl
+#    Perl module that extracts info from catalog files into Perl
 #    data structures
 #
 # Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
@@ -16,12 +16,11 @@ package Catalog;
 use strict;
 use warnings;
 
-# Call this function with an array of names of header files to parse.
-# Returns a nested data structure describing the data in the headers.
-sub Catalogs
+# Parses a catalog header file into a data structure describing the schema
+# of the catalog.
+sub ParseHeader
 {
-	my (%catalogs, $catname, $declaring_attributes, $most_recent);
-	$catalogs{names} = [];
+	my $input_file = shift;
 
 	# There are a few types which are given one name in the C source, but a
 	# different name at the SQL level.  These are enumerated here.
@@ -34,18 +33,15 @@ sub Catalogs
 		'TransactionId' => 'xid',
 		'XLogRecPtr'    => 'pg_lsn');
 
-	foreach my $input_file (@_)
-	{
+		my $declaring_attributes;
 		my %catalog;
 		$catalog{columns} = [];
-		$catalog{data}    = [];
+		$catalog{toasting} = [];
+		$catalog{indexing} = [];
 		my $is_varlen     = 0;
 
 		open(my $ifh, '<', $input_file) || die "$input_file: $!";
 
-		my ($filename) = ($input_file =~ m/(\w+)\.h$/);
-		my $natts_pat = "Natts_$filename";
-
 		# Scan the input file.
 		while (<$ifh>)
 		{
@@ -63,9 +59,6 @@ sub Catalogs
 				redo;
 			}
 
-			# Remember input line number for later.
-			my $input_line_number = $.;
-
 			# Strip useless whitespace and trailing semicolons.
 			chomp;
 			s/^\s+//;
@@ -73,68 +66,17 @@ sub Catalogs
 			s/\s+/ /g;
 
 			# Push the data into the appropriate data structure.
-			if (/$natts_pat\s+(\d+)/)
+			if (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
 			{
-				$catalog{natts} = $1;
-			}
-			elsif (
-				/^DATA\(insert(\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
-			{
-				check_natts($filename, $catalog{natts}, $3, $input_file,
-					$input_line_number);
-
-				push @{ $catalog{data} }, { oid => $2, bki_values => $3 };
-			}
-			elsif (/^DESCR\(\"(.*)\"\)$/)
-			{
-				$most_recent = $catalog{data}->[-1];
-
-				# this tests if most recent line is not a DATA() statement
-				if (ref $most_recent ne 'HASH')
-				{
-					die "DESCR() does not apply to any catalog ($input_file)";
-				}
-				if (!defined $most_recent->{oid})
-				{
-					die "DESCR() does not apply to any oid ($input_file)";
-				}
-				elsif ($1 ne '')
-				{
-					$most_recent->{descr} = $1;
-				}
-			}
-			elsif (/^SHDESCR\(\"(.*)\"\)$/)
-			{
-				$most_recent = $catalog{data}->[-1];
-
-				# this tests if most recent line is not a DATA() statement
-				if (ref $most_recent ne 'HASH')
-				{
-					die
-					  "SHDESCR() does not apply to any catalog ($input_file)";
-				}
-				if (!defined $most_recent->{oid})
-				{
-					die "SHDESCR() does not apply to any oid ($input_file)";
-				}
-				elsif ($1 ne '')
-				{
-					$most_recent->{shdescr} = $1;
-				}
-			}
-			elsif (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
-			{
-				$catname = 'toasting';
 				my ($toast_name, $toast_oid, $index_oid) = ($1, $2, $3);
-				push @{ $catalog{data} },
+				push @{ $catalog{toasting} },
 				  "declare toast $toast_oid $index_oid on $toast_name\n";
 			}
 			elsif (/^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/)
 			{
-				$catname = 'indexing';
 				my ($is_unique, $index_name, $index_oid, $using) =
 				  ($1, $2, $3, $4);
-				push @{ $catalog{data} },
+				push @{ $catalog{indexing} },
 				  sprintf(
 					"declare %sindex %s %s %s\n",
 					$is_unique ? 'unique ' : '',
@@ -142,16 +84,13 @@ sub Catalogs
 			}
 			elsif (/^BUILD_INDICES/)
 			{
-				push @{ $catalog{data} }, "build indices\n";
+				push @{ $catalog{indexing} }, "build indices\n";
 			}
 			elsif (/^CATALOG\(([^,]*),(\d+)\)/)
 			{
-				$catname = $1;
+				$catalog{catname} = $1;
 				$catalog{relation_oid} = $2;
 
-				# Store pg_* catalog names in the same order we receive them
-				push @{ $catalogs{names} }, $catname;
-
 				$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
 				$catalog{shared_relation} =
 				  /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
@@ -217,6 +156,10 @@ sub Catalogs
 						{
 							$column{default} = $1;
 						}
+						elsif ($attopt =~ /BKI_ABBREV\((\S+)\)/)
+						{
+							$column{abbrev} = $1;
+						}
 						else
 						{
 							die
@@ -232,32 +175,127 @@ sub Catalogs
 				}
 			}
 		}
-		$catalogs{$catname} = \%catalog;
 		close $ifh;
+	return \%catalog;
+}
+
+# Parses a file containing Perl data structure literals, returning live data.
+#
+# The parameter $preserve_formatting needs to be set for callers that want
+# to work with non-data lines in the data files, such as comments and blank
+# lines. If a caller just wants to consume the data, leave it unset.
+sub ParseData
+{
+	my ($input_file, $schema, $preserve_formatting) = @_;
+
+	open(my $ifh, '<', $input_file) || die "$input_file: $!";
+	$input_file =~ /(\w+)\.dat$/;
+	my $catname = $1;
+	my $data = [];
+	my $prev_blank = 0;
+
+	# Scan the input file.
+	while (<$ifh>)
+	{
+		my $datum;
+
+		if (/^$/)
+		{
+			# Preserve non-consecutive blank lines.
+			# Newline gets added by caller.
+			next if $prev_blank;
+			$datum = '';
+			$prev_blank = 1;
+		}
+		else
+		{
+			$prev_blank = 0;
+		}
+
+		if (/{/)
+		{
+			# Capture the hash ref
+			# NB: Assumes that the next hash ref can't start on the
+			# same line where the present one ended.
+			# Not foolproof, but we shouldn't need a full lexer,
+			# since we expect relatively well-behaved input.
+
+			# Quick hack to detect when we have a full hash ref to
+			# parse. We can't just use a regex because of values in
+			# pg_aggregate and pg_proc like '{0,0}'.
+			my $lcnt = tr/{//;
+			my $rcnt = tr/}//;
+
+			if ($lcnt == $rcnt)
+			{
+				eval '$datum = ' . $_;
+				if (!ref $datum)
+				{
+					die "Error parsing $_\n$@";
+				}
+
+				# Expand tuples.
+				# We must do the following operations in the order given.
+				resolve_column_abbrevs($datum, $schema);
+				if ($catname eq 'pg_proc')
+				{
+					compute_pg_proc_fields($datum);
+				}
+				my $error = AddDefaultValues($datum, $schema);
+				if ($error)
+				{
+					print "Failed to form full tuple for $catname\n";
+					die $error;
+				}
+			}
+			else
+			{
+				my $next_line = <$ifh>;
+				die "$input_file: ends within Perl hash\n"
+				  if !defined $next_line;
+				$_ .= $next_line;
+				redo;
+			}
+		}
+		# Capture comments that are on their own line.
+		elsif (/^\s*#\s*(.+)\s*/)
+		{
+			$datum = "# $1";
+		}
+		# Assume bracket is the only token in the line.
+		elsif (/^\s*(\[|\])\s*$/)
+		{
+			$datum = $1;
+		}
+
+		next if !defined $datum;
+
+		# Hash references are data, so always push.
+		# Other datums are non-data strings, so only push if we
+		# want formatting.
+		if ($preserve_formatting or ref $datum eq 'HASH')
+		{
+			push @$data, $datum;
+		}
 	}
-	return \%catalogs;
+	return $data;
 }
 
-# Split a DATA line into fields.
-# Call this on the bki_values element of a DATA item returned by Catalogs();
-# it returns a list of field values.  We don't strip quoting from the fields.
-# Note: it should be safe to assign the result to a list of length equal to
-# the nominal number of catalog fields, because check_natts already checked
-# the number of fields.
-sub SplitDataLine
+# Copy values from abbreviated keys to full keys; abbreviated keys are kept.
+sub resolve_column_abbrevs
 {
-	my $bki_values = shift;
-
-	# This handling of quoted strings might look too simplistic, but it
-	# matches what bootscanner.l does: that has no provision for quote marks
-	# inside quoted strings, either.  If we don't have a quoted string, just
-	# snarf everything till next whitespace.  That will accept some things
-	# that bootscanner.l will see as erroneous tokens; but it seems wiser
-	# to do that and let bootscanner.l complain than to silently drop
-	# non-whitespace characters.
-	my @result = $bki_values =~ /"[^"]*"|\S+/g;
-
-	return @result;
+	my $row    = shift;
+	my $schema = shift;
+
+	foreach my $column (@$schema)
+	{
+		my $abbrev  = $column->{abbrev};
+		my $attname = $column->{name};
+		if (defined $abbrev and defined $row->{$abbrev})
+		{
+			$row->{$attname} = $row->{$abbrev};
+		}
+	}
 }
 
 # Fill in default values of a record using the given schema. It's the
@@ -300,6 +338,29 @@ sub AddDefaultValues
 	return $msg;
 }
 
+# Some pg_proc fields are computed as part of forming a full tuple:
+# pronargs is always derived, prosrc only if the row doesn't supply it.
+sub compute_pg_proc_fields
+{
+	my $row = shift;
+
+	# pronargs is the number of whitespace-separated entries in proargtypes.
+	# split ' ' skips leading whitespace, which split /\s+/ would turn into
+	# an empty first field and hence an inflated count.
+	my @argtypes;
+	if ($row->{proargtypes})
+	{
+		@argtypes = split ' ', $row->{proargtypes};
+	}
+	$row->{pronargs} = scalar(@argtypes);
+
+	# If prosrc doesn't exist, it must be a copy of proname.
+	if (!exists $row->{prosrc})
+	{
+		$row->{prosrc} = $row->{proname};
+	}
+}
+
 # Rename temporary files to final names.
 # Call this function with the final file name and the .tmp extension
 # Note: recommended extension is ".tmp$$", so that parallel make steps
@@ -313,7 +374,6 @@ sub RenameTempFile
 	rename($temp_name, $final_name) || die "rename: $temp_name: $!";
 }
 
-
 # Find a symbol defined in a particular header file and extract the value.
 #
 # The include path has to be passed as a reference to an array.
@@ -345,22 +405,4 @@ sub FindDefinedSymbol
 	die "$catalog_header: not found in any include directory\n";
 }
 
-
-# verify the number of fields in the passed-in DATA line
-sub check_natts
-{
-	my ($catname, $natts, $bki_val, $file, $line) = @_;
-
-	die
-"Could not find definition for Natts_${catname} before start of DATA() in $file\n"
-	  unless defined $natts;
-
-	my $nfields = scalar(SplitDataLine($bki_val));
-
-	die sprintf
-"Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
-	  $file, $line, $natts, $nfields
-	  unless $natts == $nfields;
-}
-
 1;
diff --git a/src/include/catalog/convert_header2dat.pl b/src/include/catalog/convert_header2dat.pl
new file mode 100644
index 0000000..d61cdc8
--- /dev/null
+++ b/src/include/catalog/convert_header2dat.pl
@@ -0,0 +1,370 @@
+#!/usr/bin/perl -w
+#----------------------------------------------------------------------
+#
+# convert_header2dat.pl
+#    Perl script that reads BKI data from the catalog header files
+#    and writes them out as native perl data structures. Comments and
+#    white space are preserved. Some functions are loosely copied from
+#    src/backend/catalog/Catalog.pm, whose equivalents will be removed.
+#
+# Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# /src/include/catalog/convert_header2dat.pl
+#
+#----------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Data::Dumper;
+# No $VARs - we add our own later.
+$Data::Dumper::Terse = 1;
+
+my @input_files;
+my $output_path = '';
+
+# Process command line switches.
+while (@ARGV)
+{
+	my $arg = shift @ARGV;
+	if ($arg !~ /^-/)
+	{
+		push @input_files, $arg;
+	}
+	elsif ($arg =~ /^-o/)
+	{
+		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
+		usage() if !defined $output_path;    # -o given without a path
+	}
+	else
+	{
+		usage();
+	}
+}
+
+# Sanity check arguments.
+die "No input files.\n" if !@input_files;
+foreach my $input_file (@input_files)
+{
+	if ($input_file !~ /\.h$/)
+	{
+		die "Input files need to be header files.\n";
+	}
+}
+
+# Make sure output_path ends in a slash.
+if ($output_path ne '' && substr($output_path, -1) ne '/')
+{
+	$output_path .= '/';
+}
+
+# Read all the input header files into internal data structures
+# XXX This script is not robust against non-catalog headers. It's best
+# to pass it the same list found in backend/catalog/Makefile.
+my $catalogs = catalogs(@input_files);
+
+# produce output, one catalog at a time
+foreach my $catname (@{ $catalogs->{names} })
+{
+	my $catalog = $catalogs->{$catname};
+	my $schema  = $catalog->{columns};
+
+	# First, see if the header has any data. This is necessary
+	# not only because of catalogs with no data, but also because some
+	# values coming down the pike are comments or newlines.
+	my $found_one = 0;
+	foreach my $data (@{ $catalog->{data} })
+	{
+		if (ref $data eq 'HASH')
+		{
+			$found_one = 1;
+		}
+	}
+	next if !$found_one;
+
+	my @attnames;
+	foreach my $column (@$schema)
+	{
+		my $attname = $column->{name};
+		push @attnames, $attname;
+	}
+
+	my $datfile = "$output_path$catname.dat";
+	open my $dat, '>', $datfile
+	  or die "can't open $datfile: $!";
+
+	# Write out data file.
+	print $dat "# $catname.dat\n";
+
+	# Note: Put extra newlines after brackets because otherwise
+	# some catalogs have newlines and some don't, because of whitespace
+	# around DATA() comments.
+	print $dat "[\n\n";
+
+	foreach my $data (@{ $catalog->{data} })
+	{
+		# Either a newline or comment - just write it out.
+		if (! ref $data)
+		{
+			print $dat "$data\n";
+		}
+		# Hash ref representing a data entry.
+		elsif (ref $data eq 'HASH')
+		{
+			# Split line into tokens without interpreting their meaning.
+			my %bki_values;
+			@bki_values{@attnames} = split_data_line($data->{bki_values});
+
+			# Flatten data hierarchy.
+			delete $data->{bki_values};
+			my %flat_data = (%$data, %bki_values);
+
+			# Strip double quotes for readability. Most will be put
+			# back in when writing postgres.bki
+			foreach (values %flat_data)
+			{
+				s/"//g;
+			}
+
+			print $dat Dumper(\%flat_data);
+			print $dat ",\n";
+		}
+	}
+	print $dat "\n]\n";
+	# Buffered write errors only surface at close, so check it.
+	close $dat or die "can't close $datfile: $!";
+}
+
+
+# Heavily modified version of the former Catalogs() in Catalog.pm (some dead
+# code remains). Returns { names => [...], <catname> => {columns, data, ...} }.
+sub catalogs
+{
+	my (%catalogs, $declaring_attributes, $most_recent);
+	$catalogs{names} = [];
+
+	# There are a few types which are given one name in the C source, but a
+	# different name at the SQL level.  These are enumerated here.
+	my %RENAME_ATTTYPE = (
+		'int16'         => 'int2',
+		'int32'         => 'int4',
+		'int64'         => 'int8',
+		'Oid'           => 'oid',
+		'NameData'      => 'name',
+		'TransactionId' => 'xid',
+		'XLogRecPtr'    => 'pg_lsn');
+
+	foreach my $input_file (@_)
+	{
+		# $catname is scoped to one input file, so that a header without a
+		# CATALOG() line can't overwrite the previously parsed catalog.
+		my ($catname, %catalog);
+		$catalog{columns} = [];
+		$catalog{data}    = [];
+		my $is_varlen     = 0;
+		my $saving_comments = 0;
+
+		open(my $ifh, '<', $input_file) || die "$input_file: $!";
+
+		# Scan the input file.
+		while (<$ifh>)
+		{
+			# Determine that we're in the DATA section and should
+			# start saving DATA comments.
+			if (/(\/|\s)\*\s+initial contents of pg_/)
+			{
+				$saving_comments = 1;
+			}
+
+			if ($saving_comments)
+			{
+				if ( m;^(/|\s+)\*\s+(.+); )
+				{
+					my $comment = $2;
+
+					# Ugly way to strip */ off the end
+					if ($comment =~ m;\*/$;)
+					{
+						$comment =~ s/.{2}$//;
+					}
+
+					# Turn C-style comment into Perl-style.
+
+					# Filter out comments we know we don't want.
+					if ($comment !~ /^-+$/
+						and $comment !~ /initial contents of pg/
+						and $comment !~ /PG_\w+_H/)
+					{
+						# Trim whitespace.
+						$comment =~ s/^\s+//;
+						$comment =~ s/\s+$//;
+						push @{ $catalog{data} }, "# $comment";
+					}
+				}
+				elsif (/^$/)
+				{
+					# Preserve blank lines
+					# Newline gets added by caller.
+					push @{ $catalog{data} }, '';
+				}
+			}
+			else
+			{
+				# Strip C-style comments.
+				s;/\*(.|\n)*\*/;;g;
+				if (m;/\*;)
+				{
+					# handle multi-line comments properly.
+					my $next_line = <$ifh>;
+					die "$input_file: ends within C-style comment\n"
+					  if !defined $next_line;
+					$_ .= $next_line;
+					redo;
+				}
+			}
+
+			# Strip useless whitespace and trailing semicolons.
+			chomp;
+			s/^\s+//;
+			s/;\s*$//;
+			s/\s+/ /g;
+
+			# Push the data into the appropriate data structure.
+			if (
+				/^DATA\(insert(\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
+			{
+				if ($2)
+				{
+					push @{ $catalog{data} }, { oid => $2, bki_values => $3 };
+				}
+				else
+				{
+					push @{ $catalog{data} }, { bki_values => $3 };
+				}
+			}
+			elsif (/^DESCR\(\"(.*)\"\)$/)
+			{
+				$most_recent = $catalog{data}->[-1];
+
+				# this tests if most recent line is not a DATA() statement
+				if (ref $most_recent ne 'HASH')
+				{
+					die "DESCR() does not apply to any catalog ($input_file)";
+				}
+				if (!defined $most_recent->{oid})
+				{
+					die "DESCR() does not apply to any oid ($input_file)";
+				}
+				elsif ($1 ne '')
+				{
+					$most_recent->{descr} = $1;
+				}
+			}
+			elsif (/^SHDESCR\(\"(.*)\"\)$/)
+			{
+				$most_recent = $catalog{data}->[-1];
+
+				# this tests if most recent line is not a DATA() statement
+				if (ref $most_recent ne 'HASH')
+				{
+					die
+					  "SHDESCR() does not apply to any catalog ($input_file)";
+				}
+				if (!defined $most_recent->{oid})
+				{
+					die "SHDESCR() does not apply to any oid ($input_file)";
+				}
+				elsif ($1 ne '')
+				{
+					$most_recent->{shdescr} = $1;
+				}
+			}
+			elsif (/^CATALOG\(([^,]*),(\d+)\)/)
+			{
+				$catname = $1;
+				$catalog{relation_oid} = $2;
+
+				# Store pg_* catalog names in the same order we receive them
+				push @{ $catalogs{names} }, $catname;
+
+				$declaring_attributes = 1;
+			}
+			elsif ($declaring_attributes)
+			{
+				next if (/^{|^$/);
+				next if (/^#/);
+				if (/^}/)
+				{
+					undef $declaring_attributes;
+				}
+				else
+				{
+					my %column;
+					if ($is_varlen)
+					{
+						$column{is_varlen} = 1;
+					}
+					my ($atttype, $attname, $attopt) = split /\s+/, $_;
+					die "parse error ($input_file)" unless $attname;
+					if (exists $RENAME_ATTTYPE{$atttype})
+					{
+						$atttype = $RENAME_ATTTYPE{$atttype};
+					}
+					if ($attname =~ /(.*)\[.*\]/)    # array attribute
+					{
+						$attname = $1;
+						$atttype .= '[]';
+					}
+
+					$column{type} = $atttype;
+					$column{name} = $attname;
+
+					push @{ $catalog{columns} }, \%column;
+				}
+			}
+		}
+		if (defined $catname)
+		{
+			$catalogs{$catname} = \%catalog;
+		}
+		close $ifh;
+	}
+	return \%catalogs;
+}
+
+# Tokenize the bki_values string of a DATA item returned by catalogs().
+# Returns the list of field values; quoting is left on the fields.
+# Note: assigning the result to a list as long as the nominal number of
+# catalog fields should be safe, because the number of fields was
+# checked in the original Catalog module.
+sub split_data_line
+{
+	my ($bki_values) = @_;
+
+	# A token is either a double-quoted string or a run of non-whitespace.
+	# The quoted-string handling might look too simplistic, but it matches
+	# bootscanner.l, which has no provision for quote marks inside quoted
+	# strings either.  Snarfing everything up to the next whitespace will
+	# accept some things that bootscanner.l sees as erroneous tokens; it
+	# seems wiser to let bootscanner.l complain than to silently drop
+	# non-whitespace characters.
+	my @fields = ($bki_values =~ /"[^"]*"|\S+/g);
+
+	# Return via an array so that scalar context yields the field count.
+	return @fields;
+}
+
+sub usage
+{
+	die <<EOM;
+Usage: convert_header2dat.pl [options] header...
+
+Options:
+    -o               output path
+
+convert_header2dat.pl generates data files from the same header files
+currently parsed by Catalog.pm.
+
+EOM
+}
diff --git a/src/include/catalog/genbki.h b/src/include/catalog/genbki.h
index 71fc579..c3ffa29 100644
--- a/src/include/catalog/genbki.h
+++ b/src/include/catalog/genbki.h
@@ -34,6 +34,9 @@
 /* Specifies a default value for a catalog field */
 #define BKI_DEFAULT(value)
 
+/* Specifies an abbreviated label for a column name */
+#define BKI_ABBREV(abb)
+
 /*
  * This is never defined; it's here only for documentation.
  *
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h
index 13f1bce..f7be2c0 100644
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -55,29 +55,29 @@
 CATALOG(pg_aggregate,2600) BKI_WITHOUT_OIDS
 {
 	regproc		aggfnoid;
-	char		aggkind;
-	int16		aggnumdirectargs;
+	char		aggkind BKI_DEFAULT(n);
+	int16		aggnumdirectargs BKI_DEFAULT(0);
 	regproc		aggtransfn;
-	regproc		aggfinalfn;
-	regproc		aggcombinefn;
-	regproc		aggserialfn;
-	regproc		aggdeserialfn;
-	regproc		aggmtransfn;
-	regproc		aggminvtransfn;
-	regproc		aggmfinalfn;
-	bool		aggfinalextra;
-	bool		aggmfinalextra;
-	char		aggfinalmodify;
-	char		aggmfinalmodify;
-	Oid			aggsortop;
+	regproc		aggfinalfn BKI_DEFAULT(-);
+	regproc		aggcombinefn BKI_DEFAULT(-);
+	regproc		aggserialfn BKI_DEFAULT(-);
+	regproc		aggdeserialfn BKI_DEFAULT(-);
+	regproc		aggmtransfn BKI_DEFAULT(-);
+	regproc		aggminvtransfn BKI_DEFAULT(-);
+	regproc		aggmfinalfn BKI_DEFAULT(-);
+	bool		aggfinalextra BKI_DEFAULT(f);
+	bool		aggmfinalextra BKI_DEFAULT(f);
+	char		aggfinalmodify BKI_DEFAULT(r);
+	char		aggmfinalmodify BKI_DEFAULT(r);
+	Oid			aggsortop BKI_DEFAULT(0);
 	Oid			aggtranstype;
-	int32		aggtransspace;
-	Oid			aggmtranstype;
-	int32		aggmtransspace;
+	int32		aggtransspace BKI_DEFAULT(0);
+	Oid			aggmtranstype BKI_DEFAULT(0);
+	int32		aggmtransspace BKI_DEFAULT(0);
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
-	text		agginitval;
-	text		aggminitval;
+	text		agginitval BKI_DEFAULT(_null_);
+	text		aggminitval BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_aggregate;
 
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index f850be4..89c97d8 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -55,14 +55,29 @@
 
 CATALOG(pg_amop,2602)
 {
-	Oid			amopfamily;		/* the index opfamily this entry is for */
-	Oid			amoplefttype;	/* operator's left input data type */
-	Oid			amoprighttype;	/* operator's right input data type */
-	int16		amopstrategy;	/* operator strategy number */
-	char		amoppurpose;	/* is operator for 's'earch or 'o'rdering? */
-	Oid			amopopr;		/* the operator's pg_operator OID */
-	Oid			amopmethod;		/* the index access method this entry is for */
-	Oid			amopsortfamily; /* ordering opfamily OID, or 0 if search op */
+	/* the index opfamily this entry is for */
+	Oid			amopfamily BKI_ABBREV(opf);
+
+	/* operator's left input data type */
+	Oid			amoplefttype BKI_ABBREV(lt);
+
+	/* operator's right input data type */
+	Oid			amoprighttype BKI_ABBREV(rt);
+
+	/* operator strategy number */
+	int16		amopstrategy BKI_ABBREV(str);
+
+	/* is operator for 's'earch or 'o'rdering? */
+	char		amoppurpose BKI_ABBREV(pur) BKI_DEFAULT(s);
+
+	/* the operator's pg_operator OID */
+	Oid			amopopr BKI_ABBREV(oper);
+
+	/* the index access method this entry is for */
+	Oid			amopmethod BKI_ABBREV(am);
+
+	/* ordering opfamily OID, or 0 if search op */
+	Oid			amopsortfamily BKI_DEFAULT(0);
 } FormData_pg_amop;
 
 /* allowed values of amoppurpose: */
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 1c95846..93c246f 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -44,11 +44,20 @@
 
 CATALOG(pg_amproc,2603)
 {
-	Oid			amprocfamily;	/* the index opfamily this entry is for */
-	Oid			amproclefttype; /* procedure's left input data type */
-	Oid			amprocrighttype;	/* procedure's right input data type */
-	int16		amprocnum;		/* support procedure index */
-	regproc		amproc;			/* OID of the proc */
+	/* the index opfamily this entry is for */
+	Oid			amprocfamily BKI_ABBREV(opf);
+
+	/* procedure's left input data type */
+	Oid			amproclefttype BKI_ABBREV(lt);
+
+	/* procedure's right input data type */
+	Oid			amprocrighttype BKI_ABBREV(rt);
+
+	/* support procedure index */
+	int16		amprocnum BKI_ABBREV(num);
+
+	/* OID of the proc */
+	regproc		amproc;
 } FormData_pg_amproc;
 
 /* ----------------
diff --git a/src/include/catalog/pg_authid.h b/src/include/catalog/pg_authid.h
index 9b6b52c..c06e2cd3 100644
--- a/src/include/catalog/pg_authid.h
+++ b/src/include/catalog/pg_authid.h
@@ -44,20 +44,41 @@
 
 CATALOG(pg_authid,1260) BKI_SHARED_RELATION BKI_ROWTYPE_OID(2842) BKI_SCHEMA_MACRO
 {
-	NameData	rolname;		/* name of role */
-	bool		rolsuper;		/* read this field via superuser() only! */
-	bool		rolinherit;		/* inherit privileges from other roles? */
-	bool		rolcreaterole;	/* allowed to create more roles? */
-	bool		rolcreatedb;	/* allowed to create databases? */
-	bool		rolcanlogin;	/* allowed to log in as session user? */
-	bool		rolreplication; /* role used for streaming replication */
-	bool		rolbypassrls;	/* bypasses row level security? */
-	int32		rolconnlimit;	/* max connections allowed (-1=no limit) */
+	/* name of role */
+	NameData	rolname;
+
+	/* read this field via superuser() only! */
+	bool		rolsuper BKI_DEFAULT(f);
+
+	/* inherit privileges from other roles? */
+	bool		rolinherit BKI_DEFAULT(t);
+
+	/* allowed to create more roles? */
+	bool		rolcreaterole BKI_DEFAULT(f);
+
+	/* allowed to create databases? */
+	bool		rolcreatedb BKI_DEFAULT(f);
+
+	/* allowed to log in as session user? */
+	bool		rolcanlogin BKI_DEFAULT(f);
+
+	/* role used for streaming replication */
+	bool		rolreplication BKI_DEFAULT(f);
+
+	/* bypasses row level security? */
+	bool		rolbypassrls BKI_DEFAULT(f);
+
+	/* max connections allowed (-1=no limit) */
+	int32		rolconnlimit BKI_DEFAULT(-1);
 
 	/* remaining fields may be null; use heap_getattr to read them! */
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
-	text		rolpassword;	/* password, if any */
-	timestamptz rolvaliduntil;	/* password expiration time, if any */
+
+	/* password, if any */
+	text		rolpassword BKI_DEFAULT(_null_);
+
+	/* password expiration time, if any */
+	timestamptz rolvaliduntil BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_authid;
 
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index b256657..f8ff6d2 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -31,55 +31,117 @@
 
 CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
 {
-	NameData	relname;		/* class name */
-	Oid			relnamespace;	/* OID of namespace containing this class */
-	Oid			reltype;		/* OID of entry in pg_type for table's
-								 * implicit row type */
-	Oid			reloftype;		/* OID of entry in pg_type for underlying
-								 * composite type */
-	Oid			relowner;		/* class owner */
-	Oid			relam;			/* index access method; 0 if not an index */
-	Oid			relfilenode;	/* identifier of physical storage file */
+	/* class name */
+	NameData	relname;
+
+	/* OID of namespace containing this class */
+	Oid			relnamespace BKI_DEFAULT(PGNSP);
+
+	/* OID of entry in pg_type for table's implicit row type */
+	Oid			reltype;
+
+	/* OID of entry in pg_type for underlying composite type */
+	Oid			reloftype BKI_DEFAULT(0);
+
+	/* class owner */
+	Oid			relowner BKI_DEFAULT(PGUID);
+
+	/* index access method; 0 if not an index */
+	Oid			relam BKI_DEFAULT(0);
+
+	/* identifier of physical storage file */
+	Oid			relfilenode BKI_DEFAULT(0);
 
 	/* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
-	Oid			reltablespace;	/* identifier of table space for relation */
-	int32		relpages;		/* # of blocks (not always up-to-date) */
-	float4		reltuples;		/* # of tuples (not always up-to-date) */
-	int32		relallvisible;	/* # of all-visible blocks (not always
-								 * up-to-date) */
-	Oid			reltoastrelid;	/* OID of toast table; 0 if none */
-	bool		relhasindex;	/* T if has (or has had) any indexes */
-	bool		relisshared;	/* T if shared across databases */
-	char		relpersistence; /* see RELPERSISTENCE_xxx constants below */
-	char		relkind;		/* see RELKIND_xxx constants below */
-	int16		relnatts;		/* number of user attributes */
+
+	/* identifier of table space for relation */
+	Oid			reltablespace BKI_DEFAULT(0);
+
+	/* # of blocks (not always up-to-date) */
+	int32		relpages BKI_DEFAULT(0);
+
+	/* # of tuples (not always up-to-date) */
+	float4		reltuples BKI_DEFAULT(0);
+
+	/* # of all-visible blocks (not always up-to-date) */
+	int32		relallvisible BKI_DEFAULT(0);
+
+	/* OID of toast table; 0 if none */
+	Oid			reltoastrelid BKI_DEFAULT(0);
+
+	/* T if has (or has had) any indexes */
+	bool		relhasindex BKI_DEFAULT(f);
+
+	/* T if shared across databases */
+	bool		relisshared BKI_DEFAULT(f);
+
+	/* see RELPERSISTENCE_xxx constants below */
+	char		relpersistence BKI_DEFAULT(p);
+
+	/* see RELKIND_xxx constants below */
+	char		relkind BKI_DEFAULT(r);
+
+	/* number of user attributes */
+	int16		relnatts;
 
 	/*
 	 * Class pg_attribute must contain exactly "relnatts" user attributes
 	 * (with attnums ranging from 1 to relnatts) for this class.  It may also
 	 * contain entries with negative attnums for system attributes.
 	 */
-	int16		relchecks;		/* # of CHECK constraints for class */
-	bool		relhasoids;		/* T if we generate OIDs for rows of rel */
-	bool		relhaspkey;		/* has (or has had) PRIMARY KEY index */
-	bool		relhasrules;	/* has (or has had) any rules */
-	bool		relhastriggers; /* has (or has had) any TRIGGERs */
-	bool		relhassubclass; /* has (or has had) derived classes */
-	bool		relrowsecurity; /* row security is enabled or not */
-	bool		relforcerowsecurity;	/* row security forced for owners or
-										 * not */
-	bool		relispopulated; /* matview currently holds query results */
-	char		relreplident;	/* see REPLICA_IDENTITY_xxx constants  */
-	bool		relispartition; /* is relation a partition? */
-	TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
-	TransactionId relminmxid;	/* all multixacts in this rel are >= this.
-								 * this is really a MultiXactId */
+
+	/* # of CHECK constraints for class */
+	int16		relchecks BKI_DEFAULT(0);
+
+	/* T if we generate OIDs for rows of rel */
+	bool		relhasoids;
+
+	/* has (or has had) PRIMARY KEY index */
+	bool		relhaspkey BKI_DEFAULT(f);
+
+	/* has (or has had) any rules */
+	bool		relhasrules BKI_DEFAULT(f);
+
+	/* has (or has had) any TRIGGERs */
+	bool		relhastriggers BKI_DEFAULT(f);
+
+	/* has (or has had) derived classes */
+	bool		relhassubclass BKI_DEFAULT(f);
+
+	/* row security is enabled or not */
+	bool		relrowsecurity BKI_DEFAULT(f);
+
+	/* row security forced for owners or not */
+	bool		relforcerowsecurity BKI_DEFAULT(f);
+
+	/* matview currently holds query results */
+	bool		relispopulated BKI_DEFAULT(t);
+
+	/* see REPLICA_IDENTITY_xxx constants  */
+	char		relreplident BKI_DEFAULT(n);
+
+	/* is relation a partition? */
+	bool		relispartition BKI_DEFAULT(f);
+
+	/* all Xids < this are frozen in this rel */
+	/* Note: "3" stands for FirstNormalTransactionId */
+	TransactionId relfrozenxid BKI_DEFAULT(3);
+
+	/* all multixacts in this rel are >= this. This is really a MultiXactId */
+	/* Note: "1" stands for FirstMultiXactId */
+	TransactionId relminmxid BKI_DEFAULT(1);
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
 	/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
-	aclitem		relacl[1];		/* access permissions */
-	text		reloptions[1];	/* access-method-specific options */
-	pg_node_tree relpartbound;	/* partition bound node tree */
+
+	/* access permissions */
+	aclitem		relacl[1] BKI_DEFAULT(_null_);
+
+	/* access-method-specific options */
+	text		reloptions[1] BKI_DEFAULT(_null_);
+
+	/* partition bound node tree */
+	pg_node_tree relpartbound BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_class;
 
diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h
index 28dbc74..7c31622 100644
--- a/src/include/catalog/pg_opclass.h
+++ b/src/include/catalog/pg_opclass.h
@@ -52,12 +52,21 @@ CATALOG(pg_opclass,2616)
 {
 	Oid			opcmethod;		/* index access method opclass is for */
 	NameData	opcname;		/* name of this opclass */
-	Oid			opcnamespace;	/* namespace of this opclass */
-	Oid			opcowner;		/* opclass owner */
+
+	/* namespace of this opclass */
+	Oid			opcnamespace BKI_DEFAULT(PGNSP);
+
+	/* opclass owner */
+	Oid			opcowner BKI_DEFAULT(PGUID);
+
 	Oid			opcfamily;		/* containing operator family */
 	Oid			opcintype;		/* type of data indexed by opclass */
-	bool		opcdefault;		/* T if opclass is default for opcintype */
-	Oid			opckeytype;		/* type of data in index, or InvalidOid */
+
+	/* T if opclass is default for opcintype */
+	bool		opcdefault BKI_DEFAULT(t);
+
+	/* type of data in index, or InvalidOid */
+	Oid			opckeytype BKI_DEFAULT(0);
 } FormData_pg_opclass;
 
 /* ----------------
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index ff9b470..c94186a 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -33,20 +33,48 @@
 
 CATALOG(pg_operator,2617)
 {
-	NameData	oprname;		/* name of operator */
-	Oid			oprnamespace;	/* OID of namespace containing this oper */
-	Oid			oprowner;		/* operator owner */
-	char		oprkind;		/* 'l', 'r', or 'b' */
-	bool		oprcanmerge;	/* can be used in merge join? */
-	bool		oprcanhash;		/* can be used in hash join? */
-	Oid			oprleft;		/* left arg type, or 0 if 'l' oprkind */
-	Oid			oprright;		/* right arg type, or 0 if 'r' oprkind */
-	Oid			oprresult;		/* result datatype */
-	Oid			oprcom;			/* OID of commutator oper, or 0 if none */
-	Oid			oprnegate;		/* OID of negator oper, or 0 if none */
-	regproc		oprcode;		/* OID of underlying function */
-	regproc		oprrest;		/* OID of restriction estimator, or 0 */
-	regproc		oprjoin;		/* OID of join estimator, or 0 */
+
+	/* name of operator */
+	NameData	oprname;
+
+	/* OID of namespace containing this oper */
+	Oid			oprnamespace BKI_DEFAULT(PGNSP);
+
+	/* operator owner */
+	Oid			oprowner BKI_DEFAULT(PGUID);
+
+	/* 'l' (no left arg), 'r' (no right arg), or 'b'; see oprleft/oprright */
+	char		oprkind BKI_DEFAULT(b);
+
+	/* can be used in merge join? */
+	bool		oprcanmerge BKI_DEFAULT(f);
+
+	/* can be used in hash join? */
+	bool		oprcanhash BKI_DEFAULT(f);
+
+	/* left arg type, or 0 if 'l' oprkind */
+	Oid			oprleft;
+
+	/* right arg type, or 0 if 'r' oprkind */
+	Oid			oprright;
+
+	/* result datatype */
+	Oid			oprresult;
+
+	/* OID of commutator oper, or 0 if none */
+	Oid			oprcom BKI_DEFAULT(0);
+
+	/* OID of negator oper, or 0 if none */
+	Oid			oprnegate BKI_DEFAULT(0);
+
+	/* OID of underlying function */
+	regproc		oprcode;
+
+	/* OID of restriction estimator, or 0 */
+	regproc		oprrest BKI_DEFAULT(-);
+
+	/* OID of join estimator, or 0 */
+	regproc		oprjoin BKI_DEFAULT(-);
 } FormData_pg_operator;
 
 /* ----------------
diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h
index 0d0ba7c..39bb8cd 100644
--- a/src/include/catalog/pg_opfamily.h
+++ b/src/include/catalog/pg_opfamily.h
@@ -32,8 +32,12 @@ CATALOG(pg_opfamily,2753)
 {
 	Oid			opfmethod;		/* index access method opfamily is for */
 	NameData	opfname;		/* name of this opfamily */
-	Oid			opfnamespace;	/* namespace of this opfamily */
-	Oid			opfowner;		/* opfamily owner */
+
+	/* namespace of this opfamily */
+	Oid			opfnamespace BKI_DEFAULT(PGNSP);
+
+	/* opfamily owner */
+	Oid			opfowner BKI_DEFAULT(PGUID);
 } FormData_pg_opfamily;
 
 /* ----------------
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c969375..4b2cf32 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -35,43 +35,99 @@
 
 CATALOG(pg_proc,1255) BKI_BOOTSTRAP BKI_ROWTYPE_OID(81) BKI_SCHEMA_MACRO
 {
-	NameData	proname;		/* procedure name */
-	Oid			pronamespace;	/* OID of namespace containing this proc */
-	Oid			proowner;		/* procedure owner */
-	Oid			prolang;		/* OID of pg_language entry */
-	float4		procost;		/* estimated execution cost */
-	float4		prorows;		/* estimated # of rows out (if proretset) */
-	Oid			provariadic;	/* element type of variadic array, or 0 */
-	regproc		protransform;	/* transforms calls to it during planning */
-	bool		proisagg;		/* is it an aggregate? */
-	bool		proiswindow;	/* is it a window function? */
-	bool		prosecdef;		/* security definer */
-	bool		proleakproof;	/* is it a leak-proof function? */
-	bool		proisstrict;	/* strict with respect to NULLs? */
-	bool		proretset;		/* returns a set? */
-	char		provolatile;	/* see PROVOLATILE_ categories below */
-	char		proparallel;	/* see PROPARALLEL_ categories below */
-	int16		pronargs;		/* number of arguments */
-	int16		pronargdefaults;	/* number of arguments with defaults */
-	Oid			prorettype;		/* OID of result type */
+	/* procedure name */
+	NameData	proname BKI_ABBREV(n);
+
+	/* OID of namespace containing this proc */
+	Oid			pronamespace BKI_DEFAULT(PGNSP);
+
+	/* procedure owner */
+	Oid			proowner BKI_DEFAULT(PGUID);
+
+	/* OID of pg_language entry */
+	Oid			prolang BKI_DEFAULT(12);
+
+	/* estimated execution cost */
+	float4		procost BKI_DEFAULT(1);
+
+	/* estimated # of rows out (if proretset) */
+	float4		prorows BKI_DEFAULT(0);
+
+	/* element type of variadic array, or 0 */
+	Oid			provariadic BKI_DEFAULT(0);
+
+	/* transforms calls to it during planning */
+	regproc		protransform BKI_DEFAULT(0);
+
+	/* is it an aggregate? */
+	bool		proisagg BKI_DEFAULT(f);
+
+	/* is it a window function? */
+	bool		proiswindow BKI_DEFAULT(f);
+
+	/* security definer */
+	bool		prosecdef BKI_DEFAULT(f);
+
+	/* is it a leak-proof function? */
+	bool		proleakproof BKI_ABBREV(lp) BKI_DEFAULT(f);
+
+	/* strict with respect to NULLs? */
+	bool		proisstrict BKI_DEFAULT(t);
+
+	/* returns a set? */
+	bool		proretset BKI_DEFAULT(f);
+
+	/* see PROVOLATILE_ categories below */
+	char		provolatile BKI_ABBREV(v) BKI_DEFAULT(i);
+
+	/* see PROPARALLEL_ categories below */
+	char		proparallel BKI_ABBREV(p) BKI_DEFAULT(u);
+
+	/* number of arguments (computed; rewrite_dat.pl strips it from data) */
+	int16		pronargs;
+
+	/* number of arguments with defaults */
+	int16		pronargdefaults BKI_DEFAULT(0);
+
+	/* OID of result type */
+	Oid			prorettype BKI_ABBREV(rt);
 
 	/*
 	 * variable-length fields start here, but we allow direct access to
 	 * proargtypes
 	 */
-	oidvector	proargtypes;	/* parameter types (excludes OUT params) */
+
+	/* parameter types (excludes OUT params) */
+	oidvector	proargtypes BKI_ABBREV(at);
 
 #ifdef CATALOG_VARLEN
-	Oid			proallargtypes[1];	/* all param types (NULL if IN only) */
-	char		proargmodes[1]; /* parameter modes (NULL if IN only) */
-	text		proargnames[1]; /* parameter names (NULL if no names) */
-	pg_node_tree proargdefaults;	/* list of expression trees for argument
-									 * defaults (NULL if none) */
-	Oid			protrftypes[1]; /* types for which to apply transforms */
-	text		prosrc BKI_FORCE_NOT_NULL;	/* procedure source text */
-	text		probin;			/* secondary procedure info (can be NULL) */
-	text		proconfig[1];	/* procedure-local GUC settings */
-	aclitem		proacl[1];		/* access permissions */
+
+	/* all param types (NULL if IN only) */
+	Oid			proallargtypes[1] BKI_DEFAULT(_null_);
+
+	/* parameter modes (NULL if IN only) */
+	char		proargmodes[1] BKI_DEFAULT(_null_);
+
+	/* parameter names (NULL if no names) */
+	text		proargnames[1] BKI_DEFAULT(_null_);
+
+	/* list of expression trees for argument defaults (NULL if none) */
+	pg_node_tree proargdefaults BKI_DEFAULT(_null_);
+
+	/* types for which to apply transforms */
+	Oid			protrftypes[1] BKI_DEFAULT(_null_);
+
+	/* procedure source text (rewrite_dat.pl strips it if same as proname) */
+	text		prosrc BKI_ABBREV(s) BKI_FORCE_NOT_NULL;
+
+	/* secondary procedure info (can be NULL) */
+	text		probin BKI_DEFAULT(_null_);
+
+	/* procedure-local GUC settings */
+	text		proconfig[1] BKI_DEFAULT(_null_);
+
+	/* access permissions */
+	aclitem		proacl[1] BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_proc;
 
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index e355144..1039c1b 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -37,8 +37,12 @@
 CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 {
 	NameData	typname;		/* type name */
-	Oid			typnamespace;	/* OID of namespace containing this type */
-	Oid			typowner;		/* type owner */
+
+	/* OID of namespace containing this type */
+	Oid			typnamespace BKI_DEFAULT(PGNSP);
+
+	/* type owner */
+	Oid			typowner BKI_DEFAULT(PGUID);
 
 	/*
 	 * For a fixed-size type, typlen is the number of bytes we use to
@@ -66,7 +70,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 *
 	 * If typtype is 'c', typrelid is the OID of the class' entry in pg_class.
 	 */
-	char		typtype;
+	char		typtype BKI_DEFAULT(b);
 
 	/*
 	 * typcategory and typispreferred help the parser distinguish preferred
@@ -76,17 +80,20 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 */
 	char		typcategory;	/* arbitrary type classification */
 
-	bool		typispreferred; /* is type "preferred" within its category? */
+	/* is type "preferred" within its category? */
+	bool		typispreferred BKI_DEFAULT(f);
 
 	/*
 	 * If typisdefined is false, the entry is only a placeholder (forward
 	 * reference).  We know the type name, but not yet anything else about it.
 	 */
-	bool		typisdefined;
+	bool		typisdefined BKI_DEFAULT(t);
 
-	char		typdelim;		/* delimiter for arrays of this type */
+	/* delimiter for arrays of this type */
+	char		typdelim BKI_DEFAULT(\054);
 
-	Oid			typrelid;		/* 0 if not a composite type */
+	/* 0 if not a composite type */
+	Oid			typrelid BKI_DEFAULT(0);
 
 	/*
 	 * If typelem is not 0 then it identifies another row in pg_type. The
@@ -99,7 +106,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 *
 	 * typelem != 0 and typlen == -1.
 	 */
-	Oid			typelem;
+	Oid			typelem BKI_DEFAULT(0);
 
 	/*
 	 * If there is a "true" array type having this type as element type,
@@ -118,13 +125,13 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	/*
 	 * I/O functions for optional type modifiers.
 	 */
-	regproc		typmodin;
-	regproc		typmodout;
+	regproc		typmodin BKI_DEFAULT(-);
+	regproc		typmodout BKI_DEFAULT(-);
 
 	/*
 	 * Custom ANALYZE procedure for the datatype (0 selects the default).
 	 */
-	regproc		typanalyze;
+	regproc		typanalyze BKI_DEFAULT(-);
 
 	/* ----------------
 	 * typalign is the alignment required when storing a value of this
@@ -162,7 +169,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 * 'm' MAIN		  like 'x' but try to keep in main tuple
 	 * ----------------
 	 */
-	char		typstorage;
+	char		typstorage BKI_DEFAULT(p);
 
 	/*
 	 * This flag represents a "NOT NULL" constraint against this datatype.
@@ -172,32 +179,32 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 *
 	 * Used primarily for domain types.
 	 */
-	bool		typnotnull;
+	bool		typnotnull BKI_DEFAULT(f);
 
 	/*
 	 * Domains use typbasetype to show the base (or domain) type that the
 	 * domain is based on.  Zero if the type is not a domain.
 	 */
-	Oid			typbasetype;
+	Oid			typbasetype BKI_DEFAULT(0);
 
 	/*
 	 * Domains use typtypmod to record the typmod to be applied to their base
 	 * type (-1 if base type does not use a typmod).  -1 if this type is not a
 	 * domain.
 	 */
-	int32		typtypmod;
+	int32		typtypmod BKI_DEFAULT(-1);
 
 	/*
 	 * typndims is the declared number of dimensions for an array domain type
 	 * (i.e., typbasetype is an array type).  Otherwise zero.
 	 */
-	int32		typndims;
+	int32		typndims BKI_DEFAULT(0);
 
 	/*
 	 * Collation: 0 if type cannot use collations, DEFAULT_COLLATION_OID for
 	 * collatable base types, possibly other OID for domains
 	 */
-	Oid			typcollation;
+	Oid			typcollation BKI_DEFAULT(0);
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
 
@@ -206,7 +213,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 * a default expression for the type.  Currently this is only used for
 	 * domains.
 	 */
-	pg_node_tree typdefaultbin;
+	pg_node_tree typdefaultbin BKI_DEFAULT(_null_);
 
 	/*
 	 * typdefault is NULL if the type has no associated default value. If
@@ -216,12 +223,12 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 * external representation of the type's default value, which may be fed
 	 * to the type's input converter to produce a constant.
 	 */
-	text		typdefault;
+	text		typdefault BKI_DEFAULT(_null_);
 
 	/*
 	 * Access permissions
 	 */
-	aclitem		typacl[1];
+	aclitem		typacl[1] BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_type;
 
diff --git a/src/include/catalog/rewrite_dat.pl b/src/include/catalog/rewrite_dat.pl
new file mode 100644
index 0000000..79f3b83
--- /dev/null
+++ b/src/include/catalog/rewrite_dat.pl
@@ -0,0 +1,265 @@
+#!/usr/bin/perl -w
+#----------------------------------------------------------------------
+#
+# rewrite_dat.pl
+#    Perl script that reads in a catalog data file and writes out
+#    a functionally equivalent file in a standard format.
+#
+#    -Metadata fields are on their own line
+#    -Fields are in the same order they would be in the catalog table
+#    -Default values and computed values for the catalog are left out.
+#    -Column abbreviations are used if available.
+#
+# Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# /src/include/catalog/rewrite_dat.pl
+#
+#----------------------------------------------------------------------
+
+use Catalog;
+
+use strict;
+use warnings;
+
+my @input_files;
+my $output_path = '';
+my $expand_tuples = 0;
+my $revert_requested = 0;
+
+# Process command line switches; other arguments are input data files.
+while (@ARGV)
+{
+	my $arg = shift @ARGV;
+	if ($arg !~ /^-/)
+	{
+		push @input_files, $arg;
+	}
+	elsif ($arg =~ /^-o/)
+	{
+		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
+		usage() if !defined $output_path;    # "-o" was the last argument
+	}
+	elsif ($arg eq '--revert')
+	{
+		# Defer: file names follow the options, so the list is incomplete.
+		$revert_requested = 1;
+	}
+	elsif ($arg eq '--expand')
+	{
+		$expand_tuples = 1;
+	}
+	else
+	{
+		usage();
+	}
+}
+
+# Sanity check arguments, then honor --revert now that all files are known.
+die "No input files.\n" if !@input_files;
+revert() if $revert_requested;
+
+# Make sure output_path ends in a slash.
+$output_path .= '/' if $output_path ne '' && substr($output_path, -1) ne '/';
+
+# Metadata of a catalog entry
+my @metafields = ('oid', 'descr', 'shdescr');
+
+# Rewrite each data file named on the command line.  The schema comes from
+# the header with the same base name (pg_foo.dat -> pg_foo.h); the result
+# is written to <output_path><catname>.dat.
+foreach my $datfile (@input_files)
+{
+	$datfile =~ /(.+)\.dat$/
+	  or die "Input files need to be data (.dat) files.\n";
+
+	my $header = "$1.h";
+	die "There is no header file corresponding to $datfile"
+	  if ! -e $header;
+
+	my @attnames;
+	my $catalog = Catalog::ParseHeader($header);
+	my $catname = $catalog->{catname};
+	my $schema  = $catalog->{columns};
+
+	foreach my $column (@$schema)
+	{
+		my $attname;
+
+		# Use abbreviations where available, unless we're writing
+		# full tuples.
+		if (exists $column->{abbrev} and !$expand_tuples)
+		{
+			$attname = $column->{abbrev};
+		}
+		else
+		{
+			$attname = $column->{name};
+		}
+		push @attnames, $attname;
+	}
+
+	my $catalog_data = Catalog::ParseData($datfile, $schema, 1);
+	next if !defined $catalog_data;
+
+	# Back up old data file rather than overwrite it.
+	# We don't assume the input path and output path are the same,
+	# but they can be.
+	my $newdatfile = "$output_path$catname.dat";
+	if (-e $newdatfile)
+	{
+		rename($newdatfile, $newdatfile . '.bak')
+		  or die "rename: $newdatfile: $!";
+	}
+	open my $dat, '>', $newdatfile
+	  or die "can't open $newdatfile: $!";
+
+	# Write the data.
+	foreach my $data (@$catalog_data)
+	{
+		# Either a newline, comment, or bracket - just write it out.
+		if (! ref $data)
+		{
+			print $dat "$data\n";
+		}
+		# Hash ref representing a data entry.
+		elsif (ref $data eq 'HASH')
+		{
+			my %values = %$data;
+			print $dat "{ ";
+
+			if (!$expand_tuples)
+			{
+				# Write out tuples in a compact representation.
+				# We must do the following operations in the order given.
+				strip_default_values(\%values, $schema, $catname);
+				if ($catname eq 'pg_proc')
+				{
+					# Leave out pronargs, and prosrc too when it is the
+					# same as proname.
+					delete $values{pronargs};
+					if ($values{prosrc} eq $values{proname})
+					{
+						delete $values{prosrc};
+					}
+				}
+				add_column_abbrevs(\%values, $schema);
+			}
+
+			# Separate out metadata fields for readability.
+			my $metadata_line = format_line(\%values, @metafields);
+			my $data_line     = format_line(\%values, @attnames);
+
+			# Metadata gets its own line; indent the data line to match.
+			if ($metadata_line)
+			{
+				print $dat "$metadata_line,\n  ";
+			}
+			print $dat "$data_line },\n";
+		}
+		else
+		{
+			die "Unexpected data type";
+		}
+	}
+
+	# Buffered write errors only show up at close time, so check it.
+	close $dat
+	  or die "can't close $newdatfile: $!";
+}
+
+sub strip_default_values
+{
+	my ($tuple, $columns, $catname) = @_;
+
+	# Every column needs a value; drop those string-equal to the default.
+	for my $col (@$columns)
+	{
+		my $name = $col->{name};
+		defined $tuple->{$name}
+		  or die "No value for $catname.$name\n";
+
+		# Columns without a declared default are always kept.
+		my $default = $col->{default};
+		next if !defined $default;
+
+		delete $tuple->{$name} if $tuple->{$name} eq $default;
+	}
+}
+
+sub add_column_abbrevs
+{
+	my ($tuple, $columns) = @_;
+
+	# Copy each present value to its abbreviated key as well.  The
+	# full-name key is left in place.
+	for my $col (@$columns)
+	{
+		my ($short, $long) = ($col->{abbrev}, $col->{name});
+		next if !defined $short;
+		next if !exists $tuple->{$long};
+
+		$tuple->{$short} = $tuple->{$long};
+	}
+}
+
+sub format_line
+{
+	# Arguments: a hash ref holding one tuple, followed by the list of
+	# keys to emit, in output order.  Returns the "key => 'value'" pairs
+	# joined by ", ", or an empty string if none of the keys has a
+	# defined value.
+	my ($tuple, @keys) = @_;
+
+	my @pairs;
+
+	# Keys whose value is missing or undefined are simply left out.
+	foreach my $key (grep { defined $tuple->{$_} } @keys)
+	{
+		# Work on a copy so the caller's tuple is not modified.
+		my $quoted = $tuple->{$key};
+
+		# The value is written inside single quotes, so re-escape any
+		# single quotes it contains.
+		$quoted =~ s/'/\\'/g;
+
+		push @pairs, "$key => '$quoted'";
+	}
+
+	# An empty @pairs yields '', which callers treat as "nothing to
+	# print".
+	return join(', ', @pairs);
+}
+
+# Restore each data file named on the command line from its ".bak" copy,
+# if one exists, and then terminate the script successfully.
+sub revert
+{
+	for my $target (@input_files)
+	{
+		my $backup = $target . '.bak';
+		# Files without a backup are skipped silently.
+		next if ! -e $backup;
+		rename($backup, $target)
+		  or die "rename: $backup: $!";
+	}
+	exit 0;
+}
+
+sub usage
+{
+	die <<EOM;    # help text; die() prints it to STDERR and exits non-zero
+Usage: rewrite_dat.pl [options] datafile...
+
+Options:
+    -o               output path
+    --revert         rename .bak files back to .dat
+    --expand         write out full tuples
+
+Expects a list of .dat files as arguments.
+
+Make sure location of Catalog.pm is passed to the perl interpreter:
+perl -I /path/to/Catalog.pm/ ...
+
+EOM
+}
-- 
2.7.4

