From 7a58861be67b783cd9e440ae8a8863e1ca741243 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnaylor@gmail.com>
Date: Mon, 25 Dec 2017 16:24:22 +0700
Subject: [PATCH v5 08/13] Implement data compaction strategies

Add the ability to label columns in the source data by an abbreviation
rather than the full name, in order to shorten the entries. Add default
values and abbreviations to a few catalog headers. More could be done here,
but this is enough for a first pass.

Compute pg_proc.pronargs and (if not specified) pg_proc.prosrc and pg_type
OID symbols, rather than storing directly.
---
 src/backend/catalog/Catalog.pm     |  84 ++++++++++++++++++++++
 src/backend/catalog/README         |  34 +++++++--
 src/include/catalog/genbki.h       |   3 +
 src/include/catalog/pg_aggregate.h |  38 +++++-----
 src/include/catalog/pg_amop.h      |  31 ++++++---
 src/include/catalog/pg_amproc.h    |  19 +++--
 src/include/catalog/pg_authid.h    |  43 +++++++++---
 src/include/catalog/pg_class.dat   |   2 -
 src/include/catalog/pg_class.h     | 138 +++++++++++++++++++++++++++----------
 src/include/catalog/pg_opclass.h   |  17 +++--
 src/include/catalog/pg_operator.h  |  56 +++++++++++----
 src/include/catalog/pg_opfamily.h  |   8 ++-
 src/include/catalog/pg_proc.h      | 116 +++++++++++++++++++++++--------
 src/include/catalog/pg_type.dat    |   6 ++
 src/include/catalog/pg_type.h      |  47 +++++++------
 src/include/catalog/rewrite_dat.pl |  80 ++++++++++++++++++++-
 16 files changed, 562 insertions(+), 160 deletions(-)

diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 9d37674..a8a406a 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -156,6 +156,10 @@ sub ParseHeader
 						{
 							$column{default} = $1;
 						}
+						elsif ($attopt =~ /BKI_ABBREV\((\S+)\)/)
+						{
+							$column{abbrev} = $1;
+						}
 						else
 						{
 							die
@@ -229,6 +233,28 @@ sub ParseData
 				{
 					die "Error parsing $_\n$!";
 				}
+
+				# Expand tuples to their full representation.
+				# We must do the following operations in the order given.
+				resolve_column_abbrevs($datum, $schema);
+
+				if ($catname eq 'pg_proc')
+				{
+					compute_pg_proc_fields($datum);
+				}
+				elsif ($catname eq 'pg_type' and !exists $datum->{oid_symbol})
+				{
+					my $symbol = GetPgTypeSymbol($datum->{typname});
+					$datum->{oid_symbol} = $symbol
+					  if defined $symbol;
+				}
+
+				my $error = AddDefaultValues($datum, $schema);
+				if ($error)
+				{
+					print "Failed to form full tuple for $catname\n";
+					die $error;
+				}
 			}
 			else
 			{
@@ -263,6 +289,23 @@ sub ParseData
 	return $data;
 }
 
+# Expand abbreviated column labels: for each schema column that declares
+# an abbreviation, copy the row's value under that label to the full name.
+sub resolve_column_abbrevs
+{
+	my ($row, $schema) = @_;
+
+	foreach my $column (@$schema)
+	{
+		my $short = $column->{abbrev};
+		next unless defined $short;
+		next unless defined $row->{$short};
+
+		# The abbreviated key itself is left in the row.
+		$row->{ $column->{name} } = $row->{$short};
+	}
+}
+
 # Fill in default values of a record using the given schema. It's the
 # caller's responsibility to specify other values beforehand.
 sub AddDefaultValues
@@ -303,6 +346,47 @@ sub AddDefaultValues
 	return $msg;
 }
 
+# Compute certain pg_proc fields from others.  The row hash is modified in
+# place: pronargs is always (re)computed, prosrc only when it is absent.
+sub compute_pg_proc_fields
+{
+	my $row = shift;
+
+	# pronargs is computed by counting proargtypes.  Check definedness
+	# rather than truth so that a lone '0' entry is still counted, and
+	# split on ' ' (not /\s+/) so that leading whitespace doesn't yield
+	# a spurious empty first field.
+	# The result is 0 when proargtypes is missing or empty.
+	my $argtypes = $row->{proargtypes};
+	$argtypes = '' if !defined $argtypes;
+	my @argtypes = split ' ', $argtypes;
+	$row->{pronargs} = scalar(@argtypes);
+
+	# If prosrc doesn't exist, it must be a copy of proname.
+	if (!exists $row->{prosrc})
+	{
+		$row->{prosrc} = $row->{proname};
+	}
+}
+
+# Determine canonical pg_type OID #define symbol from the type name, e.g.
+# 'int4' => 'INT4OID' and '_int4' => 'INT4ARRAYOID'.  Returns nothing for
+# an empty name or for the rowtypes of bootstrap catalogs.
+sub GetPgTypeSymbol
+{
+	my $typename = shift;
+
+	# Capture into lexicals and bail out on a failed match, so we never
+	# build a symbol from stale $1/$2 left over from an earlier regex.
+	return if !defined $typename;
+	my ($is_array, $name) = $typename =~ /(_)?(.+)/
+	  or return;
+
+	# Skip for rowtypes of bootstrap tables.
+	return if $typename =~ /^pg_(?:type|proc|attribute|class)\z/;
+	return uc($name) . ($is_array ? 'ARRAY' : '') . 'OID';
+}
+
 # Rename temporary files to final names.
 # Call this function with the final file name and the .tmp extension
 # Note: recommended extension is ".tmp$$", so that parallel make steps
diff --git a/src/backend/catalog/README b/src/backend/catalog/README
index 7b849a4..3b2cef6 100644
--- a/src/backend/catalog/README
+++ b/src/backend/catalog/README
@@ -59,6 +59,28 @@ quotes, since we don't know what kind of characters will be substituted.
 within the curly brackets.  This is done automatically during rewriting
 so don't worry about their placement during development.
 
+- Some techniques are used to keep the data representation compact.
+These are automatically enforced by rewrite_dat.pl, but you should be
+aware of them.  pg_proc.dat uses all three of them in an attempt to keep
+the file manageable:
+1. If the .h file specifies a default value for a column, and a data entry
+has that same value, it will be omitted from the data file.
+2. Likewise, some values could be computed from other values, so are also
+left out.
+3. If the .h file specifies a column abbreviation, then it will be used as
+the hash key in the data entry.
+
+- If you want to add a new default value or abbreviation, you must
+change the relevant .h file to use the new default/abbreviation, and
+then run "perl -I ../../backend/catalog rewrite_dat.pl pg_foo.dat".
+If you want to change an existing default value or abbreviation, you must
+first run rewrite_dat.pl with the "--expand" argument before proceeding
+as above.
+
+- If you want to add a new method of making the data representation
+smaller, you must implement it in rewrite_dat.pl and also teach
+Catalog::ParseData() how to expand the data back into the full representation.
+
 - Some catalogs require that OIDs be preallocated to tuples because
 of cross-references from other pre-loaded tuples.  For example, pg_type
 contains pointers into pg_proc (e.g., pg_type.typinput), and pg_proc
@@ -75,12 +97,12 @@ some of them are actually cross-referenced.
 known directly in the C code.  In such cases, put an 'oid_symbol' entry in
 the catalog's data file, and use the #define symbol in the C code.  Writing
 the actual numeric value of any OID in C code is considered very bad form.
-Direct references to pg_proc OIDs are common enough that there's a special
-mechanism to create the necessary #define's automatically: see
-backend/utils/Gen_fmgrtab.pl.  We also have standard conventions for setting
-up #define's for the pg_class OIDs of system catalogs and indexes.  For all
-the other system catalogs, you have to manually create any #define's you
-need.
+Direct references to pg_type and pg_proc OIDs are common enough that there's
+a special mechanism to create the necessary #define's automatically:
+see Catalog.pm and backend/utils/Gen_fmgrtab.pl, respectively.  We also
+have standard conventions for setting up #define's for the pg_class OIDs
+of system catalogs and indexes.  For all the other system catalogs, you
+have to manually create any #define's you need.
 
 - If you need to find a valid OID for a new predefined tuple, use the
 script src/include/catalog/unused_oids.  It generates inclusive ranges of
diff --git a/src/include/catalog/genbki.h b/src/include/catalog/genbki.h
index ec81d5d..5537b61 100644
--- a/src/include/catalog/genbki.h
+++ b/src/include/catalog/genbki.h
@@ -34,6 +34,9 @@
 /* Specifies a default value for a catalog field */
 #define BKI_DEFAULT(value)
 
+/* Specifies an abbreviated label for a column name */
+#define BKI_ABBREV(abbrev)
+
 /*
  * This is never defined; it's here only for documentation.
  *
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h
index 54df39a..c0c1469 100644
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -55,29 +55,29 @@
 CATALOG(pg_aggregate,2600) BKI_WITHOUT_OIDS
 {
 	regproc		aggfnoid;
-	char		aggkind;
-	int16		aggnumdirectargs;
+	char		aggkind BKI_DEFAULT(n);
+	int16		aggnumdirectargs BKI_DEFAULT(0);
 	regproc		aggtransfn;
-	regproc		aggfinalfn;
-	regproc		aggcombinefn;
-	regproc		aggserialfn;
-	regproc		aggdeserialfn;
-	regproc		aggmtransfn;
-	regproc		aggminvtransfn;
-	regproc		aggmfinalfn;
-	bool		aggfinalextra;
-	bool		aggmfinalextra;
-	char		aggfinalmodify;
-	char		aggmfinalmodify;
-	Oid			aggsortop;
+	regproc		aggfinalfn BKI_DEFAULT(-);
+	regproc		aggcombinefn BKI_DEFAULT(-);
+	regproc		aggserialfn BKI_DEFAULT(-);
+	regproc		aggdeserialfn BKI_DEFAULT(-);
+	regproc		aggmtransfn BKI_DEFAULT(-);
+	regproc		aggminvtransfn BKI_DEFAULT(-);
+	regproc		aggmfinalfn BKI_DEFAULT(-);
+	bool		aggfinalextra BKI_DEFAULT(f);
+	bool		aggmfinalextra BKI_DEFAULT(f);
+	char		aggfinalmodify BKI_DEFAULT(r);
+	char		aggmfinalmodify BKI_DEFAULT(r);
+	Oid			aggsortop BKI_DEFAULT(0);
 	Oid			aggtranstype;
-	int32		aggtransspace;
-	Oid			aggmtranstype;
-	int32		aggmtransspace;
+	int32		aggtransspace BKI_DEFAULT(0);
+	Oid			aggmtranstype BKI_DEFAULT(0);
+	int32		aggmtransspace BKI_DEFAULT(0);
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
-	text		agginitval;
-	text		aggminitval;
+	text		agginitval BKI_DEFAULT(_null_);
+	text		aggminitval BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_aggregate;
 
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 9dd881c..c73f24b 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -55,14 +55,29 @@
 
 CATALOG(pg_amop,2602)
 {
-	Oid			amopfamily;		/* the index opfamily this entry is for */
-	Oid			amoplefttype;	/* operator's left input data type */
-	Oid			amoprighttype;	/* operator's right input data type */
-	int16		amopstrategy;	/* operator strategy number */
-	char		amoppurpose;	/* is operator for 's'earch or 'o'rdering? */
-	Oid			amopopr;		/* the operator's pg_operator OID */
-	Oid			amopmethod;		/* the index access method this entry is for */
-	Oid			amopsortfamily; /* ordering opfamily OID, or 0 if search op */
+	/* the index opfamily this entry is for */
+	Oid			amopfamily BKI_ABBREV(opf);
+
+	/* operator's left input data type */
+	Oid			amoplefttype BKI_ABBREV(lt);
+
+	/* operator's right input data type */
+	Oid			amoprighttype BKI_ABBREV(rt);
+
+	/* operator strategy number */
+	int16		amopstrategy BKI_ABBREV(str);
+
+	/* is operator for 's'earch or 'o'rdering? */
+	char		amoppurpose BKI_ABBREV(pur) BKI_DEFAULT(s);
+
+	/* the operator's pg_operator OID */
+	Oid			amopopr BKI_ABBREV(oper);
+
+	/* the index access method this entry is for */
+	Oid			amopmethod BKI_ABBREV(am);
+
+	/* ordering opfamily OID, or 0 if search op */
+	Oid			amopsortfamily BKI_DEFAULT(0);
 } FormData_pg_amop;
 
 /* allowed values of amoppurpose: */
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 681cdcf..afdfeb1 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -44,11 +44,20 @@
 
 CATALOG(pg_amproc,2603)
 {
-	Oid			amprocfamily;	/* the index opfamily this entry is for */
-	Oid			amproclefttype; /* procedure's left input data type */
-	Oid			amprocrighttype;	/* procedure's right input data type */
-	int16		amprocnum;		/* support procedure index */
-	regproc		amproc;			/* OID of the proc */
+	/* the index opfamily this entry is for */
+	Oid			amprocfamily BKI_ABBREV(opf);
+
+	/* procedure's left input data type */
+	Oid			amproclefttype BKI_ABBREV(lt);
+
+	/* procedure's right input data type */
+	Oid			amprocrighttype BKI_ABBREV(rt);
+
+	/* support procedure index */
+	int16		amprocnum BKI_ABBREV(num);
+
+	/* OID of the proc */
+	regproc		amproc;
 } FormData_pg_amproc;
 
 /* ----------------
diff --git a/src/include/catalog/pg_authid.h b/src/include/catalog/pg_authid.h
index 18bd4c6..d073f72 100644
--- a/src/include/catalog/pg_authid.h
+++ b/src/include/catalog/pg_authid.h
@@ -45,20 +45,41 @@
 
 CATALOG(pg_authid,1260) BKI_SHARED_RELATION BKI_ROWTYPE_OID(2842) BKI_SCHEMA_MACRO
 {
-	NameData	rolname;		/* name of role */
-	bool		rolsuper;		/* read this field via superuser() only! */
-	bool		rolinherit;		/* inherit privileges from other roles? */
-	bool		rolcreaterole;	/* allowed to create more roles? */
-	bool		rolcreatedb;	/* allowed to create databases? */
-	bool		rolcanlogin;	/* allowed to log in as session user? */
-	bool		rolreplication; /* role used for streaming replication */
-	bool		rolbypassrls;	/* bypasses row level security? */
-	int32		rolconnlimit;	/* max connections allowed (-1=no limit) */
+	/* name of role */
+	NameData	rolname;
+
+	/* read this field via superuser() only! */
+	bool		rolsuper BKI_DEFAULT(f);
+
+	/* inherit privileges from other roles? */
+	bool		rolinherit BKI_DEFAULT(t);
+
+	/* allowed to create more roles? */
+	bool		rolcreaterole BKI_DEFAULT(f);
+
+	/* allowed to create databases? */
+	bool		rolcreatedb BKI_DEFAULT(f);
+
+	/* allowed to log in as session user? */
+	bool		rolcanlogin BKI_DEFAULT(f);
+
+	/* role used for streaming replication */
+	bool		rolreplication BKI_DEFAULT(f);
+
+	/* bypasses row level security? */
+	bool		rolbypassrls BKI_DEFAULT(f);
+
+	/* max connections allowed (-1=no limit) */
+	int32		rolconnlimit BKI_DEFAULT(-1);
 
 	/* remaining fields may be null; use heap_getattr to read them! */
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
-	text		rolpassword;	/* password, if any */
-	timestamptz rolvaliduntil;	/* password expiration time, if any */
+
+	/* password, if any */
+	text		rolpassword BKI_DEFAULT(_null_);
+
+	/* password expiration time, if any */
+	timestamptz rolvaliduntil BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_authid;
 
diff --git a/src/include/catalog/pg_class.dat b/src/include/catalog/pg_class.dat
index 435bc06..f990d26 100644
--- a/src/include/catalog/pg_class.dat
+++ b/src/include/catalog/pg_class.dat
@@ -5,8 +5,6 @@
 # the OIDs listed here match those given in their CATALOG macros, and that
 # the relnatts values are correct.
 
-# Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
-# similarly, "1" in relminmxid stands for FirstMultiXactId
 { oid => '1247',
   relname => 'pg_type', relnamespace => 'PGNSP', reltype => '71', reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0', reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0', reltoastrelid => '0', relhasindex => 'f', relisshared => 'f', relpersistence => 'p', relkind => 'r', relnatts => '30', relchecks => '0', relhasoids => 't', relhaspkey => 'f', relhasrules => 'f', relhastriggers => 'f', relhassubclass => 'f', relrowsecurity => 'f', relforcerowsecurity => 'f', relispopulated => 't', relreplident => 'n', relispartition => 'f', relfrozenxid => '3', relminmxid => '1', relacl => '_null_', reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1249',
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index f24a27d..9ad6ca8 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -31,55 +31,117 @@
 
 CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
 {
-	NameData	relname;		/* class name */
-	Oid			relnamespace;	/* OID of namespace containing this class */
-	Oid			reltype;		/* OID of entry in pg_type for table's
-								 * implicit row type */
-	Oid			reloftype;		/* OID of entry in pg_type for underlying
-								 * composite type */
-	Oid			relowner;		/* class owner */
-	Oid			relam;			/* index access method; 0 if not an index */
-	Oid			relfilenode;	/* identifier of physical storage file */
+	/* class name */
+	NameData	relname;
+
+	/* OID of namespace containing this class */
+	Oid			relnamespace BKI_DEFAULT(PGNSP);
+
+	/* OID of entry in pg_type for table's implicit row type */
+	Oid			reltype;
+
+	/* OID of entry in pg_type for underlying composite type */
+	Oid			reloftype BKI_DEFAULT(0);
+
+	/* class owner */
+	Oid			relowner BKI_DEFAULT(PGUID);
+
+	/* index access method; 0 if not an index */
+	Oid			relam BKI_DEFAULT(0);
+
+	/* identifier of physical storage file */
+	Oid			relfilenode BKI_DEFAULT(0);
 
 	/* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
-	Oid			reltablespace;	/* identifier of table space for relation */
-	int32		relpages;		/* # of blocks (not always up-to-date) */
-	float4		reltuples;		/* # of tuples (not always up-to-date) */
-	int32		relallvisible;	/* # of all-visible blocks (not always
-								 * up-to-date) */
-	Oid			reltoastrelid;	/* OID of toast table; 0 if none */
-	bool		relhasindex;	/* T if has (or has had) any indexes */
-	bool		relisshared;	/* T if shared across databases */
-	char		relpersistence; /* see RELPERSISTENCE_xxx constants below */
-	char		relkind;		/* see RELKIND_xxx constants below */
-	int16		relnatts;		/* number of user attributes */
+
+	/* identifier of table space for relation */
+	Oid			reltablespace BKI_DEFAULT(0);
+
+	/* # of blocks (not always up-to-date) */
+	int32		relpages BKI_DEFAULT(0);
+
+	/* # of tuples (not always up-to-date) */
+	float4		reltuples BKI_DEFAULT(0);
+
+	/* # of all-visible blocks (not always up-to-date) */
+	int32		relallvisible BKI_DEFAULT(0);
+
+	/* OID of toast table; 0 if none */
+	Oid			reltoastrelid BKI_DEFAULT(0);
+
+	/* T if has (or has had) any indexes */
+	bool		relhasindex BKI_DEFAULT(f);
+
+	/* T if shared across databases */
+	bool		relisshared BKI_DEFAULT(f);
+
+	/* see RELPERSISTENCE_xxx constants below */
+	char		relpersistence BKI_DEFAULT(p);
+
+	/* see RELKIND_xxx constants below */
+	char		relkind BKI_DEFAULT(r);
+
+	/* number of user attributes */
+	int16		relnatts;
 
 	/*
 	 * Class pg_attribute must contain exactly "relnatts" user attributes
 	 * (with attnums ranging from 1 to relnatts) for this class.  It may also
 	 * contain entries with negative attnums for system attributes.
 	 */
-	int16		relchecks;		/* # of CHECK constraints for class */
-	bool		relhasoids;		/* T if we generate OIDs for rows of rel */
-	bool		relhaspkey;		/* has (or has had) PRIMARY KEY index */
-	bool		relhasrules;	/* has (or has had) any rules */
-	bool		relhastriggers; /* has (or has had) any TRIGGERs */
-	bool		relhassubclass; /* has (or has had) derived classes */
-	bool		relrowsecurity; /* row security is enabled or not */
-	bool		relforcerowsecurity;	/* row security forced for owners or
-										 * not */
-	bool		relispopulated; /* matview currently holds query results */
-	char		relreplident;	/* see REPLICA_IDENTITY_xxx constants  */
-	bool		relispartition; /* is relation a partition? */
-	TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
-	TransactionId relminmxid;	/* all multixacts in this rel are >= this.
-								 * this is really a MultiXactId */
+
+	/* # of CHECK constraints for class */
+	int16		relchecks BKI_DEFAULT(0);
+
+	/* T if we generate OIDs for rows of rel */
+	bool		relhasoids;
+
+	/* has (or has had) PRIMARY KEY index */
+	bool		relhaspkey BKI_DEFAULT(f);
+
+	/* has (or has had) any rules */
+	bool		relhasrules BKI_DEFAULT(f);
+
+	/* has (or has had) any TRIGGERs */
+	bool		relhastriggers BKI_DEFAULT(f);
+
+	/* has (or has had) derived classes */
+	bool		relhassubclass BKI_DEFAULT(f);
+
+	/* row security is enabled or not */
+	bool		relrowsecurity BKI_DEFAULT(f);
+
+	/* row security forced for owners or not */
+	bool		relforcerowsecurity BKI_DEFAULT(f);
+
+	/* matview currently holds query results */
+	bool		relispopulated BKI_DEFAULT(t);
+
+	/* see REPLICA_IDENTITY_xxx constants  */
+	char		relreplident BKI_DEFAULT(n);
+
+	/* is relation a partition? */
+	bool		relispartition BKI_DEFAULT(f);
+
+	/* all Xids < this are frozen in this rel */
+	/* Note: "3" stands for FirstNormalTransactionId */
+	TransactionId relfrozenxid BKI_DEFAULT(3);
+
+	/* all multixacts in this rel are >= this. This is really a MultiXactId */
+	/* Note: "1" stands for FirstMultiXactId */
+	TransactionId relminmxid BKI_DEFAULT(1);
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
 	/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
-	aclitem		relacl[1];		/* access permissions */
-	text		reloptions[1];	/* access-method-specific options */
-	pg_node_tree relpartbound;	/* partition bound node tree */
+
+	/* access permissions */
+	aclitem		relacl[1] BKI_DEFAULT(_null_);
+
+	/* access-method-specific options */
+	text		reloptions[1] BKI_DEFAULT(_null_);
+
+	/* partition bound node tree */
+	pg_node_tree relpartbound BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_class;
 
diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h
index 51d58f2..f44a0aa 100644
--- a/src/include/catalog/pg_opclass.h
+++ b/src/include/catalog/pg_opclass.h
@@ -53,12 +53,21 @@ CATALOG(pg_opclass,2616)
 {
 	Oid			opcmethod;		/* index access method opclass is for */
 	NameData	opcname;		/* name of this opclass */
-	Oid			opcnamespace;	/* namespace of this opclass */
-	Oid			opcowner;		/* opclass owner */
+
+	/* namespace of this opclass */
+	Oid			opcnamespace BKI_DEFAULT(PGNSP);
+
+	/* opclass owner */
+	Oid			opcowner BKI_DEFAULT(PGUID);
+
 	Oid			opcfamily;		/* containing operator family */
 	Oid			opcintype;		/* type of data indexed by opclass */
-	bool		opcdefault;		/* T if opclass is default for opcintype */
-	Oid			opckeytype;		/* type of data in index, or InvalidOid */
+
+	/* T if opclass is default for opcintype */
+	bool		opcdefault BKI_DEFAULT(t);
+
+	/* type of data in index, or InvalidOid */
+	Oid			opckeytype BKI_DEFAULT(0);
 } FormData_pg_opclass;
 
 /* ----------------
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index 3f630d8..cd6e227 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -31,20 +31,48 @@
 
 CATALOG(pg_operator,2617)
 {
-	NameData	oprname;		/* name of operator */
-	Oid			oprnamespace;	/* OID of namespace containing this oper */
-	Oid			oprowner;		/* operator owner */
-	char		oprkind;		/* 'l', 'r', or 'b' */
-	bool		oprcanmerge;	/* can be used in merge join? */
-	bool		oprcanhash;		/* can be used in hash join? */
-	Oid			oprleft;		/* left arg type, or 0 if 'l' oprkind */
-	Oid			oprright;		/* right arg type, or 0 if 'r' oprkind */
-	Oid			oprresult;		/* result datatype */
-	Oid			oprcom;			/* OID of commutator oper, or 0 if none */
-	Oid			oprnegate;		/* OID of negator oper, or 0 if none */
-	regproc		oprcode;		/* OID of underlying function */
-	regproc		oprrest;		/* OID of restriction estimator, or 0 */
-	regproc		oprjoin;		/* OID of join estimator, or 0 */
+
+	/* name of operator */
+	NameData	oprname;
+
+	/* OID of namespace containing this oper */
+	Oid			oprnamespace BKI_DEFAULT(PGNSP);
+
+	/* operator owner */
+	Oid			oprowner BKI_DEFAULT(PGUID);
+
+	/* 'l', 'r', or 'b' */
+	char		oprkind BKI_DEFAULT(b);
+
+	/* can be used in merge join? */
+	bool		oprcanmerge BKI_DEFAULT(f);
+
+	/* can be used in hash join? */
+	bool		oprcanhash BKI_DEFAULT(f);
+
+	/* left arg type, or 0 if 'l' oprkind */
+	Oid			oprleft;
+
+	/* right arg type, or 0 if 'r' oprkind */
+	Oid			oprright;
+
+	/* result datatype */
+	Oid			oprresult;
+
+	/* OID of commutator oper, or 0 if none */
+	Oid			oprcom BKI_DEFAULT(0);
+
+	/* OID of negator oper, or 0 if none */
+	Oid			oprnegate BKI_DEFAULT(0);
+
+	/* OID of underlying function */
+	regproc		oprcode;
+
+	/* OID of restriction estimator, or 0 */
+	regproc		oprrest BKI_DEFAULT(-);
+
+	/* OID of join estimator, or 0 */
+	regproc		oprjoin BKI_DEFAULT(-);
 } FormData_pg_operator;
 
 /* ----------------
diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h
index b988bc5..a9be48f 100644
--- a/src/include/catalog/pg_opfamily.h
+++ b/src/include/catalog/pg_opfamily.h
@@ -33,8 +33,12 @@ CATALOG(pg_opfamily,2753)
 {
 	Oid			opfmethod;		/* index access method opfamily is for */
 	NameData	opfname;		/* name of this opfamily */
-	Oid			opfnamespace;	/* namespace of this opfamily */
-	Oid			opfowner;		/* opfamily owner */
+
+	/* namespace of this opfamily */
+	Oid			opfnamespace BKI_DEFAULT(PGNSP);
+
+	/* opfamily owner */
+	Oid			opfowner BKI_DEFAULT(PGUID);
 } FormData_pg_opfamily;
 
 /* ----------------
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index fab35c2..1f71a6f 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -30,43 +30,99 @@
 
 CATALOG(pg_proc,1255) BKI_BOOTSTRAP BKI_ROWTYPE_OID(81) BKI_SCHEMA_MACRO
 {
-	NameData	proname;		/* procedure name */
-	Oid			pronamespace;	/* OID of namespace containing this proc */
-	Oid			proowner;		/* procedure owner */
-	Oid			prolang;		/* OID of pg_language entry */
-	float4		procost;		/* estimated execution cost */
-	float4		prorows;		/* estimated # of rows out (if proretset) */
-	Oid			provariadic;	/* element type of variadic array, or 0 */
-	regproc		protransform;	/* transforms calls to it during planning */
-	bool		proisagg;		/* is it an aggregate? */
-	bool		proiswindow;	/* is it a window function? */
-	bool		prosecdef;		/* security definer */
-	bool		proleakproof;	/* is it a leak-proof function? */
-	bool		proisstrict;	/* strict with respect to NULLs? */
-	bool		proretset;		/* returns a set? */
-	char		provolatile;	/* see PROVOLATILE_ categories below */
-	char		proparallel;	/* see PROPARALLEL_ categories below */
-	int16		pronargs;		/* number of arguments */
-	int16		pronargdefaults;	/* number of arguments with defaults */
-	Oid			prorettype;		/* OID of result type */
+	/* procedure name */
+	NameData	proname BKI_ABBREV(n);
+
+	/* OID of namespace containing this proc */
+	Oid			pronamespace BKI_DEFAULT(PGNSP);
+
+	/* procedure owner */
+	Oid			proowner BKI_DEFAULT(PGUID);
+
+	/* OID of pg_language entry */
+	Oid			prolang BKI_DEFAULT(12);
+
+	/* estimated execution cost */
+	float4		procost BKI_DEFAULT(1);
+
+	/* estimated # of rows out (if proretset) */
+	float4		prorows BKI_DEFAULT(0);
+
+	/* element type of variadic array, or 0 */
+	Oid			provariadic BKI_DEFAULT(0);
+
+	/* transforms calls to it during planning */
+	regproc		protransform BKI_DEFAULT(0);
+
+	/* is it an aggregate? */
+	bool		proisagg BKI_DEFAULT(f);
+
+	/* is it a window function? */
+	bool		proiswindow BKI_DEFAULT(f);
+
+	/* security definer */
+	bool		prosecdef BKI_DEFAULT(f);
+
+	/* is it a leak-proof function? */
+	bool		proleakproof BKI_ABBREV(lp) BKI_DEFAULT(f);
+
+	/* strict with respect to NULLs? */
+	bool		proisstrict BKI_ABBREV(is) BKI_DEFAULT(f);
+
+	/* returns a set? */
+	bool		proretset BKI_DEFAULT(f);
+
+	/* see PROVOLATILE_ categories below */
+	char		provolatile BKI_ABBREV(v) BKI_DEFAULT(v);
+
+	/* see PROPARALLEL_ categories below */
+	char		proparallel BKI_ABBREV(p) BKI_DEFAULT(u);
+
+	/* number of arguments */
+	int16		pronargs;
+
+	/* number of arguments with defaults */
+	int16		pronargdefaults BKI_DEFAULT(0);
+
+	/* OID of result type */
+	Oid			prorettype BKI_ABBREV(rt);
 
 	/*
 	 * variable-length fields start here, but we allow direct access to
 	 * proargtypes
 	 */
-	oidvector	proargtypes;	/* parameter types (excludes OUT params) */
+
+	/* parameter types (excludes OUT params) */
+	oidvector	proargtypes BKI_ABBREV(at);
 
 #ifdef CATALOG_VARLEN
-	Oid			proallargtypes[1];	/* all param types (NULL if IN only) */
-	char		proargmodes[1]; /* parameter modes (NULL if IN only) */
-	text		proargnames[1]; /* parameter names (NULL if no names) */
-	pg_node_tree proargdefaults;	/* list of expression trees for argument
-									 * defaults (NULL if none) */
-	Oid			protrftypes[1]; /* types for which to apply transforms */
-	text		prosrc BKI_FORCE_NOT_NULL;	/* procedure source text */
-	text		probin;			/* secondary procedure info (can be NULL) */
-	text		proconfig[1];	/* procedure-local GUC settings */
-	aclitem		proacl[1];		/* access permissions */
+
+	/* all param types (NULL if IN only) */
+	Oid			proallargtypes[1] BKI_DEFAULT(_null_);
+
+	/* parameter modes (NULL if IN only) */
+	char		proargmodes[1] BKI_DEFAULT(_null_);
+
+	/* parameter names (NULL if no names) */
+	text		proargnames[1] BKI_DEFAULT(_null_);
+
+	/* list of expression trees for argument defaults (NULL if none) */
+	pg_node_tree proargdefaults BKI_DEFAULT(_null_);
+
+	/* types for which to apply transforms */
+	Oid			protrftypes[1] BKI_DEFAULT(_null_);
+
+	/* procedure source text */
+	text		prosrc BKI_ABBREV(s) BKI_FORCE_NOT_NULL;
+
+	/* secondary procedure info (can be NULL) */
+	text		probin BKI_DEFAULT(_null_);
+
+	/* procedure-local GUC settings */
+	text		proconfig[1] BKI_DEFAULT(_null_);
+
+	/* access permissions */
+	aclitem		proacl[1] BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_proc;
 
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index f73e5cf..a71bed7 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -3,9 +3,15 @@
 
 # Keep the following ordered by OID so that later changes can be made more
 # easily.
+
 # For types used in the system catalogs, make sure the values here match
 # TypInfo[] in bootstrap.c.
 
+# OID symbols not specified here are generated automatically according
+# to a simple rule (see Catalog.pm). If you created a type and want to
+# know what its symbol is, see the generated header
+# backend/catalog/oid_symbols.h.
+
 # OIDS 1 - 99
 
 { oid => '16', oid_symbol => 'BOOLOID', descr => 'boolean, \'true\'/\'false\'',
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index e2d4626..3c069d5 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -38,8 +38,12 @@
 CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 {
 	NameData	typname;		/* type name */
-	Oid			typnamespace;	/* OID of namespace containing this type */
-	Oid			typowner;		/* type owner */
+
+	/* OID of namespace containing this type */
+	Oid			typnamespace BKI_DEFAULT(PGNSP);
+
+	/* type owner */
+	Oid			typowner BKI_DEFAULT(PGUID);
 
 	/*
 	 * For a fixed-size type, typlen is the number of bytes we use to
@@ -67,7 +71,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 *
 	 * If typtype is 'c', typrelid is the OID of the class' entry in pg_class.
 	 */
-	char		typtype;
+	char		typtype BKI_DEFAULT(b);
 
 	/*
 	 * typcategory and typispreferred help the parser distinguish preferred
@@ -77,17 +81,20 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 */
 	char		typcategory;	/* arbitrary type classification */
 
-	bool		typispreferred; /* is type "preferred" within its category? */
+	/* is type "preferred" within its category? */
+	bool		typispreferred BKI_DEFAULT(f);
 
 	/*
 	 * If typisdefined is false, the entry is only a placeholder (forward
 	 * reference).  We know the type name, but not yet anything else about it.
 	 */
-	bool		typisdefined;
+	bool		typisdefined BKI_DEFAULT(t);
 
-	char		typdelim;		/* delimiter for arrays of this type */
+	/* delimiter for arrays of this type */
+	char		typdelim BKI_DEFAULT(\054);
 
-	Oid			typrelid;		/* 0 if not a composite type */
+	/* 0 if not a composite type */
+	Oid			typrelid BKI_DEFAULT(0);
 
 	/*
 	 * If typelem is not 0 then it identifies another row in pg_type. The
@@ -100,7 +107,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 *
 	 * typelem != 0 and typlen == -1.
 	 */
-	Oid			typelem;
+	Oid			typelem BKI_DEFAULT(0);
 
 	/*
 	 * If there is a "true" array type having this type as element type,
@@ -119,13 +126,13 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	/*
 	 * I/O functions for optional type modifiers.
 	 */
-	regproc		typmodin;
-	regproc		typmodout;
+	regproc		typmodin BKI_DEFAULT(-);
+	regproc		typmodout BKI_DEFAULT(-);
 
 	/*
 	 * Custom ANALYZE procedure for the datatype (0 selects the default).
 	 */
-	regproc		typanalyze;
+	regproc		typanalyze BKI_DEFAULT(-);
 
 	/* ----------------
 	 * typalign is the alignment required when storing a value of this
@@ -163,7 +170,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 * 'm' MAIN		  like 'x' but try to keep in main tuple
 	 * ----------------
 	 */
-	char		typstorage;
+	char		typstorage BKI_DEFAULT(p);
 
 	/*
 	 * This flag represents a "NOT NULL" constraint against this datatype.
@@ -173,32 +180,32 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 *
 	 * Used primarily for domain types.
 	 */
-	bool		typnotnull;
+	bool		typnotnull BKI_DEFAULT(f);
 
 	/*
 	 * Domains use typbasetype to show the base (or domain) type that the
 	 * domain is based on.  Zero if the type is not a domain.
 	 */
-	Oid			typbasetype;
+	Oid			typbasetype BKI_DEFAULT(0);
 
 	/*
 	 * Domains use typtypmod to record the typmod to be applied to their base
 	 * type (-1 if base type does not use a typmod).  -1 if this type is not a
 	 * domain.
 	 */
-	int32		typtypmod;
+	int32		typtypmod BKI_DEFAULT(-1);
 
 	/*
 	 * typndims is the declared number of dimensions for an array domain type
 	 * (i.e., typbasetype is an array type).  Otherwise zero.
 	 */
-	int32		typndims;
+	int32		typndims BKI_DEFAULT(0);
 
 	/*
 	 * Collation: 0 if type cannot use collations, DEFAULT_COLLATION_OID for
 	 * collatable base types, possibly other OID for domains
 	 */
-	Oid			typcollation;
+	Oid			typcollation BKI_DEFAULT(0);
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
 
@@ -207,7 +214,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 * a default expression for the type.  Currently this is only used for
 	 * domains.
 	 */
-	pg_node_tree typdefaultbin;
+	pg_node_tree typdefaultbin BKI_DEFAULT(_null_);
 
 	/*
 	 * typdefault is NULL if the type has no associated default value. If
@@ -217,12 +224,12 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO
 	 * external representation of the type's default value, which may be fed
 	 * to the type's input converter to produce a constant.
 	 */
-	text		typdefault;
+	text		typdefault BKI_DEFAULT(_null_);
 
 	/*
 	 * Access permissions
 	 */
-	aclitem		typacl[1];
+	aclitem		typacl[1] BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_type;
 
diff --git a/src/include/catalog/rewrite_dat.pl b/src/include/catalog/rewrite_dat.pl
index 410c8b3..cf38c7a 100644
--- a/src/include/catalog/rewrite_dat.pl
+++ b/src/include/catalog/rewrite_dat.pl
@@ -24,6 +24,7 @@ use warnings;
 
 my @input_files;
 my $output_path = '';
+my $expand_tuples = 0;
 
 # Process command line switches.
 while (@ARGV)
@@ -41,6 +42,10 @@ while (@ARGV)
 	{
 		revert();
 	}
+	elsif ($arg eq '--expand')
+	{
+		$expand_tuples = 1;
+	}
 	else
 	{
 		usage();
@@ -79,7 +84,18 @@ foreach my $datfile (@input_files)
 
 	foreach my $column (@$schema)
 	{
-		my $attname = $column->{name};
+		my $attname;
+
+		# Use abbreviations where available, unless we're writing
+		# full tuples.
+		if (exists $column->{abbrev} and !$expand_tuples)
+		{
+			$attname = $column->{abbrev};
+		}
+		else
+		{
+			$attname = $column->{name};
+		}
 		push @attnames, $attname;
 	}
 
@@ -112,6 +128,32 @@ foreach my $datfile (@input_files)
 			my %values = %$data;
 			print $dat "{ ";
 
+			# Write out tuples in a compact representation. We must do
+			# these operations in the order given.
+			# Note: This is also a convenient place to do one-off
+			# bulk-editing.
+			if (!$expand_tuples)
+			{
+				strip_default_values(\%values, $schema, $catname);
+
+				# Delete values that are computable from other fields.
+				if ($catname eq 'pg_proc')
+				{
+					delete $values{pronargs};
+					delete $values{prosrc}
+					  if $values{prosrc} eq $values{proname};
+				}
+				elsif ($catname eq 'pg_type' and exists $values{oid_symbol})
+				{
+					my $symbol = Catalog::GetPgTypeSymbol($values{typname});
+					delete $values{oid_symbol}
+					  if defined $symbol
+					    and $values{oid_symbol} eq $symbol;
+				}
+
+				add_column_abbrevs(\%values, $schema);
+			}
+
 			# Separate out metadata fields for readability.
 			my $metadata_line = format_line(\%values, @metafields);
 			if ($metadata_line)
@@ -136,6 +178,41 @@ foreach my $datfile (@input_files)
 	}
 }
 
+# Drop every column value in $row that equals the default declared in
+# $schema; dies if a schema column's value is undefined in $row.
+sub strip_default_values
+{
+	my ($row, $schema, $catname) = @_;
+
+	foreach my $column (@$schema)
+	{
+		my $attname = $column->{name};
+		my $default = $column->{default};
+
+		die "No value for $catname.$attname\n"
+		  unless defined $row->{$attname};
+
+		delete $row->{$attname}
+		  if defined $default and $row->{$attname} eq $default;
+	}
+}
+
+# Copy each value whose column has an abbreviation to the abbreviated
+# key of $row.  The full-name key is left in place.
+sub add_column_abbrevs
+{
+	my ($row, $schema) = @_;
+
+	foreach my $column (@$schema)
+	{
+		my $short = $column->{abbrev};
+		my $long  = $column->{name};
+		next unless defined $short and exists $row->{$long};
+
+		$row->{$short} = $row->{$long};
+	}
+}
+
 sub format_line
 {
 	my $data = shift;
@@ -186,6 +263,7 @@ Usage: rewrite_dat.pl [options] datafile...
 
 Options:
     -o               output path
+    --expand         write out full tuples
     --revert         rename .bak files back to .dat
 
 Expects a list of .dat files as arguments.
-- 
2.7.4

