[PATCH] pg_dump: Sort overloaded functions in deterministic order

Started by Joel Jacobson over 13 years ago, 1 message
#1 Joel Jacobson
joel@trustly.com
1 attachment(s)

I have received positive feedback on the pg_dump --split option I suggested,
but it depends on pg_dump dumping objects in a deterministic order.

I'm committed to fixing this. The first problem I've spotted is overloaded
functions.

This patch adds a new member to DumpableObject: char *proargs.
It is set to the output of pg_get_function_identity_arguments(oid)
for all functions, and to NULL for all other objects.

sortDumpableObjectsByTypeName calls DOTypeNameCompare, which in addition
to sorting objects by type, namespace and name, now also sorts by
the function identity arguments.

This causes overloaded functions to be dumped in the same order,
regardless of the order in which they were created.

Are there any other object types where the order isn't deterministic?

Attachments:

pg_dump_deterministic_order.patch (application/octet-stream; name=pg_dump_deterministic_order.patch) Download
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
***************
*** 2229,2234 **** getBlobs(Archive *fout)
--- 2229,2235 ----
  			AssignDumpId(&binfo[i].dobj);
  
  			binfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, 0));
+ 			binfo[i].dobj.proargs = NULL;
  			if (!PQgetisnull(res, i, 1))
  				binfo[i].rolname = pg_strdup(PQgetvalue(res, i, 1));
  			else
***************
*** 2659,2664 **** getNamespaces(Archive *fout, int *numNamespaces)
--- 2660,2666 ----
  		nsinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&nsinfo[i].dobj);
  		nsinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_nspname));
+ 		nsinfo[i].dobj.proargs = NULL;
  		nsinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
  		nsinfo[i].nspacl = pg_strdup(PQgetvalue(res, i, i_nspacl));
  
***************
*** 2779,2784 **** getExtensions(Archive *fout, int *numExtensions)
--- 2781,2787 ----
  		extinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&extinfo[i].dobj);
  		extinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_extname));
+ 		extinfo[i].dobj.proargs = NULL;
  		extinfo[i].namespace = pg_strdup(PQgetvalue(res, i, i_nspname));
  		extinfo[i].relocatable = *(PQgetvalue(res, i, i_extrelocatable)) == 't';
  		extinfo[i].extversion = pg_strdup(PQgetvalue(res, i, i_extversion));
***************
*** 2936,2941 **** getTypes(Archive *fout, int *numTypes)
--- 2939,2945 ----
  		tyinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&tyinfo[i].dobj);
  		tyinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_typname));
+ 		tyinfo[i].dobj.proargs = NULL;
  		tyinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_typnamespace)),
***************
*** 2986,2991 **** getTypes(Archive *fout, int *numTypes)
--- 2990,2996 ----
  			stinfo->dobj.catId = nilCatalogId;
  			AssignDumpId(&stinfo->dobj);
  			stinfo->dobj.name = pg_strdup(tyinfo[i].dobj.name);
+ 			stinfo->dobj.proargs = NULL;
  			stinfo->dobj.namespace = tyinfo[i].dobj.namespace;
  			stinfo->baseType = &(tyinfo[i]);
  			tyinfo[i].shellType = stinfo;
***************
*** 3140,3145 **** getOperators(Archive *fout, int *numOprs)
--- 3145,3151 ----
  		oprinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&oprinfo[i].dobj);
  		oprinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_oprname));
+ 		oprinfo[i].dobj.proargs = NULL;
  		oprinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_oprnamespace)),
***************
*** 3227,3232 **** getCollations(Archive *fout, int *numCollations)
--- 3233,3239 ----
  		collinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&collinfo[i].dobj);
  		collinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_collname));
+ 		collinfo[i].dobj.proargs = NULL;
  		collinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_collnamespace)),
***************
*** 3306,3311 **** getConversions(Archive *fout, int *numConversions)
--- 3313,3319 ----
  		convinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&convinfo[i].dobj);
  		convinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_conname));
+ 		convinfo[i].dobj.proargs = NULL;
  		convinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_connamespace)),
***************
*** 3397,3402 **** getOpclasses(Archive *fout, int *numOpclasses)
--- 3405,3411 ----
  		opcinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&opcinfo[i].dobj);
  		opcinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opcname));
+ 		opcinfo[i].dobj.proargs = NULL;
  		opcinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_opcnamespace)),
***************
*** 3485,3490 **** getOpfamilies(Archive *fout, int *numOpfamilies)
--- 3494,3500 ----
  		opfinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&opfinfo[i].dobj);
  		opfinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opfname));
+ 		opfinfo[i].dobj.proargs = NULL;
  		opfinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_opfnamespace)),
***************
*** 3691,3696 **** getFuncs(Archive *fout, int *numFuncs)
--- 3701,3707 ----
  	int			i_proargtypes;
  	int			i_prorettype;
  	int			i_proacl;
+ 	int			i_proargs;
  
  	/* Make sure we are in proper schema */
  	selectSourceSchema(fout, "pg_catalog");
***************
*** 3717,3723 **** getFuncs(Archive *fout, int *numFuncs)
  						  "SELECT tableoid, oid, proname, prolang, "
  						  "pronargs, proargtypes, prorettype, proacl, "
  						  "pronamespace, "
! 						  "(%s proowner) AS rolname "
  						  "FROM pg_proc p "
  						  "WHERE NOT proisagg AND ("
  						  "pronamespace != "
--- 3728,3735 ----
  						  "SELECT tableoid, oid, proname, prolang, "
  						  "pronargs, proargtypes, prorettype, proacl, "
  						  "pronamespace, "
! 						  "(%s proowner) AS rolname, "
! 						  "pg_get_function_identity_arguments(oid) AS proargs"
  						  "FROM pg_proc p "
  						  "WHERE NOT proisagg AND ("
  						  "pronamespace != "
***************
*** 3745,3751 **** getFuncs(Archive *fout, int *numFuncs)
  						  "pronargs, proargtypes, prorettype, "
  						  "'{=X}' AS proacl, "
  						  "0::oid AS pronamespace, "
! 						  "(%s proowner) AS rolname "
  						  "FROM pg_proc "
  						  "WHERE pg_proc.oid > '%u'::oid",
  						  username_subquery,
--- 3757,3764 ----
  						  "pronargs, proargtypes, prorettype, "
  						  "'{=X}' AS proacl, "
  						  "0::oid AS pronamespace, "
! 						  "(%s proowner) AS rolname, "
! 						  "''::text AS proargs"
  						  "FROM pg_proc "
  						  "WHERE pg_proc.oid > '%u'::oid",
  						  username_subquery,
***************
*** 3761,3767 **** getFuncs(Archive *fout, int *numFuncs)
  						  "pronargs, proargtypes, prorettype, "
  						  "'{=X}' AS proacl, "
  						  "0::oid AS pronamespace, "
! 						  "(%s proowner) AS rolname "
  						  "FROM pg_proc "
  						  "where pg_proc.oid > '%u'::oid",
  						  username_subquery,
--- 3774,3781 ----
  						  "pronargs, proargtypes, prorettype, "
  						  "'{=X}' AS proacl, "
  						  "0::oid AS pronamespace, "
! 						  "(%s proowner) AS rolname, "
! 						  "''::text AS proargs"
  						  "FROM pg_proc "
  						  "where pg_proc.oid > '%u'::oid",
  						  username_subquery,
***************
*** 3786,3791 **** getFuncs(Archive *fout, int *numFuncs)
--- 3800,3806 ----
  	i_proargtypes = PQfnumber(res, "proargtypes");
  	i_prorettype = PQfnumber(res, "prorettype");
  	i_proacl = PQfnumber(res, "proacl");
+ 	i_proargs = PQfnumber(res, "proargs");
  
  	for (i = 0; i < ntups; i++)
  	{
***************
*** 3794,3799 **** getFuncs(Archive *fout, int *numFuncs)
--- 3809,3815 ----
  		finfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&finfo[i].dobj);
  		finfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_proname));
+ 		finfo[i].dobj.proargs = pg_strdup(PQgetvalue(res, i, i_proargs));
  		finfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_pronamespace)),
***************
*** 4919,4924 **** getDomainConstraints(Archive *fout, TypeInfo *tyinfo)
--- 4935,4941 ----
  		constrinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&constrinfo[i].dobj);
  		constrinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_conname));
+ 		constrinfo[i].dobj.proargs = NULL;
  		constrinfo[i].dobj.namespace = tyinfo->dobj.namespace;
  		constrinfo[i].contable = NULL;
  		constrinfo[i].condomain = tyinfo;
***************
*** 5027,5032 **** getRules(Archive *fout, int *numRules)
--- 5044,5050 ----
  		ruleinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&ruleinfo[i].dobj);
  		ruleinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_rulename));
+ 		ruleinfo[i].dobj.proargs = NULL;
  		ruletableoid = atooid(PQgetvalue(res, i, i_ruletable));
  		ruleinfo[i].ruletable = findTableByOid(ruletableoid);
  		if (ruleinfo[i].ruletable == NULL)
***************
*** 5413,5418 **** getProcLangs(Archive *fout, int *numProcLangs)
--- 5431,5437 ----
  		AssignDumpId(&planginfo[i].dobj);
  
  		planginfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_lanname));
+ 		planginfo[i].dobj.proargs = NULL;
  		planginfo[i].lanpltrusted = *(PQgetvalue(res, i, i_lanpltrusted)) == 't';
  		planginfo[i].lanplcallfoid = atooid(PQgetvalue(res, i, i_lanplcallfoid));
  		if (i_laninline >= 0)
***************
*** 5550,5555 **** getCasts(Archive *fout, int *numCasts)
--- 5569,5575 ----
  			appendPQExpBuffer(&namebuf, "%s %s",
  							  sTypeInfo->dobj.name, tTypeInfo->dobj.name);
  		castinfo[i].dobj.name = namebuf.data;
+ 		castinfo[i].dobj.proargs = NULL;
  
  		if (OidIsValid(castinfo[i].castfunc))
  		{
***************
*** 6232,6237 **** getTSParsers(Archive *fout, int *numTSParsers)
--- 6252,6258 ----
  		prsinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&prsinfo[i].dobj);
  		prsinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_prsname));
+ 		prsinfo[i].dobj.proargs = NULL;
  		prsinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_prsnamespace)),
***************
*** 6316,6321 **** getTSDictionaries(Archive *fout, int *numTSDicts)
--- 6337,6343 ----
  		dictinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&dictinfo[i].dobj);
  		dictinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_dictname));
+ 		dictinfo[i].dobj.proargs = NULL;
  		dictinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_dictnamespace)),
***************
*** 6397,6402 **** getTSTemplates(Archive *fout, int *numTSTemplates)
--- 6419,6425 ----
  		tmplinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&tmplinfo[i].dobj);
  		tmplinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_tmplname));
+ 		tmplinfo[i].dobj.proargs = NULL;
  		tmplinfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_tmplnamespace)),
***************
*** 6475,6480 **** getTSConfigurations(Archive *fout, int *numTSConfigs)
--- 6498,6504 ----
  		cfginfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&cfginfo[i].dobj);
  		cfginfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_cfgname));
+ 		cfginfo[i].dobj.proargs = NULL;
  		cfginfo[i].dobj.namespace =
  			findNamespace(fout,
  						  atooid(PQgetvalue(res, i, i_cfgnamespace)),
***************
*** 6581,6586 **** getForeignDataWrappers(Archive *fout, int *numForeignDataWrappers)
--- 6605,6611 ----
  		fdwinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&fdwinfo[i].dobj);
  		fdwinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_fdwname));
+ 		fdwinfo[i].dobj.proargs = NULL;
  		fdwinfo[i].dobj.namespace = NULL;
  		fdwinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
  		fdwinfo[i].fdwhandler = pg_strdup(PQgetvalue(res, i, i_fdwhandler));
***************
*** 6670,6675 **** getForeignServers(Archive *fout, int *numForeignServers)
--- 6695,6701 ----
  		srvinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
  		AssignDumpId(&srvinfo[i].dobj);
  		srvinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_srvname));
+ 		srvinfo[i].dobj.proargs = NULL;
  		srvinfo[i].dobj.namespace = NULL;
  		srvinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
  		srvinfo[i].srvfdw = atooid(PQgetvalue(res, i, i_srvfdw));
***************
*** 6754,6759 **** getDefaultACLs(Archive *fout, int *numDefaultACLs)
--- 6780,6786 ----
  		AssignDumpId(&daclinfo[i].dobj);
  		/* cheesy ... is it worth coming up with a better object name? */
  		daclinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_defaclobjtype));
+ 		daclinfo[i].dobj.proargs = NULL;
  
  		if (nspid != InvalidOid)
  			daclinfo[i].dobj.namespace = findNamespace(fout, nspid,
*** a/src/bin/pg_dump/pg_dump.h
--- b/src/bin/pg_dump/pg_dump.h
***************
*** 135,140 **** typedef struct _dumpableObject
--- 135,141 ----
  	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
  	int			nDeps;			/* number of valid dependencies */
  	int			allocDeps;		/* allocated size of dependencies[] */
+ 	char	   *proargs;			/* pg_get_function_identity_arguments(oid) */
  } DumpableObject;
  
  typedef struct _namespaceInfo
*** a/src/bin/pg_dump/pg_dump_sort.c
--- b/src/bin/pg_dump/pg_dump_sort.c
***************
*** 185,190 **** DOTypeNameCompare(const void *p1, const void *p2)
--- 185,198 ----
  	if (cmpval != 0)
  		return cmpval;
  
+ 	/* Sort by function identity arguments */
+ 	if (obj1->proargs && obj2->proargs)
+ 	{
+ 		cmpval = strcmp(obj1->proargs, obj2->proargs);
+ 		if (cmpval != 0)
+ 			return cmpval;
+ 	}
+ 
  	/* To have a stable sort order, break ties for some object types */
  	if (obj1->objType == DO_FUNC || obj1->objType == DO_AGG)
  	{