10RC1 crash testing MultiXact oddity

Started by Jeff Janesover 8 years ago7 messages

jeff.janes@gmail.com

over 8 years ago

4 attachment(s)

I am running some crash recovery testing against 10rc1 by injecting torn
page writes, using a test case which generates a lot of multixact, some
naturally by doing a lot fk updates, but most artificially by calling
the pg_burn_multixact function from one of the attached patches.

In 22 hours of running I got 12 instances were messages like this appear:

MultiXact member wraparound protections are disabled because oldest
checkpointed MultiXact 681012168 does not exist on disk

This is not a fatal error, and no inconsistent data is found at the end of
the run. But the code comments suggests that this should only happen on a
server that has been upgraded from 9.3 or 9.4, which this server has not
been.

Is the presence of this log message something that needs to be investigated
further?

Thanks,

Jeff

Attachments:

0002-pg_burn_multixact-utility_v10.patchapplication/octet-stream; name=0002-pg_burn_multixact-utility_v10.patchDownload

diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644
index f8ac343..8b9abe5
*** a/contrib/pageinspect/heapfuncs.c
--- b/contrib/pageinspect/heapfuncs.c
***************
*** 32,37 ****
--- 32,39 ----
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/rel.h"
+ #include "access/multixact.h"
+ #include "access/transam.h"
  
  
  /*
*************** tuple_data_split(PG_FUNCTION_ARGS)
*** 476,478 ****
--- 478,520 ----
  
  	PG_RETURN_ARRAYTYPE_P(res);
  }
+ 
+ extern Datum
+ pg_burn_multixact(PG_FUNCTION_ARGS);
+ PG_FUNCTION_INFO_V1(pg_burn_multixact);
+ 
+ Datum
+ pg_burn_multixact(PG_FUNCTION_ARGS)
+ {
+ 	int		rep = PG_GETARG_INT32(0);
+ 	int		size = PG_GETARG_INT32(1);
+ 	MultiXactMember *members;
+ 	MultiXactId ret;
+ 	TransactionId id = ReadNewTransactionId() - size;
+ 	int		i;
+ 
+ 	if (rep < 1)
+ 		elog(ERROR, "need to burn, burn, burn");
+ 
+ 	members = palloc(size * sizeof(MultiXactMember));
+ 	for (i = 0; i < size; i++)
+ 	{
+ 		members[i].xid = id++;
+ 		members[i].status = MultiXactStatusForShare;
+ 
+ 		if (!TransactionIdIsNormal(members[i].xid))
+ 		{
+ 			id = FirstNormalTransactionId;
+ 			members[i].xid = id++;
+ 		}
+ 	}
+ 
+ 	MultiXactIdSetOldestMember();
+ 
+ 	for (i = 0; i < rep; i++)
+ 	{
+ 		ret = MultiXactIdCreateFromMembers(size, members, true);
+ 	}
+ 
+ 	PG_RETURN_INT64((int64) ret);
+ }
diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql
new file mode 100644
index 1e40c3c..9f936e4
*** a/contrib/pageinspect/pageinspect--1.5.sql
--- b/contrib/pageinspect/pageinspect--1.5.sql
*************** CREATE FUNCTION gin_leafpage_items(IN pa
*** 277,279 ****
--- 277,284 ----
  RETURNS SETOF record
  AS 'MODULE_PATHNAME', 'gin_leafpage_items'
  LANGUAGE C STRICT PARALLEL SAFE;
+  
+ CREATE FUNCTION pg_burn_multixact(num int4, size int4)
+ RETURNS int4
+ AS 'MODULE_PATHNAME', 'pg_burn_multixact'
+ LANGUAGE C STRICT;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
new file mode 100644
index b019bc1..56ddb79
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
*************** FreezeMultiXactId(MultiXactId multi, uin
*** 6552,6558 ****
  		 * Create a new multixact with the surviving members of the previous
  		 * one, to set as new Xmax in the tuple.
  		 */
! 		xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers);
  		*flags |= FRM_RETURN_IS_MULTI;
  	}
  
--- 6552,6558 ----
  		 * Create a new multixact with the surviving members of the previous
  		 * one, to set as new Xmax in the tuple.
  		 */
! 		xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers, false);
  		*flags |= FRM_RETURN_IS_MULTI;
  	}
  
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
new file mode 100644
index e9588a7..9bfce79
*** a/src/backend/access/transam/multixact.c
--- b/src/backend/access/transam/multixact.c
*************** MultiXactIdCreate(TransactionId xid1, Mu
*** 406,412 ****
  	members[1].xid = xid2;
  	members[1].status = status2;
  
! 	newMulti = MultiXactIdCreateFromMembers(2, members);
  
  	debug_elog3(DEBUG2, "Create: %s",
  				mxid_to_string(newMulti, 2, members));
--- 406,412 ----
  	members[1].xid = xid2;
  	members[1].status = status2;
  
! 	newMulti = MultiXactIdCreateFromMembers(2, members, false);
  
  	debug_elog3(DEBUG2, "Create: %s",
  				mxid_to_string(newMulti, 2, members));
*************** MultiXactIdExpand(MultiXactId multi, Tra
*** 472,478 ****
  		 */
  		member.xid = xid;
  		member.status = status;
! 		newMulti = MultiXactIdCreateFromMembers(1, &member);
  
  		debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
  					multi, newMulti);
--- 472,478 ----
  		 */
  		member.xid = xid;
  		member.status = status;
! 		newMulti = MultiXactIdCreateFromMembers(1, &member, false);
  
  		debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
  					multi, newMulti);
*************** MultiXactIdExpand(MultiXactId multi, Tra
*** 524,530 ****
  
  	newMembers[j].xid = xid;
  	newMembers[j++].status = status;
! 	newMulti = MultiXactIdCreateFromMembers(j, newMembers);
  
  	pfree(members);
  	pfree(newMembers);
--- 524,530 ----
  
  	newMembers[j].xid = xid;
  	newMembers[j++].status = status;
! 	newMulti = MultiXactIdCreateFromMembers(j, newMembers, false);
  
  	pfree(members);
  	pfree(newMembers);
*************** ReadNextMultiXactId(void)
*** 743,749 ****
   * NB: the passed members[] array will be sorted in-place.
   */
  MultiXactId
! MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
  {
  	MultiXactId multi;
  	MultiXactOffset offset;
--- 743,749 ----
   * NB: the passed members[] array will be sorted in-place.
   */
  MultiXactId
! MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members, bool nocache)
  {
  	MultiXactId multi;
  	MultiXactOffset offset;
*************** MultiXactIdCreateFromMembers(int nmember
*** 762,768 ****
  	 * corner cases where someone else added us to a MultiXact without our
  	 * knowledge, but it's not worth checking for.)
  	 */
! 	multi = mXactCacheGetBySet(nmembers, members);
  	if (MultiXactIdIsValid(multi))
  	{
  		debug_elog2(DEBUG2, "Create: in cache!");
--- 762,770 ----
  	 * corner cases where someone else added us to a MultiXact without our
  	 * knowledge, but it's not worth checking for.)
  	 */
! 	multi = nocache ? InvalidMultiXactId :
! 		mXactCacheGetBySet(nmembers, members);
! 
  	if (MultiXactIdIsValid(multi))
  	{
  		debug_elog2(DEBUG2, "Create: in cache!");
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
new file mode 100644
index ab5de62..d13cf74
*** a/src/include/access/multixact.h
--- b/src/include/access/multixact.h
*************** typedef struct xl_multixact_truncate
*** 102,111 ****
  extern MultiXactId MultiXactIdCreate(TransactionId xid1,
  				  MultiXactStatus status1, TransactionId xid2,
  				  MultiXactStatus status2);
  extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
  				  MultiXactStatus status);
  extern MultiXactId MultiXactIdCreateFromMembers(int nmembers,
! 							 MultiXactMember *members);
  
  extern MultiXactId ReadNextMultiXactId(void);
  extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);
--- 102,112 ----
  extern MultiXactId MultiXactIdCreate(TransactionId xid1,
  				  MultiXactStatus status1, TransactionId xid2,
  				  MultiXactStatus status2);
+ extern MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members, bool nocache);
  extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
  				  MultiXactStatus status);
  extern MultiXactId MultiXactIdCreateFromMembers(int nmembers,
! 							 MultiXactMember *members, bool nocache);
  
  extern MultiXactId ReadNextMultiXactId(void);
  extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);

count.plapplication/octet-stream; name=count.plDownload

crash_REL11.patchapplication/octet-stream; name=crash_REL11.patchDownload

diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
new file mode 100644
index 702c8c9..ecbe141
*** a/src/backend/access/transam/varsup.c
--- b/src/backend/access/transam/varsup.c
***************
*** 33,38 ****
--- 33,41 ----
  /* pointer to "variable cache" in shared memory (set up by shmem.c) */
  VariableCache ShmemVariableCache = NULL;
  
+ int JJ_xid=0;
+ extern int	JJ_vac;
+ 
  
  /*
   * Allocate the next XID for a new transaction or subtransaction.
*************** GetNewTransactionId(bool isSubXact)
*** 168,173 ****
--- 171,181 ----
  	 *
  	 * Extend pg_subtrans and pg_commit_ts too.
  	 */
+ 	{
+ 	int		incr;
+ 	for (incr=0; incr <=JJ_xid; incr++)
+ 	{
+ 	xid = ShmemVariableCache->nextXid;
  	ExtendCLOG(xid);
  	ExtendCommitTs(xid);
  	ExtendSUBTRANS(xid);
*************** GetNewTransactionId(bool isSubXact)
*** 179,184 ****
--- 187,194 ----
  	 * more XIDs until there is CLOG space for them.
  	 */
  	TransactionIdAdvance(ShmemVariableCache->nextXid);
+ 	}
+ 	}
  
  	/*
  	 * We must store the new XID into the shared ProcArray before releasing
*************** SetTransactionIdLimit(TransactionId olde
*** 363,370 ****
  	LWLockRelease(XidGenLock);
  
  	/* Log the info */
! 	ereport(DEBUG1,
! 			(errmsg("transaction ID wrap limit is %u, limited by database with OID %u",
  					xidWrapLimit, oldest_datoid)));
  
  	/*
--- 373,380 ----
  	LWLockRelease(XidGenLock);
  
  	/* Log the info */
! 	ereport(LOG,
! 			(errmsg("JJ transaction ID wrap limit is %u, limited by database with OID %u",
  					xidWrapLimit, oldest_datoid)));
  
  	/*
*************** ForceTransactionIdLimitUpdate(void)
*** 441,446 ****
--- 451,467 ----
  	oldestXidDB = ShmemVariableCache->oldestXidDB;
  	LWLockRelease(XidGenLock);
  
+ 	if (JJ_vac) {
+ 	elog(LOG,"JJ ForceTransactionIdLimitUpdate in %d: !normal %d, !valid %d, follows %d (%u, %u), !exists %d", MyDatabaseId,
+ 		!TransactionIdIsNormal(oldestXid),
+ 		!TransactionIdIsValid(xidVacLimit),
+ 		TransactionIdFollowsOrEquals(nextXid, xidVacLimit),
+ 		nextXid, xidVacLimit,
+  		!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(oldestXidDB))
+ 	);
+ 	};
+ 
+ 
  	if (!TransactionIdIsNormal(oldestXid))
  		return true;			/* shouldn't happen, but just in case */
  	if (!TransactionIdIsValid(xidVacLimit))
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
new file mode 100644
index df4843f..8f24c1a
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** BootStrapXLOG(void)
*** 5028,5033 ****
--- 5028,5034 ----
  	ShmemVariableCache->oidCount = 0;
  	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
  	AdvanceOldestClogXid(checkPoint.oldestXid);
+ 	//elog(LOG,"JJ SetTransactionIDLimit %d", checkPoint.oldestXid);
  	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
  	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
  	SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId);
*************** StartupXLOG(void)
*** 6635,6640 ****
--- 6636,6642 ----
  	ShmemVariableCache->oidCount = 0;
  	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
  	AdvanceOldestClogXid(checkPoint.oldestXid);
+ 	//elog(LOG,"JJ SetTransactionIDLimit %d", checkPoint.oldestXid);
  	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
  	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
  	SetCommitTsLimit(checkPoint.oldestCommitTsXid,
*************** xlog_redo(XLogReaderState *record)
*** 9656,9661 ****
--- 9658,9665 ----
  		MultiXactAdvanceOldest(checkPoint.oldestMulti,
  							   checkPoint.oldestMultiDB);
  
+ 		//elog(LOG,"JJ SetTransactionIDLimit %d", checkPoint.oldestXid);
+ 
  		/*
  		 * No need to set oldestClogXid here as well; it'll be set when we
  		 * redo an xl_clog_truncate if it changed since initialization.
*************** xlog_redo(XLogReaderState *record)
*** 9757,9762 ****
--- 9761,9767 ----
  		 */
  		MultiXactAdvanceOldest(checkPoint.oldestMulti,
  							   checkPoint.oldestMultiDB);
+ 		//elog(LOG,"JJ maybe SetTransactionIDLimit %d", checkPoint.oldestXid);
  		if (TransactionIdPrecedes(ShmemVariableCache->oldestXid,
  								  checkPoint.oldestXid))
  			SetTransactionIdLimit(checkPoint.oldestXid,
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
new file mode 100644
index faa1812..09eb770
*** a/src/backend/commands/vacuum.c
--- b/src/backend/commands/vacuum.c
*************** int			vacuum_freeze_min_age;
*** 59,64 ****
--- 59,66 ----
  int			vacuum_freeze_table_age;
  int			vacuum_multixact_freeze_min_age;
  int			vacuum_multixact_freeze_table_age;
+ int			JJ_vac=0;
+ 
  
  
  /* A few variables that don't seem worth passing around as parameters */
*************** vacuum_set_xid_limits(Relation rel,
*** 568,573 ****
--- 570,576 ----
  	}
  
  	*freezeLimit = limit;
+ 	if (JJ_vac) elog(LOG,"JJ freezeLimit %d", *freezeLimit);
  
  	/*
  	 * Compute the multixact age for which freezing is urgent.  This is
*************** vacuum_set_xid_limits(Relation rel,
*** 622,627 ****
--- 625,632 ----
  		 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
  		 * before anti-wraparound autovacuum is launched.
  		 */
+ 		if (JJ_vac) elog(LOG,"JJ freeze_min_age %d vacuum_freeze_table_age %d freeze_table_age %d ReadNew %d", freeze_min_age, 
+                            vacuum_freeze_table_age, freeze_table_age,ReadNewTransactionId());
  		freezetable = freeze_table_age;
  		if (freezetable < 0)
  			freezetable = vacuum_freeze_table_age;
*************** vac_update_datfrozenxid(void)
*** 1059,1064 ****
--- 1064,1070 ----
  	 * truncate pg_xact and/or pg_multixact.  Also do it if the shared
  	 * XID-wrap-limit info is stale, since this action will update that too.
  	 */
+ 	if (JJ_vac && dirty) elog(LOG,"JJ updating in %d without call to ForceTransactionIdLimitUpdate", MyDatabaseId);
  	if (dirty || ForceTransactionIdLimitUpdate())
  		vac_truncate_clog(newFrozenXid, newMinMulti,
  						  lastSaneFrozenXid, lastSaneMinMulti);
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
new file mode 100644
index e9b4045..05b8f9c
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 63,68 ****
--- 63,69 ----
  #include "utils/tqual.h"
  
  
+ extern int JJ_vac;
  /*
   * Space/time tradeoff parameters: do these need to be user-tunable?
   *
*************** lazy_vacuum_rel(Relation onerel, int opt
*** 242,247 ****
--- 243,250 ----
  	if (options & VACOPT_DISABLE_PAGE_SKIPPING)
  		aggressive = true;
  
+ 	if (JJ_vac) elog(LOG,"JJ aggresive %d, relfrozenid %d", aggressive, onerel->rd_rel->relfrozenxid);
+ 
  	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
  
  	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
new file mode 100644
index 776b1c0..39d388e
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
*************** int			autovacuum_vac_cost_delay;
*** 125,130 ****
--- 125,131 ----
  int			autovacuum_vac_cost_limit;
  
  int			Log_autovacuum_min_duration = -1;
+ extern int  JJ_vac;
  
  /* how long to keep pgstat data in the launcher, in milliseconds */
  #define STATS_READ_DELAY 1000
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1682,1689 ****
  		InitPostgres(NULL, dbid, NULL, InvalidOid, dbname);
  		SetProcessingMode(NormalProcessing);
  		set_ps_display(dbname, false);
- 		ereport(DEBUG1,
- 				(errmsg("autovacuum: processing database \"%s\"", dbname)));
  
  		if (PostAuthDelay)
  			pg_usleep(PostAuthDelay * 1000000L);
--- 1683,1688 ----
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1691,1697 ****
--- 1690,1700 ----
  		/* And do an appropriate amount of work */
  		recentXid = ReadNewTransactionId();
  		recentMulti = ReadNextMultiXactId();
+ 		if (JJ_vac) ereport(LOG,
+ 				(errmsg("autovacuum: processing database \"%s\" at recent Xid of %u recent mxid of %u", dbname,recentXid,recentMulti)));
  		do_autovacuum();
+ 		if (JJ_vac) ereport(LOG,
+ 				(errmsg("autovacuum: done processing database \"%s\" at recent Xid of %u recent mxid of %u", dbname,ReadNewTransactionId(),ReadNextMultiXactId())));
  	}
  
  	/*
*************** relation_needs_vacanalyze(Oid relid,
*** 3038,3050 ****
  		 * reset, because if that happens, the last vacuum and analyze counts
  		 * will be reset too.
  		 */
- 		elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
- 			 NameStr(classForm->relname),
- 			 vactuples, vacthresh, anltuples, anlthresh);
  
  		/* Determine if this table needs vacuum or analyze. */
  		*dovacuum = force_vacuum || (vactuples > vacthresh);
  		*doanalyze = (anltuples > anlthresh);
  	}
  	else
  	{
--- 3041,3054 ----
  		 * reset, because if that happens, the last vacuum and analyze counts
  		 * will be reset too.
  		 */
  
  		/* Determine if this table needs vacuum or analyze. */
  		*dovacuum = force_vacuum || (vactuples > vacthresh);
  		*doanalyze = (anltuples > anlthresh);
+ 
+ 		if (JJ_vac) elog(LOG, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f) wraparound %d dovaccum %d doanalyze %d",
+ 			 NameStr(classForm->relname),
+ 			 vactuples, vacthresh, anltuples, anlthresh, *wraparound, *dovacuum, *doanalyze);
  	}
  	else
  	{
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
new file mode 100644
index 65e0abe..278d256
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 68,73 ****
--- 68,75 ----
  #define FILE_POSSIBLY_DELETED(err)	((err) == ENOENT || (err) == EACCES)
  #endif
  
+ int JJ_torn_page=0;
+ 
  /*
   *	The magnetic disk storage manager keeps track of open file
   *	descriptors in its own descriptor pool.  This is done to make it
*************** mdwrite(SMgrRelation reln, ForkNumber fo
*** 805,810 ****
--- 807,813 ----
  	off_t		seekpos;
  	int			nbytes;
  	MdfdVec    *v;
+         static int counter=0;
  
  	/* This assert is too expensive to have on normally ... */
  #ifdef CHECK_WRITE_VS_EXTEND
*************** mdwrite(SMgrRelation reln, ForkNumber fo
*** 830,836 ****
--- 833,850 ----
  				 errmsg("could not seek to block %u in file \"%s\": %m",
  						blocknum, FilePathName(v->mdfd_vfd))));
  
+         if (JJ_torn_page > 0 && counter++ > JJ_torn_page && !RecoveryInProgress()) {
+ 		nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ/3, WAIT_EVENT_DATA_FILE_WRITE);
+ 		ereport(FATAL,
+ 				(errcode(ERRCODE_DISK_FULL),
+ 				 errmsg("could not write block %u of relation %s: wrote only %d of %d bytes",
+ 						blocknum,
+ 						relpath(reln->smgr_rnode, forknum),
+ 						nbytes, BLCKSZ),
+ 				 errhint("JJ is screwing with the database.")));
+         } else {
  	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_WRITE);
+ 	}
  
  	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
  										reln->smgr_rnode.node.spcNode,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
new file mode 100644
index 246fea8..93f04c2
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 112,117 ****
--- 112,120 ----
  /* XXX these should appear in other modules' header files */
  extern bool Log_disconnections;
  extern int	CommitDelay;
+ int	JJ_torn_page;
+ extern int	JJ_xid;
+ extern int	JJ_vac;
  extern int	CommitSiblings;
  extern char *default_tablespace;
  extern char *temp_tablespaces;
*************** static struct config_int ConfigureNamesI
*** 2389,2394 ****
--- 2392,2424 ----
  	},
  
  	{
+ 		{"JJ_torn_page", PGC_USERSET, WAL_SETTINGS,
+ 			gettext_noop("Simulate a torn-page crash after this number of page writes (0 to turn off)"),
+ 			NULL
+ 		},
+ 		&JJ_torn_page,
+ 		0, 0, 100000, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"JJ_xid", PGC_USERSET, WAL_SETTINGS,
+ 			gettext_noop("Skip this many xid every time we acquire one"),
+ 			NULL
+ 		},
+ 		&JJ_xid,
+ 		0, 0, 1000000, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"JJ_vac", PGC_USERSET, WAL_SETTINGS,
+ 			gettext_noop("turn on verbose logging"),
+ 			NULL
+ 		},
+ 		&JJ_vac,
+ 		0, 0, 1000000, NULL, NULL
+ 	},
+ 
+ 	{
  		{"commit_siblings", PGC_USERSET, WAL_SETTINGS,
  			gettext_noop("Sets the minimum concurrent open transactions before performing "
  						 "commit_delay."),
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
new file mode 100644
index f3b3529..ef4c90d
*** a/src/include/pg_config_manual.h
--- b/src/include/pg_config_manual.h
***************
*** 300,306 ****
  /*
   * Enable debugging print statements for lock-related operations.
   */
! /* #define LOCK_DEBUG */
  
  /*
   * Enable debugging print statements for WAL-related operations; see
--- 300,306 ----
  /*
   * Enable debugging print statements for lock-related operations.
   */
! #define LOCK_DEBUG 1
  
  /*
   * Enable debugging print statements for WAL-related operations; see

do.shapplication/x-sh; name=do.shDownload

Alvaro Herrera

alvherre@alvh.no-ip.org

over 8 years ago

In reply to: Jeff Janes (#1)

Re: 10RC1 crash testing MultiXact oddity

Jeff Janes wrote:

I am running some crash recovery testing against 10rc1 by injecting torn
page writes, using a test case which generates a lot of multixact, some
naturally by doing a lot fk updates, but most artificially by calling
the pg_burn_multixact function from one of the attached patches.

Is this new in pg10, or do you also see it in 9.6?

--
ï¿½lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Robert Haas

robertmhaas@gmail.com

over 8 years ago

In reply to: Jeff Janes (#1)

Re: 10RC1 crash testing MultiXact oddity

On Fri, Sep 22, 2017 at 11:37 AM, Jeff Janes <jeff.janes@gmail.com> wrote:

Is the presence of this log message something that needs to be investigated
further?

I'd say yes. It sounds like we have a race condition someplace that
previous fixes in this area failed to adequately understand.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Jeff Janes

jeff.janes@gmail.com

over 8 years ago

In reply to: Alvaro Herrera (#2)

Re: 10RC1 crash testing MultiXact oddity

On Fri, Sep 22, 2017 at 8:47 AM, Alvaro Herrera <alvherre@alvh.no-ip.org>
wrote:

Jeff Janes wrote:

I am running some crash recovery testing against 10rc1 by injecting torn
page writes, using a test case which generates a lot of multixact, some
naturally by doing a lot fk updates, but most artificially by calling
the pg_burn_multixact function from one of the attached patches.

Is this new in pg10, or do you also see it in 9.6?

It turns out it is not new in pg10. I spotted in the log file only by
accident while looking for something else. Now that I am looking for it, I
do see it in 9.6 as well.

Cheers,

Jeff

Robert Haas

robertmhaas@gmail.com

over 8 years ago

In reply to: Jeff Janes (#4)

Re: 10RC1 crash testing MultiXact oddity

On Fri, Sep 22, 2017 at 3:39 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

It turns out it is not new in pg10. I spotted in the log file only by
accident while looking for something else. Now that I am looking for it, I
do see it in 9.6 as well.

So I guess the next question is whether it also shows up if you initdb
with 9.4.latest and then run the same test.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Jeff Janes

jeff.janes@gmail.com

over 8 years ago

In reply to: Robert Haas (#5)

1 attachment(s)

Re: 10RC1 crash testing MultiXact oddity

On Fri, Sep 22, 2017 at 1:19 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Fri, Sep 22, 2017 at 3:39 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

It turns out it is not new in pg10. I spotted in the log file only by
accident while looking for something else. Now that I am looking for

it, I

do see it in 9.6 as well.

So I guess the next question is whether it also shows up if you initdb
with 9.4.latest and then run the same test.

git bisect shows that it shows up in 9.5, at this commit:

commit bd7c348d83a4576163b635010e49dbcac7126f01
Author: Andres Freund <andres@anarazel.de>
Date: Sat Sep 26 19:04:25 2015 +0200

Rework the way multixact truncations work.

The patches which enable the crashes and the rapid consumption of xid and
multixact both need a little adjustment from the 10rc1 versions, so I'm
attaching a combined patch that applies to bd7c348d83.

Not really sure what the next step is here. I could promote the
ereport(LOG...) to a PANIC to get a core dump, but I don't think that would
help because presumably the problem occurred early, when the truncation was
done, not when it was detected.

Cheers,

Jeff

Attachments:

crash_instrument.patchapplication/octet-stream; name=crash_instrument.patchDownload

diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644
index 8d1666c..18752cb
*** a/contrib/pageinspect/heapfuncs.c
--- b/contrib/pageinspect/heapfuncs.c
***************
*** 29,34 ****
--- 29,36 ----
  #include "funcapi.h"
  #include "utils/builtins.h"
  #include "miscadmin.h"
+ #include "access/multixact.h"
+ #include "access/transam.h"
  
  
  /*
*************** heap_page_items(PG_FUNCTION_ARGS)
*** 223,225 ****
--- 225,267 ----
  	else
  		SRF_RETURN_DONE(fctx);
  }
+ 
+ extern Datum
+ pg_burn_multixact(PG_FUNCTION_ARGS);
+ PG_FUNCTION_INFO_V1(pg_burn_multixact);
+ 
+ Datum
+ pg_burn_multixact(PG_FUNCTION_ARGS)
+ {
+ 	int		rep = PG_GETARG_INT32(0);
+ 	int		size = PG_GETARG_INT32(1);
+ 	MultiXactMember *members;
+ 	MultiXactId ret;
+ 	TransactionId id = ReadNewTransactionId() - size;
+ 	int		i;
+ 
+ 	if (rep < 1)
+ 		elog(ERROR, "need to burn, burn, burn");
+ 
+ 	members = palloc(size * sizeof(MultiXactMember));
+ 	for (i = 0; i < size; i++)
+ 	{
+ 		members[i].xid = id++;
+ 		members[i].status = MultiXactStatusForShare;
+ 
+ 		if (!TransactionIdIsNormal(members[i].xid))
+ 		{
+ 			id = FirstNormalTransactionId;
+ 			members[i].xid = id++;
+ 		}
+ 	}
+ 
+ 	MultiXactIdSetOldestMember();
+ 
+ 	for (i = 0; i < rep; i++)
+ 	{
+ 		ret = MultiXactIdCreateFromMembers(size, members, true);
+ 	}
+ 
+ 	PG_RETURN_INT64((int64) ret);
+ }
diff --git a/contrib/pageinspect/pageinspect--1.3.sql b/contrib/pageinspect/pageinspect--1.3.sql
new file mode 100644
index a99e058..567d38b
*** a/contrib/pageinspect/pageinspect--1.3.sql
--- b/contrib/pageinspect/pageinspect--1.3.sql
*************** CREATE FUNCTION gin_leafpage_items(IN pa
*** 187,189 ****
--- 187,194 ----
  RETURNS SETOF record
  AS 'MODULE_PATHNAME', 'gin_leafpage_items'
  LANGUAGE C STRICT;
+ 
+ CREATE FUNCTION pg_burn_multixact(num int4, size int4)
+ RETURNS int4
+ AS 'MODULE_PATHNAME', 'pg_burn_multixact'
+ LANGUAGE C STRICT;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
new file mode 100644
index bcf9871..e167684
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
*************** FreezeMultiXactId(MultiXactId multi, uin
*** 6099,6105 ****
  		 * Create a new multixact with the surviving members of the previous
  		 * one, to set as new Xmax in the tuple.
  		 */
! 		xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers);
  		*flags |= FRM_RETURN_IS_MULTI;
  	}
  
--- 6099,6105 ----
  		 * Create a new multixact with the surviving members of the previous
  		 * one, to set as new Xmax in the tuple.
  		 */
! 		xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers, false);
  		*flags |= FRM_RETURN_IS_MULTI;
  	}
  
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
new file mode 100644
index 5e3357f..bfb4ab6
*** a/src/backend/access/transam/multixact.c
--- b/src/backend/access/transam/multixact.c
*************** MultiXactIdCreate(TransactionId xid1, Mu
*** 410,416 ****
  	members[1].xid = xid2;
  	members[1].status = status2;
  
! 	newMulti = MultiXactIdCreateFromMembers(2, members);
  
  	debug_elog3(DEBUG2, "Create: %s",
  				mxid_to_string(newMulti, 2, members));
--- 410,416 ----
  	members[1].xid = xid2;
  	members[1].status = status2;
  
! 	newMulti = MultiXactIdCreateFromMembers(2, members, false);
  
  	debug_elog3(DEBUG2, "Create: %s",
  				mxid_to_string(newMulti, 2, members));
*************** MultiXactIdExpand(MultiXactId multi, Tra
*** 476,482 ****
  		 */
  		member.xid = xid;
  		member.status = status;
! 		newMulti = MultiXactIdCreateFromMembers(1, &member);
  
  		debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
  					multi, newMulti);
--- 476,482 ----
  		 */
  		member.xid = xid;
  		member.status = status;
! 		newMulti = MultiXactIdCreateFromMembers(1, &member, false);
  
  		debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
  					multi, newMulti);
*************** MultiXactIdExpand(MultiXactId multi, Tra
*** 528,534 ****
  
  	newMembers[j].xid = xid;
  	newMembers[j++].status = status;
! 	newMulti = MultiXactIdCreateFromMembers(j, newMembers);
  
  	pfree(members);
  	pfree(newMembers);
--- 528,534 ----
  
  	newMembers[j].xid = xid;
  	newMembers[j++].status = status;
! 	newMulti = MultiXactIdCreateFromMembers(j, newMembers, false);
  
  	pfree(members);
  	pfree(newMembers);
*************** ReadNextMultiXactId(void)
*** 747,753 ****
   * NB: the passed members[] array will be sorted in-place.
   */
  MultiXactId
! MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
  {
  	MultiXactId multi;
  	MultiXactOffset offset;
--- 747,753 ----
   * NB: the passed members[] array will be sorted in-place.
   */
  MultiXactId
! MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members, bool nocache)
  {
  	MultiXactId multi;
  	MultiXactOffset offset;
*************** MultiXactIdCreateFromMembers(int nmember
*** 766,772 ****
  	 * corner cases where someone else added us to a MultiXact without our
  	 * knowledge, but it's not worth checking for.)
  	 */
! 	multi = mXactCacheGetBySet(nmembers, members);
  	if (MultiXactIdIsValid(multi))
  	{
  		debug_elog2(DEBUG2, "Create: in cache!");
--- 766,774 ----
  	 * corner cases where someone else added us to a MultiXact without our
  	 * knowledge, but it's not worth checking for.)
  	 */
! 	multi = nocache ? InvalidMultiXactId :
! 		mXactCacheGetBySet(nmembers, members);
! 
  	if (MultiXactIdIsValid(multi))
  	{
  		debug_elog2(DEBUG2, "Create: in cache!");
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
new file mode 100644
index cf3e964..82248f3
*** a/src/backend/access/transam/varsup.c
--- b/src/backend/access/transam/varsup.c
***************
*** 33,38 ****
--- 33,41 ----
  /* pointer to "variable cache" in shared memory (set up by shmem.c) */
  VariableCache ShmemVariableCache = NULL;
  
+ int JJ_xid=0;
+ extern int	JJ_vac;
+ 
  
  /*
   * Allocate the next XID for a new transaction or subtransaction.
*************** GetNewTransactionId(bool isSubXact)
*** 168,173 ****
--- 171,181 ----
  	 *
  	 * Extend pg_subtrans and pg_commit_ts too.
  	 */
+ 	{
+ 	int		incr;
+ 	for (incr=0; incr <=JJ_xid; incr++)
+ 	{
+ 	xid = ShmemVariableCache->nextXid;
  	ExtendCLOG(xid);
  	ExtendCommitTs(xid);
  	ExtendSUBTRANS(xid);
*************** GetNewTransactionId(bool isSubXact)
*** 179,184 ****
--- 187,194 ----
  	 * more XIDs until there is CLOG space for them.
  	 */
  	TransactionIdAdvance(ShmemVariableCache->nextXid);
+ 	}
+ 	}
  
  	/*
  	 * We must store the new XID into the shared ProcArray before releasing
*************** SetTransactionIdLimit(TransactionId olde
*** 342,349 ****
  	LWLockRelease(XidGenLock);
  
  	/* Log the info */
! 	ereport(DEBUG1,
! 			(errmsg("transaction ID wrap limit is %u, limited by database with OID %u",
  					xidWrapLimit, oldest_datoid)));
  
  	/*
--- 352,359 ----
  	LWLockRelease(XidGenLock);
  
  	/* Log the info */
! 	ereport(LOG,
! 			(errmsg("JJ transaction ID wrap limit is %u, limited by database with OID %u",
  					xidWrapLimit, oldest_datoid)));
  
  	/*
*************** ForceTransactionIdLimitUpdate(void)
*** 420,425 ****
--- 430,446 ----
  	oldestXidDB = ShmemVariableCache->oldestXidDB;
  	LWLockRelease(XidGenLock);
  
+ 	if (JJ_vac) {
+ 	elog(LOG,"JJ ForceTransactionIdLimitUpdate in %d: !normal %d, !valid %d, follows %d (%u, %u), !exists %d", MyDatabaseId,
+ 		!TransactionIdIsNormal(oldestXid),
+ 		!TransactionIdIsValid(xidVacLimit),
+ 		TransactionIdFollowsOrEquals(nextXid, xidVacLimit),
+ 		nextXid, xidVacLimit,
+  		!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(oldestXidDB))
+ 	);
+ 	};
+ 
+ 
  	if (!TransactionIdIsNormal(oldestXid))
  		return true;			/* shouldn't happen, but just in case */
  	if (!TransactionIdIsValid(xidVacLimit))
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
new file mode 100644
index 1ac1c05..49f0585
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** BootStrapXLOG(void)
*** 4795,4800 ****
--- 4795,4801 ----
  	ShmemVariableCache->nextOid = checkPoint.nextOid;
  	ShmemVariableCache->oidCount = 0;
  	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
+ 	//elog(LOG,"JJ SetTransactionIDLimit %d", checkPoint.oldestXid);
  	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
  	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
  	SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId);
*************** StartupXLOG(void)
*** 6326,6331 ****
--- 6327,6333 ----
  	ShmemVariableCache->nextOid = checkPoint.nextOid;
  	ShmemVariableCache->oidCount = 0;
  	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
+ 	//elog(LOG,"JJ SetTransactionIDLimit %d", checkPoint.oldestXid);
  	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
  	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
  	SetCommitTsLimit(checkPoint.oldestCommitTs,
*************** xlog_redo(XLogReaderState *record)
*** 9239,9244 ****
--- 9241,9247 ----
  		 */
  		MultiXactAdvanceOldest(checkPoint.oldestMulti,
  							   checkPoint.oldestMultiDB);
+ 		//elog(LOG,"JJ SetTransactionIDLimit %d", checkPoint.oldestXid);
  		SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
  
  		/*
*************** xlog_redo(XLogReaderState *record)
*** 9336,9341 ****
--- 9339,9345 ----
  		 */
  		MultiXactAdvanceOldest(checkPoint.oldestMulti,
  							   checkPoint.oldestMultiDB);
+ 		//elog(LOG,"JJ maybe SetTransactionIDLimit %d", checkPoint.oldestXid);
  		if (TransactionIdPrecedes(ShmemVariableCache->oldestXid,
  								  checkPoint.oldestXid))
  			SetTransactionIdLimit(checkPoint.oldestXid,
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
new file mode 100644
index 698bb35..fbcdb2b
*** a/src/backend/commands/vacuum.c
--- b/src/backend/commands/vacuum.c
*************** int			vacuum_freeze_min_age;
*** 58,63 ****
--- 58,65 ----
  int			vacuum_freeze_table_age;
  int			vacuum_multixact_freeze_min_age;
  int			vacuum_multixact_freeze_table_age;
+ int			JJ_vac=0;
+ 
  
  
  /* A few variables that don't seem worth passing around as parameters */
*************** vacuum_set_xid_limits(Relation rel,
*** 530,535 ****
--- 532,538 ----
  	}
  
  	*freezeLimit = limit;
+ 	if (JJ_vac) elog(LOG,"JJ freezeLimit %d", *freezeLimit);
  
  	/*
  	 * Compute the multixact age for which freezing is urgent.  This is
*************** vacuum_set_xid_limits(Relation rel,
*** 584,589 ****
--- 587,594 ----
  		 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
  		 * before anti-wraparound autovacuum is launched.
  		 */
+ 		if (JJ_vac) elog(LOG,"JJ freeze_min_age %d vacuum_freeze_table_age %d freeze_table_age %d ReadNew %d", freeze_min_age, 
+                            vacuum_freeze_table_age, freeze_table_age,ReadNewTransactionId());
  		freezetable = freeze_table_age;
  		if (freezetable < 0)
  			freezetable = vacuum_freeze_table_age;
*************** vac_update_datfrozenxid(void)
*** 1021,1026 ****
--- 1026,1032 ----
  	 * truncate pg_clog and/or pg_multixact.  Also do it if the shared
  	 * XID-wrap-limit info is stale, since this action will update that too.
  	 */
+ 	if (JJ_vac && dirty) elog(LOG,"JJ updating in %d without call to ForceTransactionIdLimitUpdate", MyDatabaseId);
  	if (dirty || ForceTransactionIdLimitUpdate())
  		vac_truncate_clog(newFrozenXid, newMinMulti,
  						  lastSaneFrozenXid, lastSaneMinMulti);
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
new file mode 100644
index a01cfb4..7d462db
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 63,68 ****
--- 63,69 ----
  #include "utils/tqual.h"
  
  
+ extern int JJ_vac;
  /*
   * Space/time tradeoff parameters: do these need to be user-tunable?
   *
*************** lazy_vacuum_rel(Relation onerel, int opt
*** 228,233 ****
--- 229,236 ----
  	scan_all |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
  											mxactFullScanLimit);
  
+ 	if (JJ_vac) elog(LOG,"JJ scan_all %d, relfrozenid %d", scan_all, onerel->rd_rel->relfrozenxid);
+ 
  	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
  
  	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
new file mode 100644
index 94e748e..f38a016
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
*************** int			autovacuum_vac_cost_delay;
*** 122,127 ****
--- 122,128 ----
  int			autovacuum_vac_cost_limit;
  
  int			Log_autovacuum_min_duration = -1;
+ extern int  JJ_vac;
  
  /* how long to keep pgstat data in the launcher, in milliseconds */
  #define STATS_READ_DELAY 1000
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1635,1642 ****
  		InitPostgres(NULL, dbid, NULL, InvalidOid, dbname);
  		SetProcessingMode(NormalProcessing);
  		set_ps_display(dbname, false);
- 		ereport(DEBUG1,
- 				(errmsg("autovacuum: processing database \"%s\"", dbname)));
  
  		if (PostAuthDelay)
  			pg_usleep(PostAuthDelay * 1000000L);
--- 1636,1641 ----
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1644,1650 ****
--- 1643,1653 ----
  		/* And do an appropriate amount of work */
  		recentXid = ReadNewTransactionId();
  		recentMulti = ReadNextMultiXactId();
+ 		if (JJ_vac) ereport(LOG,
+ 				(errmsg("autovacuum: processing database \"%s\" at recent Xid of %u recent mxid of %u", dbname,recentXid,recentMulti)));
  		do_autovacuum();
+ 		if (JJ_vac) ereport(LOG,
+ 				(errmsg("autovacuum: done processing database \"%s\" at recent Xid of %u recent mxid of %u", dbname,ReadNewTransactionId(),ReadNextMultiXactId())));
  	}
  
  	/*
*************** relation_needs_vacanalyze(Oid relid,
*** 2761,2773 ****
  		 * reset, because if that happens, the last vacuum and analyze counts
  		 * will be reset too.
  		 */
- 		elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
- 			 NameStr(classForm->relname),
- 			 vactuples, vacthresh, anltuples, anlthresh);
  
  		/* Determine if this table needs vacuum or analyze. */
  		*dovacuum = force_vacuum || (vactuples > vacthresh);
  		*doanalyze = (anltuples > anlthresh);
  	}
  	else
  	{
--- 2764,2777 ----
  		 * reset, because if that happens, the last vacuum and analyze counts
  		 * will be reset too.
  		 */
  
  		/* Determine if this table needs vacuum or analyze. */
  		*dovacuum = force_vacuum || (vactuples > vacthresh);
  		*doanalyze = (anltuples > anlthresh);
+ 
+ 		if (JJ_vac) elog(LOG, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f) wraparound %d dovaccum %d doanalyze %d",
+ 			 NameStr(classForm->relname),
+ 			 vactuples, vacthresh, anltuples, anlthresh, *wraparound, *dovacuum, *doanalyze);
  	}
  	else
  	{
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
new file mode 100644
index 42a43bb..76b7466
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 67,72 ****
--- 67,74 ----
  #define FILE_POSSIBLY_DELETED(err)	((err) == ENOENT || (err) == EACCES)
  #endif
  
+ int JJ_torn_page=0;
+ 
  /*
   *	The magnetic disk storage manager keeps track of open file
   *	descriptors in its own descriptor pool.  This is done to make it
*************** mdwrite(SMgrRelation reln, ForkNumber fo
*** 743,748 ****
--- 745,751 ----
  	off_t		seekpos;
  	int			nbytes;
  	MdfdVec    *v;
+         static int counter=0;
  
  	/* This assert is too expensive to have on normally ... */
  #ifdef CHECK_WRITE_VS_EXTEND
*************** mdwrite(SMgrRelation reln, ForkNumber fo
*** 767,773 ****
  				 errmsg("could not seek to block %u in file \"%s\": %m",
  						blocknum, FilePathName(v->mdfd_vfd))));
  
! 	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ);
  
  	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
  										reln->smgr_rnode.node.spcNode,
--- 770,787 ----
  				 errmsg("could not seek to block %u in file \"%s\": %m",
  						blocknum, FilePathName(v->mdfd_vfd))));
  
!         if (JJ_torn_page > 0 && counter++ > JJ_torn_page && !RecoveryInProgress()) {
! 	  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ/3);
! 		ereport(FATAL,
! 				(errcode(ERRCODE_DISK_FULL),
! 				 errmsg("could not write block %u of relation %s: wrote only %d of %d bytes",
! 						blocknum,
! 						relpath(reln->smgr_rnode, forknum),
! 						nbytes, BLCKSZ),
! 				 errhint("JJ is screwing with the database.")));
!         } else {
! 	  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ);
! 	}
  
  	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
  										reln->smgr_rnode.node.spcNode,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
new file mode 100644
index 3d0eb2d..32f150c
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 106,111 ****
--- 106,114 ----
  /* XXX these should appear in other modules' header files */
  extern bool Log_disconnections;
  extern int	CommitDelay;
+ int	JJ_torn_page;
+ extern int	JJ_xid;
+ extern int	JJ_vac;
  extern int	CommitSiblings;
  extern char *default_tablespace;
  extern char *temp_tablespaces;
*************** static struct config_int ConfigureNamesI
*** 2267,2272 ****
--- 2270,2302 ----
  	},
  
  	{
+ 		{"JJ_torn_page", PGC_USERSET, WAL_SETTINGS,
+ 			gettext_noop("Simulate a torn-page crash after this number of page writes (0 to turn off)"),
+ 			NULL
+ 		},
+ 		&JJ_torn_page,
+ 		0, 0, 100000, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"JJ_xid", PGC_USERSET, WAL_SETTINGS,
+ 			gettext_noop("Skip this many xid every time we acquire one"),
+ 			NULL
+ 		},
+ 		&JJ_xid,
+ 		0, 0, 1000000, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"JJ_vac", PGC_USERSET, WAL_SETTINGS,
+ 			gettext_noop("turn on verbose logging"),
+ 			NULL
+ 		},
+ 		&JJ_vac,
+ 		0, 0, 1000000, NULL, NULL
+ 	},
+ 
+ 	{
  		{"commit_siblings", PGC_USERSET, WAL_SETTINGS,
  			gettext_noop("Sets the minimum concurrent open transactions before performing "
  						 "commit_delay."),
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
new file mode 100644
index 9f9df9c..47ef38d
*** a/src/include/access/multixact.h
--- b/src/include/access/multixact.h
*************** typedef struct xl_multixact_truncate
*** 102,111 ****
  extern MultiXactId MultiXactIdCreate(TransactionId xid1,
  				  MultiXactStatus status1, TransactionId xid2,
  				  MultiXactStatus status2);
  extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
  				  MultiXactStatus status);
  extern MultiXactId MultiXactIdCreateFromMembers(int nmembers,
! 							 MultiXactMember *members);
  
  extern MultiXactId ReadNextMultiXactId(void);
  extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);
--- 102,112 ----
  extern MultiXactId MultiXactIdCreate(TransactionId xid1,
  				  MultiXactStatus status1, TransactionId xid2,
  				  MultiXactStatus status2);
+ extern MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members, bool nocache);
  extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
  				  MultiXactStatus status);
  extern MultiXactId MultiXactIdCreateFromMembers(int nmembers,
! 							 MultiXactMember *members, bool nocache);
  
  extern MultiXactId ReadNextMultiXactId(void);
  extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
new file mode 100644
index e278fa0..0c9b545
*** a/src/include/pg_config_manual.h
--- b/src/include/pg_config_manual.h
***************
*** 281,287 ****
  /*
   * Enable debugging print statements for lock-related operations.
   */
! /* #define LOCK_DEBUG */
  
  /*
   * Enable debugging print statements for WAL-related operations; see
--- 281,287 ----
  /*
   * Enable debugging print statements for lock-related operations.
   */
! #define LOCK_DEBUG 1
  
  /*
   * Enable debugging print statements for WAL-related operations; see

Alvaro Herrera

alvherre@alvh.no-ip.org

over 8 years ago

In reply to: Jeff Janes (#6)

Re: 10RC1 crash testing MultiXact oddity

Jeff Janes wrote:

On Fri, Sep 22, 2017 at 1:19 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Fri, Sep 22, 2017 at 3:39 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

It turns out it is not new in pg10. I spotted in the log file only by
accident while looking for something else. Now that I am looking for

it, I

do see it in 9.6 as well.

So I guess the next question is whether it also shows up if you initdb
with 9.4.latest and then run the same test.

git bisect shows that it shows up in 9.5, at this commit:

commit bd7c348d83a4576163b635010e49dbcac7126f01
Author: Andres Freund <andres@anarazel.de>
Date: Sat Sep 26 19:04:25 2015 +0200

Rework the way multixact truncations work.

Oh man. And I thought we were done with that stuff :-(

Not really sure what the next step is here. I could promote the
ereport(LOG...) to a PANIC to get a core dump, but I don't think that would
help because presumably the problem occurred early, when the truncation was
done, not when it was detected.

Probably the best way to track it down is to add some instrumentation
elog(LOG) to the multixact truncation mechanism.

--
ï¿½lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers