pg_upgrade cleanup

Started by Bruce Momjianover 10 years ago7 messages

Bruce Momjian

bruce@momjian.us

over 10 years ago

1 attachment(s)

This patch makes pg_upgrade controldata checks more consistent, and adds
a missing check for float8_pass_by_value.

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ Everyone has their own god. +

Attachments:

pg_upgrade.difftext/x-diff; charset=us-asciiDownload

diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
new file mode 100644
index bf53db0..0608b64
*** a/src/bin/pg_upgrade/controldata.c
--- b/src/bin/pg_upgrade/controldata.c
*************** get_control_data(ClusterInfo *cluster, b
*** 154,176 ****
  			p++;				/* remove ':' char */
  			cluster->controldata.cat_ver = str2uint(p);
  		}
- 		else if ((p = strstr(bufin, "First log segment after reset:")) != NULL)
- 		{
- 			/* Skip the colon and any whitespace after it */
- 			p = strchr(p, ':');
- 			if (p == NULL || strlen(p) <= 1)
- 				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
- 			p = strpbrk(p, "01234567890ABCDEF");
- 			if (p == NULL || strlen(p) <= 1)
- 				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
- 
- 			/* Make sure it looks like a valid WAL file name */
- 			if (strspn(p, "0123456789ABCDEF") != 24)
- 				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
- 
- 			strlcpy(cluster->controldata.nextxlogfile, p, 25);
- 			got_nextxlogfile = true;
- 		}
  		else if ((p = strstr(bufin, "First log file ID after reset:")) != NULL)
  		{
  			p = strchr(p, ':');
--- 154,159 ----
*************** get_control_data(ClusterInfo *cluster, b
*** 201,207 ****
  				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
  
  			p++;				/* remove ':' char */
! 			cluster->controldata.chkpnt_tli = str2uint(p);
  			got_tli = true;
  		}
  		else if ((p = strstr(bufin, "Latest checkpoint's NextXID:")) != NULL)
--- 184,190 ----
  				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
  
  			p++;				/* remove ':' char */
! 			tli = str2uint(p);
  			got_tli = true;
  		}
  		else if ((p = strstr(bufin, "Latest checkpoint's NextXID:")) != NULL)
*************** get_control_data(ClusterInfo *cluster, b
*** 266,271 ****
--- 249,271 ----
  			cluster->controldata.chkpnt_nxtmxoff = str2uint(p);
  			got_mxoff = true;
  		}
+ 		else if ((p = strstr(bufin, "First log segment after reset:")) != NULL)
+ 		{
+ 			/* Skip the colon and any whitespace after it */
+ 			p = strchr(p, ':');
+ 			if (p == NULL || strlen(p) <= 1)
+ 				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+ 			p = strpbrk(p, "01234567890ABCDEF");
+ 			if (p == NULL || strlen(p) <= 1)
+ 				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+ 
+ 			/* Make sure it looks like a valid WAL file name */
+ 			if (strspn(p, "0123456789ABCDEF") != 24)
+ 				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+ 
+ 			strlcpy(cluster->controldata.nextxlogfile, p, 25);
+ 			got_nextxlogfile = true;
+ 		}
  		else if ((p = strstr(bufin, "Maximum data alignment:")) != NULL)
  		{
  			p = strchr(p, ':');
*************** get_control_data(ClusterInfo *cluster, b
*** 436,442 ****
  	 */
  	if (GET_MAJOR_VERSION(cluster->major_version) <= 902)
  	{
! 		if (got_log_id && got_log_seg)
  		{
  			snprintf(cluster->controldata.nextxlogfile, 25, "%08X%08X%08X",
  					 tli, logid, segno);
--- 436,442 ----
  	 */
  	if (GET_MAJOR_VERSION(cluster->major_version) <= 902)
  	{
! 		if (got_tli && got_log_id && got_log_seg)
  		{
  			snprintf(cluster->controldata.nextxlogfile, 25, "%08X%08X%08X",
  					 tli, logid, segno);
*************** get_control_data(ClusterInfo *cluster, b
*** 446,456 ****
  
  	/* verify that we got all the mandatory pg_control data */
  	if (!got_xid || !got_oid ||
! 		!got_multi || !got_mxoff ||
  		(!got_oldestmulti &&
  		 cluster->controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) ||
! 		(!live_check && !got_nextxlogfile) ||
! 		!got_tli ||
  		!got_align || !got_blocksz || !got_largesz || !got_walsz ||
  		!got_walseg || !got_ident || !got_index || !got_toast ||
  		(!got_large_object &&
--- 446,455 ----
  
  	/* verify that we got all the mandatory pg_control data */
  	if (!got_xid || !got_oid ||
! 		!got_multi ||
  		(!got_oldestmulti &&
  		 cluster->controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) ||
! 		!got_mxoff || (!live_check && !got_nextxlogfile) ||
  		!got_align || !got_blocksz || !got_largesz || !got_walsz ||
  		!got_walseg || !got_ident || !got_index || !got_toast ||
  		(!got_large_object &&
*************** get_control_data(ClusterInfo *cluster, b
*** 470,488 ****
  		if (!got_multi)
  			pg_log(PG_REPORT, "  latest checkpoint next MultiXactId\n");
  
- 		if (!got_mxoff)
- 			pg_log(PG_REPORT, "  latest checkpoint next MultiXactOffset\n");
- 
  		if (!got_oldestmulti &&
  			cluster->controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
  			pg_log(PG_REPORT, "  latest checkpoint oldest MultiXactId\n");
  
  		if (!live_check && !got_nextxlogfile)
  			pg_log(PG_REPORT, "  first WAL segment after reset\n");
  
- 		if (!got_tli)
- 			pg_log(PG_REPORT, "  latest checkpoint timeline ID\n");
- 
  		if (!got_align)
  			pg_log(PG_REPORT, "  maximum alignment\n");
  
--- 469,484 ----
  		if (!got_multi)
  			pg_log(PG_REPORT, "  latest checkpoint next MultiXactId\n");
  
  		if (!got_oldestmulti &&
  			cluster->controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
  			pg_log(PG_REPORT, "  latest checkpoint oldest MultiXactId\n");
  
+ 		if (!got_mxoff)
+ 			pg_log(PG_REPORT, "  latest checkpoint next MultiXactOffset\n");
+ 
  		if (!live_check && !got_nextxlogfile)
  			pg_log(PG_REPORT, "  first WAL segment after reset\n");
  
  		if (!got_align)
  			pg_log(PG_REPORT, "  maximum alignment\n");
  
*************** check_control_data(ControlData *oldctrl,
*** 568,573 ****
--- 564,572 ----
  	if (oldctrl->date_is_int != newctrl->date_is_int)
  		pg_fatal("old and new pg_controldata date/time storage types do not match\n");
  
+ 	if (oldctrl->float8_pass_by_value != newctrl->float8_pass_by_value)
+ 		pg_fatal("old and new pg_controldata float8 argument passing methods do not match\n");
+ 
  	/*
  	 * We might eventually allow upgrades from checksum to no-checksum
  	 * clusters.
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
new file mode 100644
index bb035e1..aecf0df
*** a/src/bin/pg_upgrade/pg_upgrade.h
--- b/src/bin/pg_upgrade/pg_upgrade.h
*************** typedef struct
*** 206,212 ****
  	uint32		ctrl_ver;
  	uint32		cat_ver;
  	char		nextxlogfile[25];
- 	uint32		chkpnt_tli;
  	uint32		chkpnt_nxtxid;
  	uint32		chkpnt_nxtepoch;
  	uint32		chkpnt_nxtoid;
--- 206,211 ----

Bruce Momjian

bruce@momjian.us

over 10 years ago

In reply to: Bruce Momjian (#1)

Re: pg_upgrade cleanup

On Thu, May 14, 2015 at 09:56:53PM -0400, Bruce Momjian wrote:

This patch makes pg_upgrade controldata checks more consistent, and adds
a missing check for float8_pass_by_value.

Sorry, I should have mentioned I applied this patch to head. It isn't
significant enough to backpatch.

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ Everyone has their own god. +

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Noah Misch

noah@leadboat.com

over 10 years ago

In reply to: Bruce Momjian (#2)

Re: pg_upgrade cleanup

On Thu, May 14, 2015 at 10:06:11PM -0400, Bruce Momjian wrote:

On Thu, May 14, 2015 at 09:56:53PM -0400, Bruce Momjian wrote:

This patch makes pg_upgrade controldata checks more consistent, and adds
a missing check for float8_pass_by_value.

Sorry, I should have mentioned I applied this patch to head. It isn't
significant enough to backpatch.

A float8_pass_by_value match is unnecessary, and requiring it creates needless
hassle for users. Switching between USE_FLOAT8_BYVAL binaries and
!USE_FLOAT8_BYVAL binaries requires an initdb to get different values in
pg_type.typbyval and pg_attribute.attbyval. pg_upgrade's use of pg_dump to
migrate catalog content addresses that fine. Note that
check_for_isn_and_int8_passing_mismatch() exists because pg_upgrade has
allowed source and destination clusters to differ in USE_FLOAT8_BYVAL.

The rest of this change is opaque to me. "More consistent" with what? The
old use of the "tli" variable sure looked like a bug, considering the variable
was never set to anything but zero. What is the anticipated behavior change?

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Bruce Momjian

bruce@momjian.us

over 10 years ago

In reply to: Noah Misch (#3)

Re: pg_upgrade cleanup

On Sat, May 16, 2015 at 12:21:12PM -0400, Noah Misch wrote:

On Thu, May 14, 2015 at 10:06:11PM -0400, Bruce Momjian wrote:

On Thu, May 14, 2015 at 09:56:53PM -0400, Bruce Momjian wrote:

This patch makes pg_upgrade controldata checks more consistent, and adds
a missing check for float8_pass_by_value.

Sorry, I should have mentioned I applied this patch to head. It isn't
significant enough to backpatch.

A float8_pass_by_value match is unnecessary, and requiring it creates needless
hassle for users. Switching between USE_FLOAT8_BYVAL binaries and
!USE_FLOAT8_BYVAL binaries requires an initdb to get different values in
pg_type.typbyval and pg_attribute.attbyval. pg_upgrade's use of pg_dump to
migrate catalog content addresses that fine. Note that
check_for_isn_and_int8_passing_mismatch() exists because pg_upgrade has
allowed source and destination clusters to differ in USE_FLOAT8_BYVAL.

What we had was checking for float8_pass_by_value, but did nothing with
it, so I assumed we had lost the check somehow. I will remove detecting
and checking of that value. Thanks.

The rest of this change is opaque to me. "More consistent" with what? The
old use of the "tli" variable sure looked like a bug, considering the variable
was never set to anything but zero. What is the anticipated behavior change?

The problem was that the option checking was not in a consistent order,
so there was no easy easy to make sure everything was being processed
properly. The new ordering is consistent.

I thought the tli was a harmless cleanup but I now see it was passing a
zero timeline to pg_resetxlog. The only reason that worked was because
pg_resetxlog ignores a timeline that is less than our current one, and
zero was always less than the timeline so pg_resetxlog was making no
timeline change at all. I will clean that up too in backbranches as it
is to odd. (I think it was broken by
038f3a05092365eca070bdc588554520dfd5ffb9).

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ Everyone has their own god. +

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Bruce Momjian

bruce@momjian.us

over 10 years ago

In reply to: Bruce Momjian (#4)

Re: pg_upgrade cleanup

On Sat, May 16, 2015 at 02:49:08PM -0400, Bruce Momjian wrote:

On Sat, May 16, 2015 at 12:21:12PM -0400, Noah Misch wrote:

On Thu, May 14, 2015 at 10:06:11PM -0400, Bruce Momjian wrote:

On Thu, May 14, 2015 at 09:56:53PM -0400, Bruce Momjian wrote:

This patch makes pg_upgrade controldata checks more consistent, and adds
a missing check for float8_pass_by_value.

Sorry, I should have mentioned I applied this patch to head. It isn't
significant enough to backpatch.

A float8_pass_by_value match is unnecessary, and requiring it creates needless
hassle for users. Switching between USE_FLOAT8_BYVAL binaries and
!USE_FLOAT8_BYVAL binaries requires an initdb to get different values in
pg_type.typbyval and pg_attribute.attbyval. pg_upgrade's use of pg_dump to
migrate catalog content addresses that fine. Note that
check_for_isn_and_int8_passing_mismatch() exists because pg_upgrade has
allowed source and destination clusters to differ in USE_FLOAT8_BYVAL.

What we had was checking for float8_pass_by_value, but did nothing with
it, so I assumed we had lost the check somehow. I will remove detecting
and checking of that value. Thanks.

Sorry, I mean we didn't do anything with it in controldata.c, but I had
forgotten how we use it for isn, so I just added a C comment that there
is no need to check that it matches. Thanks.

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ Everyone has their own god. +

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Bruce Momjian

bruce@momjian.us

over 10 years ago

In reply to: Noah Misch (#3)

Re: pg_upgrade cleanup

On Sat, May 16, 2015 at 12:21:12PM -0400, Noah Misch wrote:

The rest of this change is opaque to me. "More consistent" with what? The
old use of the "tli" variable sure looked like a bug, considering the variable
was never set to anything but zero. What is the anticipated behavior change?

The fact you saw the bug helps in another way. I was confused why we
had not gotten reports about incorrect timeline restoration in previous
versions of pg_upgrade. It turns out that through 9.2, we always used a
timeline of 1:

"\"%s/pg_resetxlog\" -l 1,%u,%u \"%s\"", new_cluster.bindir,
^

In 9.3 we added code to restore the timeline, but the code that read the
9.2 pg_controldata was buggy, so it tried to restore a timeline of 0,
which was ignored because the timeline can't be decreased with
pg_resetxlog. Only in 9.4 was the timeline properly restored, leading
to the missing history file error.

This confirms that setting the timeline to 1 unconditionally, as I did
on Friday, is the right fix, and I have added a C comment so we will
remember _why_ this has to be the case.

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ Everyone has their own god. +

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Noah Misch

noah@leadboat.com

over 10 years ago

In reply to: Bruce Momjian (#6)

Re: pg_upgrade cleanup

On Mon, May 18, 2015 at 10:58:42AM -0400, Bruce Momjian wrote:

On Sat, May 16, 2015 at 12:21:12PM -0400, Noah Misch wrote:

The rest of this change is opaque to me. "More consistent" with what? The
old use of the "tli" variable sure looked like a bug, considering the variable
was never set to anything but zero. What is the anticipated behavior change?

The fact you saw the bug helps in another way. I was confused why we
had not gotten reports about incorrect timeline restoration in previous
versions of pg_upgrade. It turns out that through 9.2, we always used a
timeline of 1:

"\"%s/pg_resetxlog\" -l 1,%u,%u \"%s\"", new_cluster.bindir,
^

In 9.3 we added code to restore the timeline, but the code that read the
9.2 pg_controldata was buggy, so it tried to restore a timeline of 0,
which was ignored because the timeline can't be decreased with
pg_resetxlog. Only in 9.4 was the timeline properly restored, leading
to the missing history file error.

That clears up several things. Interesting.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers