New pg_ctl has retrogressed in error messages

Started by Tom Laneover 21 years ago5 messages
#1Tom Lane
tgl@sss.pgh.pa.us

7.4, on not finding a postmaster:

[tgl@rh1 pgsql]$ pg_ctl stop
/home/tgl/version74/bin/pg_ctl: line 274: kill: (15273) - No such process
waiting for postmaster to shut down................................................................ failed
pg_ctl: postmaster does not shut down

CVS tip, same scenario:

[tgl@rh1 pgsql]$ pg_ctl stop
stop signal failed

Not waiting for the results of a failed kill() is good, but the error
message is exceedingly unhelpful. It should mention the PID it tried to
kill and give the errno string. Perhaps

failed to signal postmaster process 15273: No such process

regards, tom lane

#2Bruce Momjian
pgman@candle.pha.pa.us
In reply to: Tom Lane (#1)
1 attachment(s)
Re: [HACKERS] New pg_ctl has retrogressed in error messages

Tom Lane wrote:

7.4, on not finding a postmaster:

[tgl@rh1 pgsql]$ pg_ctl stop
/home/tgl/version74/bin/pg_ctl: line 274: kill: (15273) - No such process
waiting for postmaster to shut down................................................................ failed
pg_ctl: postmaster does not shut down

CVS tip, same scenario:

[tgl@rh1 pgsql]$ pg_ctl stop
stop signal failed

Not waiting for the results of a failed kill() is good, but the error
message is exceedingly unhelpful. It should mention the PID it tried to
kill and give the errno string. Perhaps

failed to signal postmaster process 15273: No such process

OK, new output is:

$ aspg pg_ctl stop
stop signal failed (PID: 3603): No such process

I also changed all the pid variables to use pid_t.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Attachments:

/bjm/difftext/plainDownload
Index: src/bin/pg_ctl/pg_ctl.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/bin/pg_ctl/pg_ctl.c,v
retrieving revision 1.1
diff -c -c -r1.1 pg_ctl.c
*** src/bin/pg_ctl/pg_ctl.c	27 May 2004 03:37:55 -0000	1.1
--- src/bin/pg_ctl/pg_ctl.c	31 May 2004 17:56:20 -0000
***************
*** 59,65 ****
  static bool silence_echo = false;
  static ShutdownMode shutdown_mode = SMART_MODE;
  static int	sig = SIGTERM;	/* default */
- static int	killproc;
  static CtlCommand ctl_command = NO_COMMAND;
  static char *pg_data_opts = NULL;
  static char *pg_data = NULL;
--- 59,64 ----
***************
*** 80,87 ****
  static void do_restart(void);
  static void do_reload(void);
  static void do_status(void);
! static void do_kill(void);
! static long get_pgpid(void);
  static char **readfile(char *path);
  static int start_postmaster(void);
  static bool test_postmaster_connection(void);
--- 79,86 ----
  static void do_restart(void);
  static void do_reload(void);
  static void do_status(void);
! static void do_kill(pid_t pid);
! static pid_t get_pgpid(void);
  static char **readfile(char *path);
  static int start_postmaster(void);
  static bool test_postmaster_connection(void);
***************
*** 127,137 ****
  
  
  
! static long
  get_pgpid(void)
  {
  	FILE	   *pidf;
! 	long		pid;
  
  	pidf = fopen(pid_file, "r");
  	if (pidf == NULL)
--- 126,136 ----
  
  
  
! static pid_t
  get_pgpid(void)
  {
  	FILE	   *pidf;
! 	pid_t		pid;
  
  	pidf = fopen(pid_file, "r");
  	if (pidf == NULL)
***************
*** 145,151 ****
  			exit(1);
  		}
  	}
! 	fscanf(pidf, "%ld", &pid);
  	fclose(pidf);
  	return pid;
  }
--- 144,150 ----
  			exit(1);
  		}
  	}
! 	fscanf(pidf, "%u", &pid);
  	fclose(pidf);
  	return pid;
  }
***************
*** 324,331 ****
  static void
  do_start(void)
  {
! 	long		pid;
! 	long		old_pid = 0;
  	char	   *optline = NULL;
  
  	if (ctl_command != RESTART_COMMAND)
--- 323,330 ----
  static void
  do_start(void)
  {
! 	pid_t		pid;
! 	pid_t		old_pid = 0;
  	char	   *optline = NULL;
  
  	if (ctl_command != RESTART_COMMAND)
***************
*** 458,464 ****
  do_stop(void)
  {
  	int			cnt;
! 	long		pid;
  
  	pid = get_pgpid();
  
--- 457,463 ----
  do_stop(void)
  {
  	int			cnt;
! 	pid_t		pid;
  
  	pid = get_pgpid();
  
***************
*** 473,486 ****
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot stop postmaster; "
! 				"postgres is running (PID: %ld)\n"),
  				progname, pid);
  		exit(1);
  	}
  
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed\n"));
  		exit(1);
  	}
  
--- 472,486 ----
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot stop postmaster; "
! 				"postgres is running (PID: %u)\n"),
  				progname, pid);
  		exit(1);
  	}
  
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed (PID: %u): %s\n"), pid,
! 				strerror(errno));
  		exit(1);
  	}
  
***************
*** 537,543 ****
  do_restart(void)
  {
  	int			cnt;
! 	long		pid;
  
  	pid = get_pgpid();
  
--- 537,543 ----
  do_restart(void)
  {
  	int			cnt;
! 	pid_t		pid;
  
  	pid = get_pgpid();
  
***************
*** 553,567 ****
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot restart postmaster; "
! 				"postgres is running (PID: %ld)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed\n"));
  		exit(1);
  	}
  
--- 553,568 ----
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot restart postmaster; "
! 				"postgres is running (PID: %u)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed (PID: %u): %s\n"), pid,
! 				strerror(errno));
  		exit(1);
  	}
  
***************
*** 608,614 ****
  static void
  do_reload(void)
  {
! 	long		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
--- 609,615 ----
  static void
  do_reload(void)
  {
! 	pid_t		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
***************
*** 622,636 ****
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot reload postmaster; "
! 				"postgres is running (PID: %ld)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("reload signal failed\n"));
  		exit(1);
  	}
  
--- 623,638 ----
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot reload postmaster; "
! 				"postgres is running (PID: %u)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("reload signal failed (PID: %u): %s\n"), pid,
! 				strerror(errno));
  		exit(1);
  	}
  
***************
*** 645,651 ****
  static void
  do_status(void)
  {
! 	long		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
--- 647,653 ----
  static void
  do_status(void)
  {
! 	pid_t		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
***************
*** 656,668 ****
  	else if (pid < 0)			/* standalone backend */
  	{
  		pid = -pid;
! 		fprintf(stdout, _("%s: a standalone backend \"postgres\" is running (PID: %ld)\n"), progname, pid);
  	}
  	else						/* postmaster */
  	{
  		char	  **optlines;
  
! 		fprintf(stdout, _("%s: postmaster is running (PID: %ld)\n"), progname, pid);
  
  		optlines = readfile(postopts_file);
  		if (optlines != NULL)
--- 658,670 ----
  	else if (pid < 0)			/* standalone backend */
  	{
  		pid = -pid;
! 		fprintf(stdout, _("%s: a standalone backend \"postgres\" is running (PID: %u)\n"), progname, pid);
  	}
  	else						/* postmaster */
  	{
  		char	  **optlines;
  
! 		fprintf(stdout, _("%s: postmaster is running (PID: %u)\n"), progname, pid);
  
  		optlines = readfile(postopts_file);
  		if (optlines != NULL)
***************
*** 674,684 ****
  
  
  static void
! do_kill(void)
  {
! 	if (kill(killproc, sig) != 0)
  	{
! 		fprintf(stderr, _("signal %d failed\n"), sig);
  		exit(1);
  	}
  }
--- 676,687 ----
  
  
  static void
! do_kill(pid_t pid)
  {
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("signal %d failed (PID: %u): %s\n"), sig, pid,
! 				strerror(errno));
  		exit(1);
  	}
  }
***************
*** 810,815 ****
--- 813,819 ----
  
  	int			option_index;
  	int			c;
+ 	int			killproc = 0;
  	
  #ifdef WIN32
  	setvbuf(stderr, NULL, _IONBF, 0);
***************
*** 1005,1011 ****
  			do_reload();
  			break;
  		case KILL_COMMAND:
! 			do_kill();
  			break;
  		default:
  			break;
--- 1009,1015 ----
  			do_reload();
  			break;
  		case KILL_COMMAND:
! 			do_kill(killproc);
  			break;
  		default:
  			break;
#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Bruce Momjian (#2)
Re: [HACKERS] New pg_ctl has retrogressed in error messages

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I also changed all the pid variables to use pid_t.

Good, but ...

! fscanf(pidf, "%u", &pid);

this code will fail rather horribly if sizeof(pid_t) != sizeof(int).
Even more to the point, I believe a standalone backend will put
the negative of its PID into the file, and the revised code will fail
to parse that at all.

I think the safest code would be like

long tmp;

fscanf(pidf, "%ld", &tmp);
if (tmp < 0)
{
tmp = -tmp;
// do anything else needed for backend case
}
pid = (pid_t) tmp;

regards, tom lane

#4Andrew Dunstan
andrew@dunslane.net
In reply to: Tom Lane (#3)
Re: [HACKERS] New pg_ctl has retrogressed in error messages

Tom Lane wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I also changed all the pid variables to use pid_t.

Good, but ...

! fscanf(pidf, "%u", &pid);

this code will fail rather horribly if sizeof(pid_t) != sizeof(int).
Even more to the point, I believe a standalone backend will put
the negative of its PID into the file, and the revised code will fail
to parse that at all.

I think the safest code would be like

long tmp;

fscanf(pidf, "%ld", &tmp);
if (tmp < 0)
{
tmp = -tmp;
// do anything else needed for backend case
}
pid = (pid_t) tmp;

I deliberately used a signed long for these reasons in the first place.

The number of places we actually need to use this value as a pid is
small (3 by my count - in calls to kill() ), and it was cast there to
pid_t, I think. I still don't see what's wrong with that.

cheers

andrew

#5Bruce Momjian
pgman@candle.pha.pa.us
In reply to: Andrew Dunstan (#4)
1 attachment(s)
Re: [HACKERS] New pg_ctl has retrogressed in error messages

Andrew Dunstan wrote:

this code will fail rather horribly if sizeof(pid_t) != sizeof(int).
Even more to the point, I believe a standalone backend will put
the negative of its PID into the file, and the revised code will fail
to parse that at all.

I think the safest code would be like

long tmp;

fscanf(pidf, "%ld", &tmp);
if (tmp < 0)
{
tmp = -tmp;
// do anything else needed for backend case
}
pid = (pid_t) tmp;

I deliberately used a signed long for these reasons in the first place.

The number of places we actually need to use this value as a pid is
small (3 by my count - in calls to kill() ), and it was cast there to
pid_t, I think. I still don't see what's wrong with that.

OK, I created a typedef pgpid_t equal to long and used that in the code,
change format to %ld, and documented why negative values are an issue.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Attachments:

/bjm/difftext/plainDownload
? pg_ctl
Index: pg_ctl.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/bin/pg_ctl/pg_ctl.c,v
retrieving revision 1.2
diff -c -c -r1.2 pg_ctl.c
*** pg_ctl.c	31 May 2004 17:57:31 -0000	1.2
--- pg_ctl.c	1 Jun 2004 01:26:40 -0000
***************
*** 24,29 ****
--- 24,31 ----
  int			optreset;
  #endif
  
+ /* PID can be negative for standalone backend */
+ typedef long pgpid_t;
  
  #define _(x) gettext((x))
  
***************
*** 79,86 ****
  static void do_restart(void);
  static void do_reload(void);
  static void do_status(void);
! static void do_kill(pid_t pid);
! static pid_t get_pgpid(void);
  static char **readfile(char *path);
  static int start_postmaster(void);
  static bool test_postmaster_connection(void);
--- 81,88 ----
  static void do_restart(void);
  static void do_reload(void);
  static void do_status(void);
! static void do_kill(pgpid_t pid);
! static pgpid_t get_pgpid(void);
  static char **readfile(char *path);
  static int start_postmaster(void);
  static bool test_postmaster_connection(void);
***************
*** 126,136 ****
  
  
  
! static pid_t
  get_pgpid(void)
  {
  	FILE	   *pidf;
! 	pid_t		pid;
  
  	pidf = fopen(pid_file, "r");
  	if (pidf == NULL)
--- 128,138 ----
  
  
  
! static pgpid_t
  get_pgpid(void)
  {
  	FILE	   *pidf;
! 	pgpid_t		pid;
  
  	pidf = fopen(pid_file, "r");
  	if (pidf == NULL)
***************
*** 144,150 ****
  			exit(1);
  		}
  	}
! 	fscanf(pidf, "%u", &pid);
  	fclose(pidf);
  	return pid;
  }
--- 146,152 ----
  			exit(1);
  		}
  	}
! 	fscanf(pidf, "%ld", &pid);
  	fclose(pidf);
  	return pid;
  }
***************
*** 323,330 ****
  static void
  do_start(void)
  {
! 	pid_t		pid;
! 	pid_t		old_pid = 0;
  	char	   *optline = NULL;
  
  	if (ctl_command != RESTART_COMMAND)
--- 325,332 ----
  static void
  do_start(void)
  {
! 	pgpid_t		pid;
! 	pgpid_t		old_pid = 0;
  	char	   *optline = NULL;
  
  	if (ctl_command != RESTART_COMMAND)
***************
*** 457,463 ****
  do_stop(void)
  {
  	int			cnt;
! 	pid_t		pid;
  
  	pid = get_pgpid();
  
--- 459,465 ----
  do_stop(void)
  {
  	int			cnt;
! 	pgpid_t		pid;
  
  	pid = get_pgpid();
  
***************
*** 472,485 ****
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot stop postmaster; "
! 				"postgres is running (PID: %u)\n"),
  				progname, pid);
  		exit(1);
  	}
  
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed (PID: %u): %s\n"), pid,
  				strerror(errno));
  		exit(1);
  	}
--- 474,487 ----
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot stop postmaster; "
! 				"postgres is running (PID: %ld)\n"),
  				progname, pid);
  		exit(1);
  	}
  
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed (PID: %ld): %s\n"), pid,
  				strerror(errno));
  		exit(1);
  	}
***************
*** 537,543 ****
  do_restart(void)
  {
  	int			cnt;
! 	pid_t		pid;
  
  	pid = get_pgpid();
  
--- 539,545 ----
  do_restart(void)
  {
  	int			cnt;
! 	pgpid_t		pid;
  
  	pid = get_pgpid();
  
***************
*** 553,567 ****
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot restart postmaster; "
! 				"postgres is running (PID: %u)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed (PID: %u): %s\n"), pid,
  				strerror(errno));
  		exit(1);
  	}
--- 555,569 ----
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot restart postmaster; "
! 				"postgres is running (PID: %ld)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("stop signal failed (PID: %ld): %s\n"), pid,
  				strerror(errno));
  		exit(1);
  	}
***************
*** 609,615 ****
  static void
  do_reload(void)
  {
! 	pid_t		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
--- 611,617 ----
  static void
  do_reload(void)
  {
! 	pgpid_t		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
***************
*** 623,637 ****
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot reload postmaster; "
! 				"postgres is running (PID: %u)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("reload signal failed (PID: %u): %s\n"), pid,
  				strerror(errno));
  		exit(1);
  	}
--- 625,639 ----
  		pid = -pid;
  		fprintf(stderr,
  				_("%s: cannot reload postmaster; "
! 				"postgres is running (PID: %ld)\n"),
  				progname, pid);
  		fprintf(stderr, _("Please terminate postgres and try again.\n"));
  		exit(1);
  	}
  
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("reload signal failed (PID: %ld): %s\n"), pid,
  				strerror(errno));
  		exit(1);
  	}
***************
*** 647,653 ****
  static void
  do_status(void)
  {
! 	pid_t		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
--- 649,655 ----
  static void
  do_status(void)
  {
! 	pgpid_t		pid;
  
  	pid = get_pgpid();
  	if (pid == 0)				/* no pid file */
***************
*** 658,670 ****
  	else if (pid < 0)			/* standalone backend */
  	{
  		pid = -pid;
! 		fprintf(stdout, _("%s: a standalone backend \"postgres\" is running (PID: %u)\n"), progname, pid);
  	}
  	else						/* postmaster */
  	{
  		char	  **optlines;
  
! 		fprintf(stdout, _("%s: postmaster is running (PID: %u)\n"), progname, pid);
  
  		optlines = readfile(postopts_file);
  		if (optlines != NULL)
--- 660,672 ----
  	else if (pid < 0)			/* standalone backend */
  	{
  		pid = -pid;
! 		fprintf(stdout, _("%s: a standalone backend \"postgres\" is running (PID: %ld)\n"), progname, pid);
  	}
  	else						/* postmaster */
  	{
  		char	  **optlines;
  
! 		fprintf(stdout, _("%s: postmaster is running (PID: %ld)\n"), progname, pid);
  
  		optlines = readfile(postopts_file);
  		if (optlines != NULL)
***************
*** 676,686 ****
  
  
  static void
! do_kill(pid_t pid)
  {
! 	if (kill(pid, sig) != 0)
  	{
! 		fprintf(stderr, _("signal %d failed (PID: %u): %s\n"), sig, pid,
  				strerror(errno));
  		exit(1);
  	}
--- 678,688 ----
  
  
  static void
! do_kill(pgpid_t pid)
  {
! 	if (kill((pid_t) pid, sig) != 0)
  	{
! 		fprintf(stderr, _("signal %d failed (PID: %ld): %s\n"), sig, pid,
  				strerror(errno));
  		exit(1);
  	}
***************
*** 813,819 ****
  
  	int			option_index;
  	int			c;
! 	int			killproc = 0;
  	
  #ifdef WIN32
  	setvbuf(stderr, NULL, _IONBF, 0);
--- 815,821 ----
  
  	int			option_index;
  	int			c;
! 	pgpid_t		killproc = 0;
  	
  #ifdef WIN32
  	setvbuf(stderr, NULL, _IONBF, 0);