fork/exec patch

Started by Claudio Natoliabout 22 years ago46 messages

claudio.natoli@memetrics.com

about 22 years ago

1 attachment(s)

This patch is the next step towards (re)allowing fork/exec.

Bruce, I've cleaned up the parts we discussed, and, pending objections from
anyone else, it is ready for application to HEAD.

Cheers,
Claudio

--- 
Certain disclaimers and policies apply to all email sent from Memetrics.
For the full text of these disclaimers and policies see 
<a
href="http://www.memetrics.com/emailpolicy.html">http://www.memetrics.com/em
ailpolicy.html</a>

Attachments:

diff2c.outapplication/octet-stream; name=diff2c.outDownload

Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/access/transam/xlog.c,v
retrieving revision 1.127
diff -c -w -r1.127 xlog.c
*** src/backend/access/transam/xlog.c	12 Dec 2003 18:45:08 -0000	1.127
--- src/backend/access/transam/xlog.c	13 Dec 2003 06:18:30 -0000
***************
*** 166,172 ****
   * to update from XLogCtl->Insert.RedoRecPtr if we hold the info_lck;
   * see GetRedoRecPtr.
   */
! static XLogRecPtr RedoRecPtr;
  
  /*----------
   * Shared-memory data structures for XLOG control
--- 166,172 ----
   * to update from XLogCtl->Insert.RedoRecPtr if we hold the info_lck;
   * see GetRedoRecPtr.
   */
! NON_EXEC_STATIC XLogRecPtr RedoRecPtr;
  
  /*----------
   * Shared-memory data structures for XLOG control
***************
*** 231,242 ****
  	XLogRecPtr	Flush;			/* last byte + 1 to flush */
  } XLogwrtRqst;
  
- typedef struct XLogwrtResult
- {
- 	XLogRecPtr	Write;			/* last byte + 1 written out */
- 	XLogRecPtr	Flush;			/* last byte + 1 flushed */
- } XLogwrtResult;
- 
  /*
   * Shared state data for XLogInsert.
   */
--- 231,236 ----
***************
*** 404,410 ****
   * Private, possibly out-of-date copy of shared LogwrtResult.
   * See discussion above.
   */
! static XLogwrtResult LogwrtResult = {{0, 0}, {0, 0}};
  
  /*
   * openLogFile is -1 or a kernel FD for an open log file segment.
--- 398,404 ----
   * Private, possibly out-of-date copy of shared LogwrtResult.
   * See discussion above.
   */
! NON_EXEC_STATIC XLogwrtResult LogwrtResult = {{0, 0}, {0, 0}};
  
  /*
   * openLogFile is -1 or a kernel FD for an open log file segment.
***************
*** 2397,2403 ****
  void
  XLOGShmemInit(void)
  {
! 	bool		found;
  
  	/* this must agree with space requested by XLOGShmemSize() */
  	if (XLOGbuffers < MinXLOGbuffers)
--- 2391,2397 ----
  void
  XLOGShmemInit(void)
  {
! 	bool		foundXLog, foundCFile;
  
  	/* this must agree with space requested by XLOGShmemSize() */
  	if (XLOGbuffers < MinXLOGbuffers)
***************
*** 2408,2418 ****
  						MAXALIGN(sizeof(XLogCtlData) +
  								 sizeof(XLogRecPtr) * XLOGbuffers)
  						+ BLCKSZ * XLOGbuffers,
! 						&found);
! 	Assert(!found);
  	ControlFile = (ControlFileData *)
! 		ShmemInitStruct("Control File", sizeof(ControlFileData), &found);
! 	Assert(!found);
  
  	memset(XLogCtl, 0, sizeof(XLogCtlData));
  
--- 2402,2417 ----
  						MAXALIGN(sizeof(XLogCtlData) +
  								 sizeof(XLogRecPtr) * XLOGbuffers)
  						+ BLCKSZ * XLOGbuffers,
! 						&foundXLog);
  	ControlFile = (ControlFileData *)
! 		ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
! 
! 	if (foundXLog || foundCFile)
! 	{
! 		/* both should be present or neither */
! 		Assert(foundXLog && foundCFile);
! 		return;
! 	}
  
  	memset(XLogCtl, 0, sizeof(XLogCtlData));
  
Index: src/backend/bootstrap/bootstrap.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/bootstrap/bootstrap.c,v
retrieving revision 1.170
diff -c -w -r1.170 bootstrap.c
*** src/backend/bootstrap/bootstrap.c	12 Dec 2003 18:45:08 -0000	1.170
--- src/backend/bootstrap/bootstrap.c	13 Dec 2003 06:18:31 -0000
***************
*** 347,355 ****
  	if (!dbname || argc != optind)
  		usage();
  
! 
! 	if (IsUnderPostmaster && ExecBackend && MyProc /* ordinary backend */ )
  		AttachSharedMemoryAndSemaphores();
  
  	if (!IsUnderPostmaster /* when exec || ExecBackend */ )
  	{
--- 347,356 ----
  	if (!dbname || argc != optind)
  		usage();
  
! #ifdef EXEC_BACKEND
! 	if (IsUnderPostmaster && MyProc /* ordinary backend */ )
  		AttachSharedMemoryAndSemaphores();
+ #endif
  
  	if (!IsUnderPostmaster /* when exec || ExecBackend */ )
  	{
Index: src/backend/postmaster/pgstat.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/postmaster/pgstat.c,v
retrieving revision 1.48
diff -c -w -r1.48 pgstat.c
*** src/backend/postmaster/pgstat.c	29 Nov 2003 19:51:55 -0000	1.48
--- src/backend/postmaster/pgstat.c	13 Dec 2003 06:18:33 -0000
***************
*** 71,77 ****
   * Local data
   * ----------
   */
! static int	pgStatSock = -1;
  static int	pgStatPipe[2];
  static struct sockaddr_storage pgStatAddr;
  static int	pgStatPmPipe[2] = {-1, -1};
--- 71,77 ----
   * Local data
   * ----------
   */
! NON_EXEC_STATIC int	pgStatSock = -1;
  static int	pgStatPipe[2];
  static struct sockaddr_storage pgStatAddr;
  static int	pgStatPmPipe[2] = {-1, -1};
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/postmaster/postmaster.c,v
retrieving revision 1.351
diff -c -w -r1.351 postmaster.c
*** src/backend/postmaster/postmaster.c	1 Dec 2003 22:15:37 -0000	1.351
--- src/backend/postmaster/postmaster.c	13 Dec 2003 06:18:36 -0000
***************
*** 282,291 ****
  static void processCancelRequest(Port *port, void *pkt);
  static int	initMasks(fd_set *rmask);
  static void report_fork_failure_to_client(Port *port, int errnum);
- enum CAC_state
- {
- 	CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY
- };
  static enum CAC_state canAcceptConnections(void);
  static long PostmasterRandom(void);
  static void RandomSalt(char *cryptSalt, char *md5Salt);
--- 282,287 ----
***************
*** 298,303 ****
--- 294,302 ----
  /* This lets gcc check the format string for consistency. */
  __attribute__((format(printf, 1, 2)));
  
+ static void
+ write_backend_variables(pid_t pid, Port *port);
+ 
  #define StartupDataBase()		SSDataBase(BS_XLOG_STARTUP)
  #define CheckPointDataBase()	SSDataBase(BS_XLOG_CHECKPOINT)
  #define StartBackgroundWriter()	SSDataBase(BS_XLOG_BGWRITER)
***************
*** 1185,1191 ****
  static int
  ProcessStartupPacket(Port *port, bool SSLdone)
  {
- 	enum CAC_state cac;
  	int32		len;
  	void	   *buf;
  	ProtocolVersion proto;
--- 1184,1189 ----
***************
*** 1244,1250 ****
--- 1242,1252 ----
  
  	if (proto == CANCEL_REQUEST_CODE)
  	{
+ #ifdef EXEC_BACKEND
+ 		abort(); /* FIXME: [fork/exec] Whoops. Not handled... yet */
+ #else
  		processCancelRequest(port, buf);
+ #endif
  		return 127;				/* XXX */
  	}
  
***************
*** 1435,1443 ****
  	 * so now instead of wasting cycles on an authentication exchange.
  	 * (This also allows a pg_ping utility to be written.)
  	 */
! 	cac = canAcceptConnections();
! 
! 	switch (cac)
  	{
  		case CAC_STARTUP:
  			ereport(FATAL,
--- 1437,1443 ----
  	 * so now instead of wasting cycles on an authentication exchange.
  	 * (This also allows a pg_ping utility to be written.)
  	 */
! 	switch (port->canAcceptConnections)
  	{
  		case CAC_STARTUP:
  			ereport(FATAL,
***************
*** 1499,1506 ****
  								 backendPID)));
  		return;
  	}
! 	else if (ExecBackend)
  		AttachSharedMemoryAndSemaphores();
  
  	/* See if we have a matching backend */
  
--- 1499,1508 ----
  								 backendPID)));
  		return;
  	}
! #ifdef EXEC_BACKEND
! 	else
  		AttachSharedMemoryAndSemaphores();
+ #endif
  
  	/* See if we have a matching backend */
  
***************
*** 2341,2380 ****
  	}
  }
  
  /*
!  * BackendFork -- perform authentication, and if successful, set up the
!  *		backend's argument list and invoke backend main().
!  *
!  * This used to perform an execv() but we no longer exec the backend;
!  * it's the same executable as the postmaster.
   *
   * returns:
   *		Shouldn't return at all.
   *		If PostgresMain() fails, return status.
   */
! static int
! BackendFork(Port *port)
  {
- 	char	  **av;
- 	int			maxac;
- 	int			ac;
- 	char		debugbuf[32];
- 	char		protobuf[32];
- 
- #ifdef EXEC_BACKEND
- 	char		pbuf[NAMEDATALEN + 256];
- #endif
- 	int			i;
  	int			status;
  	struct timeval now;
  	struct timezone tz;
  	char		remote_host[NI_MAXHOST];
  	char		remote_port[NI_MAXSERV];
  
- 	/*
- 	 * Let's clean up ourselves as the postmaster child
- 	 */
- 
  	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */
  
  	ClientAuthInProgress = true;	/* limit visibility of log messages */
--- 2343,2366 ----
  	}
  }
  
+ 
  /*
!  * BackendInit/Fork -- perform authentication [BackendInit], and if successful,
!  *              set up the backend's argument list [BackendFork] and invoke
!  *              backend main() [or exec in EXEC_BACKEND case]
   *
   * returns:
   *		Shouldn't return at all.
   *		If PostgresMain() fails, return status.
   */
! bool BackendInit(Port *port)
  {
  	int			status;
  	struct timeval now;
  	struct timezone tz;
  	char		remote_host[NI_MAXHOST];
  	char		remote_port[NI_MAXSERV];
  
  	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */
  
  	ClientAuthInProgress = true;	/* limit visibility of log messages */
***************
*** 2386,2394 ****
  	 * Signal handlers setting is moved to tcop/postgres...
  	 */
  
- 	/* Close the postmaster's other sockets */
- 	ClosePostmasterPorts(true);
- 
  	/* Save port etc. for ps status */
  	MyProcPort = port;
  
--- 2372,2377 ----
***************
*** 2445,2460 ****
  	}
  
  	/*
- 	 * PreAuthDelay is a debugging aid for investigating problems in the
- 	 * authentication cycle: it can be set in postgresql.conf to allow
- 	 * time to attach to the newly-forked backend with a debugger. (See
- 	 * also the -W backend switch, which we allow clients to pass through
- 	 * PGOPTIONS, but it is not honored until after authentication.)
- 	 */
- 	if (PreAuthDelay > 0)
- 		sleep(PreAuthDelay);
- 
- 	/*
  	 * Ready to begin client interaction.  We will give up and exit(0)
  	 * after a time delay, so that a broken client can't hog a connection
  	 * indefinitely.  PreAuthDelay doesn't count against the time limit.
--- 2428,2433 ----
***************
*** 2469,2475 ****
  	status = ProcessStartupPacket(port, false);
  
  	if (status != STATUS_OK)
! 		return 0;				/* cancel request processed, or error */
  
  	/*
  	 * Now that we have the user and database name, we can set the process
--- 2442,2448 ----
  	status = ProcessStartupPacket(port, false);
  
  	if (status != STATUS_OK)
! 		return false;				/* cancel request processed, or error */
  
  	/*
  	 * Now that we have the user and database name, we can set the process
***************
*** 2506,2511 ****
--- 2479,2528 ----
  	gettimeofday(&now, &tz);
  	srandom((unsigned int) now.tv_usec);
  
+ #ifdef EXEC_BACKEND
+ 	ClientAuthInProgress = false;		/* client_min_messages is active
+ 										 * now */
+ #endif
+ 	return true;
+ }
+ 
+ 
+ static int
+ BackendFork(Port *port)
+ {
+ 	char	  **av;
+ 	int			maxac;
+ 	int			ac;
+ 	char		debugbuf[32];
+ #ifndef EXEC_BACKEND
+ 	char		protobuf[32];
+ #endif
+ 	int			i;
+ 	char tmpExtraOptions[MAXPGPATH];
+ 
+ 	/*
+ 	 * Let's clean up ourselves as the postmaster child, and
+ 	 * close the postmaster's other sockets
+ 	 */
+ 	ClosePostmasterPorts(true);
+ 
+ 	/*
+ 	 * PreAuthDelay is a debugging aid for investigating problems in the
+ 	 * authentication cycle: it can be set in postgresql.conf to allow
+ 	 * time to attach to the newly-forked backend with a debugger. (See
+ 	 * also the -W backend switch, which we allow clients to pass through
+ 	 * PGOPTIONS, but it is not honored until after authentication.)
+ 	 */
+ 	if (PreAuthDelay > 0)
+ 		sleep(PreAuthDelay);
+ 
+ 	port->canAcceptConnections = canAcceptConnections();
+ 
+ #ifndef EXEC_BACKEND
+ 	if (!BackendInit(port))
+ 		return -1;
+ #endif
+ 
  	/* ----------------
  	 * Now, build the argv vector that will be given to PostgresMain.
  	 *
***************
*** 2540,2569 ****
  
  	/*
  	 * Pass any backend switches specified with -o in the postmaster's own
! 	 * command line.  We assume these are secure. (It's OK to mangle
! 	 * ExtraOptions since we are now in the child process; this won't
! 	 * change the postmaster's copy.)
  	 */
! 	split_opts(av, &ac, ExtraOptions);
  
  	/* Tell the backend what protocol the frontend is using. */
  	snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
  	av[ac++] = protobuf;
  
  	/*
  	 * Tell the backend it is being called from the postmaster, and which
  	 * database to use.  -p marks the end of secure switches.
  	 */
- 	av[ac++] = "-p";
  #ifdef EXEC_BACKEND
! 	Assert(UsedShmemSegID != 0 && UsedShmemSegAddr != NULL);
! 	/* database name at the end because it might contain commas */
! 	snprintf(pbuf, sizeof(pbuf), "%d,%d,%lu,%p,%s",
! 			 port->sock, canAcceptConnections(),
! 			 UsedShmemSegID, UsedShmemSegAddr,
! 			 port->database_name);
! 	av[ac++] = pbuf;
  #else
  	av[ac++] = port->database_name;
  #endif
  
--- 2557,2594 ----
  
  	/*
  	 * Pass any backend switches specified with -o in the postmaster's own
! 	 * command line.  We assume these are secure.
! 	 * [Note: now makes a copy to protect against future fork/exec changes]
  	 */
! 	strcpy(tmpExtraOptions,ExtraOptions);
! 	split_opts(av, &ac, tmpExtraOptions);
  
+ #ifndef EXEC_BACKEND
  	/* Tell the backend what protocol the frontend is using. */
  	snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
  	av[ac++] = protobuf;
+ #endif
  
  	/*
  	 * Tell the backend it is being called from the postmaster, and which
  	 * database to use.  -p marks the end of secure switches.
  	 */
  #ifdef EXEC_BACKEND
! 	write_backend_variables(getpid(),port);
! 
! 	/* pass data dir before end of secure switches (-p) */
! 	av[ac++] = "-D";
! 	av[ac++] = DataDir;
! 
! 	/*
! 	 * This is totally bogus. We need to pass an arg to -p, but we'll
! 	 * actually get the dbname by ProcessStartupPacket in the exec'd
! 	 * process
! 	 */
! 	av[ac++] = "-p";
! 	av[ac++] = "FORK_EXEC";
  #else
+ 	av[ac++] = "-p";
  	av[ac++] = port->database_name;
  #endif
  
***************
*** 2571,2576 ****
--- 2596,2605 ----
  	 * Pass the (insecure) option switches from the connection request.
  	 * (It's OK to mangle port->cmdline_options now.)
  	 */
+ 	/* FIXME: [fork/exec] Hmmm.. we won't see these until after we BackendInit.
+ 	 * Should we add code to BackendInit to add these (somehow!) into
+ 	 * the PostgresMain argument list in the EXEC_BACKEND case?
+ 	 */
  	if (port->cmdline_options)
  		split_opts(av, &ac, port->cmdline_options);
  
***************
*** 2594,2610 ****
  	 */
  	ereport(DEBUG3,
  			(errmsg_internal("%s child[%d]: starting with (",
! 							 progname, MyProcPid)));
  	for (i = 0; i < ac; ++i)
  		ereport(DEBUG3,
  				(errmsg_internal("\t%s", av[i])));
  	ereport(DEBUG3,
  			(errmsg_internal(")")));
  
  	ClientAuthInProgress = false;		/* client_min_messages is active
  										 * now */
  
  	return (PostgresMain(ac, av, port->user_name));
  }
  
  /*
--- 2623,2643 ----
  	 */
  	ereport(DEBUG3,
  			(errmsg_internal("%s child[%d]: starting with (",
! 							 progname, getpid())));
  	for (i = 0; i < ac; ++i)
  		ereport(DEBUG3,
  				(errmsg_internal("\t%s", av[i])));
  	ereport(DEBUG3,
  			(errmsg_internal(")")));
  
+ #ifdef EXEC_BACKEND
+ 	return execv(pg_pathname,av);
+ #else
  	ClientAuthInProgress = false;		/* client_min_messages is active
  										 * now */
  
  	return (PostgresMain(ac, av, port->user_name));
+ #endif
  }
  
  /*
***************
*** 3051,3053 ****
--- 3084,3216 ----
  	va_end(ap);
  	fprintf(stderr, "\n");
  }
+ 
+ 
+ #ifdef EXEC_BACKEND
+ 
+ /*
+  * The following need to be available to the read/write_backend_variables
+  * functions
+  */
+ extern XLogRecPtr RedoRecPtr;
+ extern XLogwrtResult LogwrtResult;
+ extern slock_t *ShmemLock;
+ extern slock_t *ShmemIndexLock;
+ extern void *ShmemIndexAlloc;
+ typedef struct LWLock LWLock;
+ extern LWLock *LWLockArray;
+ extern slock_t  *ProcStructLock;
+ extern int	pgStatSock;
+ 
+ #define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
+ #define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp);
+ #define get_tmp_backend_var_dir(buf)		\
+ 		sprintf((buf),"%s/%s",DataDir,PG_TEMP_FILES_DIR)
+ #define get_tmp_backend_var_file_name(buf,id)		\
+ 		Assert(DataDir);			\
+ 		sprintf((buf),				\
+ 			"%s/%s/%s.backend_var.%d",	\
+ 			DataDir,			\
+ 			PG_TEMP_FILES_DIR,		\
+ 			PG_TEMP_FILE_PREFIX,		\
+ 			(id))
+ 
+ static void
+ write_backend_variables(pid_t pid, Port *port)
+ {
+ 	char	filename[MAXPGPATH];
+ 	FILE	*fp;
+ 	get_tmp_backend_var_file_name(filename,pid);
+ 
+ 	/* Open file */
+ 	fp = AllocateFile(filename, PG_BINARY_W);
+ 	if (!fp)
+ 	{
+ 		/* As per OpenTemporaryFile... */
+ 		char dirname[MAXPGPATH];
+ 		get_tmp_backend_var_dir(dirname);
+ 		mkdir(dirname, S_IRWXU);
+ 
+ 		fp = AllocateFile(filename, PG_BINARY_W);
+ 		if (!fp)
+ 		{
+ 			ereport(FATAL,
+ 				(errcode_for_file_access(),
+ 				errmsg("could not write to file \"%s\": %m", filename)));
+ 			return;
+ 		}
+ 	}
+ 
+ 	/* Write vars */
+ 	write_var(port->sock,fp);
+ 	write_var(port->proto,fp);
+ 	write_var(port->laddr,fp);
+ 	write_var(port->raddr,fp);
+ 	write_var(port->canAcceptConnections,fp);
+ 	write_var(MyCancelKey,fp);
+ 
+ 	write_var(RedoRecPtr,fp);
+ 	write_var(LogwrtResult,fp);
+ 
+ 	write_var(UsedShmemSegID,fp);
+ 	write_var(UsedShmemSegAddr,fp);
+ 
+ 	write_var(ShmemLock,fp);
+ 	write_var(ShmemIndexLock,fp);
+ 	write_var(ShmemVariableCache,fp);
+ 	write_var(ShmemIndexAlloc,fp);
+ 
+ 	write_var(LWLockArray,fp);
+ 	write_var(ProcStructLock,fp);
+ 	write_var(pgStatSock,fp);
+ 
+ 	/* Release file */
+ 	FreeFile(fp);
+ }
+ 
+ void
+ read_backend_variables(pid_t pid, Port *port)
+ {
+ 	char	filename[MAXPGPATH];
+ 	FILE	*fp;
+ 	get_tmp_backend_var_file_name(filename,pid);
+ 
+ 	/* Open file */
+ 	fp = AllocateFile(filename, PG_BINARY_R);
+ 	if (!fp)
+ 	{
+ 		ereport(ERROR,
+ 			(errcode_for_file_access(),
+ 			errmsg("could not read from backend_variables file \"%s\": %m", filename)));
+ 		return;
+ 	}
+ 
+ 	/* Read vars */
+ 	read_var(port->sock,fp);
+ 	read_var(port->proto,fp);
+ 	read_var(port->laddr,fp);
+ 	read_var(port->raddr,fp);
+ 	read_var(port->canAcceptConnections,fp);
+ 	read_var(MyCancelKey,fp);
+ 
+ 	read_var(RedoRecPtr,fp);
+ 	read_var(LogwrtResult,fp);
+ 
+ 	read_var(UsedShmemSegID,fp);
+ 	read_var(UsedShmemSegAddr,fp);
+ 
+ 	read_var(ShmemLock,fp);
+ 	read_var(ShmemIndexLock,fp);
+ 	read_var(ShmemVariableCache,fp);
+ 	read_var(ShmemIndexAlloc,fp);
+ 
+ 	read_var(LWLockArray,fp);
+ 	read_var(ProcStructLock,fp);
+ 	read_var(pgStatSock,fp);
+ 
+ 	/* Release file */
+ 	FreeFile(fp);
+ 	unlink(filename);
+ }
+ 
+ #endif
Index: src/backend/storage/buffer/buf_init.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/buffer/buf_init.c,v
retrieving revision 1.58
diff -c -w -r1.58 buf_init.c
*** src/backend/storage/buffer/buf_init.c	29 Nov 2003 19:51:56 -0000	1.58
--- src/backend/storage/buffer/buf_init.c	13 Dec 2003 06:18:36 -0000
***************
*** 136,142 ****
--- 136,144 ----
  	 * anyone else attached to the shmem at this point, we've got
  	 * problems.
  	 */
+ #ifndef EXEC_BACKEND
  	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+ #endif
  
  #ifdef BMTRACE
  	CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
***************
*** 198,204 ****
--- 200,208 ----
  	/* Init other shared buffer-management stuff */
  	StrategyInitialize(!foundDescs);
  
+ #ifndef EXEC_BACKEND
  	LWLockRelease(BufMgrLock);
+ #endif
  }
  
  /*
Index: src/backend/storage/file/fd.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/file/fd.c,v
retrieving revision 1.104
diff -c -w -r1.104 fd.c
*** src/backend/storage/file/fd.c	12 Dec 2003 18:45:09 -0000	1.104
--- src/backend/storage/file/fd.c	13 Dec 2003 06:18:37 -0000
***************
*** 53,63 ****
  #include "storage/ipc.h"
  
  
- /* Filename components for OpenTemporaryFile */
- #define PG_TEMP_FILES_DIR "pgsql_tmp"
- #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
- 
- 
  /*
   * Problem: Postgres does a system(ld...) to do dynamic loading.
   * This will open several extra files in addition to those used by
--- 53,58 ----
***************
*** 1217,1224 ****
  	{
  		while ((db_de = readdir(db_dir)) != NULL)
  		{
! 			if (strcmp(db_de->d_name, ".") == 0 ||
! 				strcmp(db_de->d_name, "..") == 0)
  				continue;
  
  			snprintf(temp_path, sizeof(temp_path),
--- 1212,1223 ----
  	{
  		while ((db_de = readdir(db_dir)) != NULL)
  		{
! 			if (strcmp(db_de->d_name, ".") == 0
! #ifndef EXEC_BACKEND
! 			/* no PG_TEMP_FILES_DIR in DataDir in non EXEC_BACKEND case */
! 				|| strcmp(db_de->d_name, "..") == 0
! #endif
! 			)
  				continue;
  
  			snprintf(temp_path, sizeof(temp_path),
Index: src/backend/storage/freespace/freespace.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/freespace/freespace.c,v
retrieving revision 1.27
diff -c -w -r1.27 freespace.c
*** src/backend/storage/freespace/freespace.c	12 Dec 2003 18:45:09 -0000	1.27
--- src/backend/storage/freespace/freespace.c	13 Dec 2003 06:18:38 -0000
***************
*** 180,186 ****
  /* Header for whole map */
  struct FSMHeader
  {
- 	HTAB	   *relHash;		/* hashtable of FSMRelation entries */
  	FSMRelation *usageList;		/* FSMRelations in usage-recency order */
  	FSMRelation *usageListTail; /* tail of usage-recency list */
  	FSMRelation *firstRel;		/* FSMRelations in arena storage order */
--- 180,185 ----
***************
*** 218,223 ****
--- 217,223 ----
  int			MaxFSMPages;
  
  static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
+ static HTAB	*FreeSpaceMapRelHash; /* points to (what used to be) FSMHeader->relHash */
  
  
  static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
***************
*** 265,277 ****
  {
  	HASHCTL		info;
  	int			nchunks;
  
  	/* Create table header */
! 	FreeSpaceMap = (FSMHeader *) ShmemAlloc(sizeof(FSMHeader));
  	if (FreeSpaceMap == NULL)
  		ereport(FATAL,
  				(errcode(ERRCODE_OUT_OF_MEMORY),
  			   errmsg("insufficient shared memory for free space map")));
  	MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
  
  	/* Create hashtable for FSMRelations */
--- 265,279 ----
  {
  	HASHCTL		info;
  	int			nchunks;
+ 	bool found;
  
  	/* Create table header */
! 	FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header",sizeof(FSMHeader),&found);
  	if (FreeSpaceMap == NULL)
  		ereport(FATAL,
  				(errcode(ERRCODE_OUT_OF_MEMORY),
  			   errmsg("insufficient shared memory for free space map")));
+ 	if (!found)
  		MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
  
  	/* Create hashtable for FSMRelations */
***************
*** 279,295 ****
  	info.entrysize = sizeof(FSMRelation);
  	info.hash = tag_hash;
  
! 	FreeSpaceMap->relHash = ShmemInitHash("Free Space Map Hash",
  										  MaxFSMRelations / 10,
  										  MaxFSMRelations,
  										  &info,
  										  (HASH_ELEM | HASH_FUNCTION));
  
! 	if (!FreeSpaceMap->relHash)
  		ereport(FATAL,
  				(errcode(ERRCODE_OUT_OF_MEMORY),
  			   errmsg("insufficient shared memory for free space map")));
  
  	/* Allocate page-storage arena */
  	nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
  	/* This check ensures spareChunks will be greater than zero */
--- 281,301 ----
  	info.entrysize = sizeof(FSMRelation);
  	info.hash = tag_hash;
  
! 	FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash",
  										  MaxFSMRelations / 10,
  										  MaxFSMRelations,
  										  &info,
  										  (HASH_ELEM | HASH_FUNCTION));
  
! 	if (!FreeSpaceMapRelHash)
  		ereport(FATAL,
  				(errcode(ERRCODE_OUT_OF_MEMORY),
  			   errmsg("insufficient shared memory for free space map")));
  
+ 	if (found)
+ 		return;
+ 
+ 
  	/* Allocate page-storage arena */
  	nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
  	/* This check ensures spareChunks will be greater than zero */
***************
*** 974,980 ****
  {
  	FSMRelation *fsmrel;
  
! 	fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
  										 (void *) rel,
  										 HASH_FIND,
  										 NULL);
--- 980,986 ----
  {
  	FSMRelation *fsmrel;
  
! 	fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
  										 (void *) rel,
  										 HASH_FIND,
  										 NULL);
***************
*** 995,1001 ****
  	FSMRelation *fsmrel;
  	bool		found;
  
! 	fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
  										 (void *) rel,
  										 HASH_ENTER,
  										 &found);
--- 1001,1007 ----
  	FSMRelation *fsmrel;
  	bool		found;
  
! 	fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
  										 (void *) rel,
  										 HASH_ENTER,
  										 &found);
***************
*** 1050,1056 ****
  	unlink_fsm_rel_usage(fsmrel);
  	unlink_fsm_rel_storage(fsmrel);
  	FreeSpaceMap->numRels--;
! 	result = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
  										 (void *) &(fsmrel->key),
  										 HASH_REMOVE,
  										 NULL);
--- 1056,1062 ----
  	unlink_fsm_rel_usage(fsmrel);
  	unlink_fsm_rel_storage(fsmrel);
  	FreeSpaceMap->numRels--;
! 	result = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
  										 (void *) &(fsmrel->key),
  										 HASH_REMOVE,
  										 NULL);
Index: src/backend/storage/ipc/ipci.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/ipc/ipci.c,v
retrieving revision 1.59
diff -c -w -r1.59 ipci.c
*** src/backend/storage/ipc/ipci.c	1 Dec 2003 21:59:25 -0000	1.59
--- src/backend/storage/ipc/ipci.c	13 Dec 2003 06:18:38 -0000
***************
*** 87,93 ****
  	/*
  	 * Set up shared memory allocation mechanism
  	 */
! 	InitShmemAllocation(seghdr);
  
  	/*
  	 * Now initialize LWLocks, which do shared memory allocation and are
--- 87,93 ----
  	/*
  	 * Set up shared memory allocation mechanism
  	 */
! 	InitShmemAllocation(seghdr, true);
  
  	/*
  	 * Now initialize LWLocks, which do shared memory allocation and are
***************
*** 135,146 ****
--- 135,170 ----
  }
  
  
+ #ifdef EXEC_BACKEND
  /*
   * AttachSharedMemoryAndSemaphores
   *		Attaches to the existing shared resources.
   */
+ 
+ /* FIXME: [fork/exec] This function is starting to look pretty much like
+ 	CreateSharedMemoryAndSemaphores. Refactor? */
  void
  AttachSharedMemoryAndSemaphores(void)
  {
+ 	PGShmemHeader *seghdr = PGSharedMemoryCreate(-1,false,-1);
+ 
+ 	InitShmemAllocation(seghdr, false);
+ 
+ 	InitShmemIndex();
+ 
+ 	XLOGShmemInit();
  	CLOGShmemInit();
+ 	InitBufferPool();
+ 
+ 	InitLocks();
+ 	InitLockTable(MaxBackends);
+ 
+ 	InitProcGlobal(MaxBackends);
+ 
+ 	CreateSharedInvalidationState(MaxBackends);
+ 
+ 	InitFreeSpaceMap();
+ 
+ 	PMSignalInit();
  }
+ #endif
Index: src/backend/storage/ipc/pmsignal.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/ipc/pmsignal.c,v
retrieving revision 1.6
diff -c -w -r1.6 pmsignal.c
*** src/backend/storage/ipc/pmsignal.c	29 Nov 2003 19:51:56 -0000	1.6
--- src/backend/storage/ipc/pmsignal.c	13 Dec 2003 06:18:38 -0000
***************
*** 44,52 ****
  void
  PMSignalInit(void)
  {
  	PMSignalFlags = (sig_atomic_t *)
! 		ShmemAlloc(NUM_PMSIGNALS * sizeof(sig_atomic_t));
  
  	MemSet(PMSignalFlags, 0, NUM_PMSIGNALS * sizeof(sig_atomic_t));
  }
  
--- 44,54 ----
  void
  PMSignalInit(void)
  {
+ 	bool found;
  	PMSignalFlags = (sig_atomic_t *)
! 		ShmemInitStruct("PMSignalFlags",NUM_PMSIGNALS * sizeof(sig_atomic_t),&found);
  
+ 	if (!found)
  		MemSet(PMSignalFlags, 0, NUM_PMSIGNALS * sizeof(sig_atomic_t));
  }
  
Index: src/backend/storage/ipc/shmem.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/ipc/shmem.c,v
retrieving revision 1.74
diff -c -w -r1.74 shmem.c
*** src/backend/storage/ipc/shmem.c	29 Nov 2003 19:51:56 -0000	1.74
--- src/backend/storage/ipc/shmem.c	13 Dec 2003 06:18:39 -0000
***************
*** 74,80 ****
  
  static SHMEM_OFFSET ShmemEnd;	/* end+1 address of shared memory */
  
! static slock_t *ShmemLock;		/* spinlock for shared memory allocation */
  
  static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
  
--- 74,84 ----
  
  static SHMEM_OFFSET ShmemEnd;	/* end+1 address of shared memory */
  
! NON_EXEC_STATIC slock_t *ShmemLock;		/* spinlock for shared memory allocation */
! 
! NON_EXEC_STATIC slock_t *ShmemIndexLock;		/* spinlock for ShmemIndex */
! 
! NON_EXEC_STATIC void *ShmemIndexAlloc = NULL; /* Memory actually allocated for ShmemIndex */
  
  static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
  
***************
*** 88,94 ****
   * but we use void to avoid having to include ipc.h in shmem.h.
   */
  void
! InitShmemAllocation(void *seghdr)
  {
  	PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
  
--- 92,98 ----
   * but we use void to avoid having to include ipc.h in shmem.h.
   */
  void
! InitShmemAllocation(void *seghdr, bool init)
  {
  	PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
  
***************
*** 97,112 ****
  	ShmemBase = (SHMEM_OFFSET) shmhdr;
  	ShmemEnd = ShmemBase + shmhdr->totalsize;
  
  	/*
! 	 * Initialize the spinlock used by ShmemAlloc.	We have to do the
! 	 * space allocation the hard way, since ShmemAlloc can't be called
! 	 * yet.
  	 */
  	ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
  	shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
  	Assert(shmhdr->freeoffset <= shmhdr->totalsize);
  
  	SpinLockInit(ShmemLock);
  
  	/* ShmemIndex can't be set up yet (need LWLocks first) */
  	ShmemIndex = (HTAB *) NULL;
--- 101,123 ----
  	ShmemBase = (SHMEM_OFFSET) shmhdr;
  	ShmemEnd = ShmemBase + shmhdr->totalsize;
  
+ 	if (init)
+ 	{
  		/*
! 		 * Initialize the spinlocks used by ShmemAlloc/ShmemInitStruct. We
! 		 * have to do the space allocation the hard way, since ShmemAlloc
! 		 * can't be called yet.
  		 */
  		ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
  		shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
  		Assert(shmhdr->freeoffset <= shmhdr->totalsize);
  
+ 		ShmemIndexLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
+ 		shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
+ 		Assert(shmhdr->freeoffset <= shmhdr->totalsize);
+ 
  		SpinLockInit(ShmemLock);
+ 		SpinLockInit(ShmemIndexLock);
  
  		/* ShmemIndex can't be set up yet (need LWLocks first) */
  		ShmemIndex = (HTAB *) NULL;
***************
*** 118,123 ****
--- 129,135 ----
  		ShmemAlloc(sizeof(*ShmemVariableCache));
  		memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
  	}
+ }
  
  /*
   * ShmemAlloc -- allocate max-aligned chunk from shared memory
***************
*** 218,223 ****
--- 230,237 ----
  	/*
  	 * Now, create an entry in the hashtable for the index itself.
  	 */
+ 	if (!IsUnderPostmaster)
+ 	{
  		MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
  		strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
  
***************
*** 234,242 ****
  	result->size = SHMEM_INDEX_SIZE;
  
  	ShmemBootstrap = false;
  
  	/* now release the lock acquired in ShmemInitStruct */
! 	LWLockRelease(ShmemIndexLock);
  }
  
  /*
--- 248,257 ----
  		result->size = SHMEM_INDEX_SIZE;
  
  		ShmemBootstrap = false;
+ 	}
  
  	/* now release the lock acquired in ShmemInitStruct */
! 	SpinLockRelease(ShmemIndexLock);
  }
  
  /*
***************
*** 320,340 ****
  	strncpy(item.key, name, SHMEM_INDEX_KEYSIZE);
  	item.location = BAD_LOCATION;
  
! 	LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
  
  	if (!ShmemIndex)
  	{
  		/*
  		 * If the shmem index doesn't exist, we are bootstrapping: we must
  		 * be trying to init the shmem index itself.
  		 *
! 		 * Notice that the ShmemIndexLock is held until the shmem index has
  		 * been completely initialized.
  		 */
  		Assert(strcmp(name, "ShmemIndex") == 0);
  		Assert(ShmemBootstrap);
  		*foundPtr = FALSE;
! 		return ShmemAlloc(size);
  	}
  
  	/* look it up in the shmem index */
--- 335,367 ----
  	strncpy(item.key, name, SHMEM_INDEX_KEYSIZE);
  	item.location = BAD_LOCATION;
  
! 	SpinLockAcquire(ShmemIndexLock);
  
  	if (!ShmemIndex)
  	{
+ 		if (IsUnderPostmaster)
+ 		{
+ 			/* Must be initializing a (non-standalone) backend */
+ 			Assert(strcmp(name, "ShmemIndex") == 0);
+ 			Assert(ShmemBootstrap);
+ 			Assert(ShmemIndexAlloc);
+ 			*foundPtr = TRUE;
+ 		}
+ 		else
+ 		{
  			/*
  			 * If the shmem index doesn't exist, we are bootstrapping: we must
  			 * be trying to init the shmem index itself.
  			 *
! 			 * Notice that the ShmemLock is held until the shmem index has
  			 * been completely initialized.
  			 */
  			Assert(strcmp(name, "ShmemIndex") == 0);
  			Assert(ShmemBootstrap);
  			*foundPtr = FALSE;
! 			ShmemIndexAlloc = ShmemAlloc(size);
! 		}
! 		return ShmemIndexAlloc;
  	}
  
  	/* look it up in the shmem index */
***************
*** 343,349 ****
  
  	if (!result)
  	{
! 		LWLockRelease(ShmemIndexLock);
  		ereport(ERROR,
  				(errcode(ERRCODE_OUT_OF_MEMORY),
  				 errmsg("out of shared memory")));
--- 370,376 ----
  
  	if (!result)
  	{
! 		SpinLockRelease(ShmemIndexLock);
  		ereport(ERROR,
  				(errcode(ERRCODE_OUT_OF_MEMORY),
  				 errmsg("out of shared memory")));
***************
*** 359,365 ****
  		 */
  		if (result->size != size)
  		{
! 			LWLockRelease(ShmemIndexLock);
  
  			elog(WARNING, "ShmemIndex entry size is wrong");
  			/* let caller print its message too */
--- 386,392 ----
  		 */
  		if (result->size != size)
  		{
! 			SpinLockRelease(ShmemIndexLock);
  
  			elog(WARNING, "ShmemIndex entry size is wrong");
  			/* let caller print its message too */
***************
*** 376,382 ****
  			/* out of memory */
  			Assert(ShmemIndex);
  			hash_search(ShmemIndex, (void *) &item, HASH_REMOVE, NULL);
! 			LWLockRelease(ShmemIndexLock);
  
  			ereport(WARNING,
  					(errcode(ERRCODE_OUT_OF_MEMORY),
--- 403,409 ----
  			/* out of memory */
  			Assert(ShmemIndex);
  			hash_search(ShmemIndex, (void *) &item, HASH_REMOVE, NULL);
! 			SpinLockRelease(ShmemIndexLock);
  
  			ereport(WARNING,
  					(errcode(ERRCODE_OUT_OF_MEMORY),
***************
*** 389,394 ****
  	}
  	Assert(ShmemIsValid((unsigned long) structPtr));
  
! 	LWLockRelease(ShmemIndexLock);
  	return structPtr;
  }
--- 416,421 ----
  	}
  	Assert(ShmemIsValid((unsigned long) structPtr));
  
! 	SpinLockRelease(ShmemIndexLock);
  	return structPtr;
  }
Index: src/backend/storage/ipc/sinvaladt.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/ipc/sinvaladt.c,v
retrieving revision 1.53
diff -c -w -r1.53 sinvaladt.c
*** src/backend/storage/ipc/sinvaladt.c	29 Nov 2003 19:51:56 -0000	1.53
--- src/backend/storage/ipc/sinvaladt.c	13 Dec 2003 06:18:39 -0000
***************
*** 50,59 ****
  	int			segSize;
  	SISeg	   *segP;
  	int			i;
  
  	/* Allocate space in shared memory */
  	segSize = SInvalShmemSize(maxBackends);
! 	shmInvalBuffer = segP = (SISeg *) ShmemAlloc(segSize);
  
  	/* Clear message counters, save size of procState array */
  	segP->minMsgNum = 0;
--- 50,62 ----
  	int			segSize;
  	SISeg	   *segP;
  	int			i;
+ 	bool found;
  
  	/* Allocate space in shared memory */
  	segSize = SInvalShmemSize(maxBackends);
! 	shmInvalBuffer = segP = (SISeg *) ShmemInitStruct("shmInvalBuffer",segSize,&found);
! 	if (found)
! 		return;
  
  	/* Clear message counters, save size of procState array */
  	segP->minMsgNum = 0;
Index: src/backend/storage/lmgr/lock.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/lmgr/lock.c,v
retrieving revision 1.130
diff -c -w -r1.130 lock.c
*** src/backend/storage/lmgr/lock.c	1 Dec 2003 21:59:25 -0000	1.130
--- src/backend/storage/lmgr/lock.c	13 Dec 2003 06:18:40 -0000
***************
*** 153,161 ****
   * map from lock method id to the lock table structure
   */
  static LockMethod LockMethods[MAX_LOCK_METHODS];
! 
  static int	NumLockMethods;
  
  /*
   * InitLocks -- Init the lock module.  Create a private data
   *		structure for constructing conflict masks.
--- 153,163 ----
   * map from lock method id to the lock table structure
   */
  static LockMethod LockMethods[MAX_LOCK_METHODS];
! static HTAB*	LockMethodLockHash[MAX_LOCK_METHODS];
! static HTAB*	LockMethodProcLockHash[MAX_LOCK_METHODS];
  static int	NumLockMethods;
  
+ 
  /*
   * InitLocks -- Init the lock module.  Create a private data
   *		structure for constructing conflict masks.
***************
*** 245,252 ****
  	/*
  	 * Lock the LWLock for the table (probably not necessary here)
  	 */
  	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
! 
  	/*
  	 * no zero-th table
  	 */
--- 247,255 ----
  	/*
  	 * Lock the LWLock for the table (probably not necessary here)
  	 */
+ #ifndef EXEC_BACKEND
  	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
! #endif
  	/*
  	 * no zero-th table
  	 */
***************
*** 279,293 ****
  	hash_flags = (HASH_ELEM | HASH_FUNCTION);
  
  	sprintf(shmemName, "%s (lock hash)", tabName);
! 	newLockMethod->lockHash = ShmemInitHash(shmemName,
  											init_table_size,
  											max_table_size,
  											&info,
  											hash_flags);
  
! 	if (!newLockMethod->lockHash)
  		elog(FATAL, "could not initialize lock table \"%s\"", tabName);
! 	Assert(newLockMethod->lockHash->hash == tag_hash);
  
  	/*
  	 * allocate a hash table for PROCLOCK structs.	This is used to store
--- 282,296 ----
  	hash_flags = (HASH_ELEM | HASH_FUNCTION);
  
  	sprintf(shmemName, "%s (lock hash)", tabName);
! 	LockMethodLockHash[NumLockMethods-1] = ShmemInitHash(shmemName,
  											init_table_size,
  											max_table_size,
  											&info,
  											hash_flags);
  
! 	if (!LockMethodLockHash[NumLockMethods-1])
  		elog(FATAL, "could not initialize lock table \"%s\"", tabName);
! 	Assert(LockMethodLockHash[NumLockMethods-1]->hash == tag_hash);
  
  	/*
  	 * allocate a hash table for PROCLOCK structs.	This is used to store
***************
*** 299,318 ****
  	hash_flags = (HASH_ELEM | HASH_FUNCTION);
  
  	sprintf(shmemName, "%s (proclock hash)", tabName);
! 	newLockMethod->proclockHash = ShmemInitHash(shmemName,
  												init_table_size,
  												max_table_size,
  												&info,
  												hash_flags);
  
! 	if (!newLockMethod->proclockHash)
  		elog(FATAL, "could not initialize lock table \"%s\"", tabName);
  
  	/* init data structures */
  	LockMethodInit(newLockMethod, conflictsP, numModes);
  
  	LWLockRelease(LockMgrLock);
! 
  	pfree(shmemName);
  
  	return newLockMethod->lockmethodid;
--- 302,322 ----
  	hash_flags = (HASH_ELEM | HASH_FUNCTION);
  
  	sprintf(shmemName, "%s (proclock hash)", tabName);
! 	LockMethodProcLockHash[NumLockMethods-1] = ShmemInitHash(shmemName,
  												init_table_size,
  												max_table_size,
  												&info,
  												hash_flags);
  
! 	if (!LockMethodProcLockHash[NumLockMethods-1])
  		elog(FATAL, "could not initialize lock table \"%s\"", tabName);
  
  	/* init data structures */
  	LockMethodInit(newLockMethod, conflictsP, numModes);
  
+ #ifndef EXEC_BACKEND
  	LWLockRelease(LockMgrLock);
! #endif
  	pfree(shmemName);
  
  	return newLockMethod->lockmethodid;
***************
*** 449,456 ****
  	/*
  	 * Find or create a lock with this tag
  	 */
! 	Assert(lockMethodTable->lockHash->hash == tag_hash);
! 	lock = (LOCK *) hash_search(lockMethodTable->lockHash,
  								(void *) locktag,
  								HASH_ENTER, &found);
  	if (!lock)
--- 453,460 ----
  	/*
  	 * Find or create a lock with this tag
  	 */
! 	Assert(LockMethodLockHash[lockmethodid]->hash == tag_hash);
! 	lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
  								(void *) locktag,
  								HASH_ENTER, &found);
  	if (!lock)
***************
*** 497,503 ****
  	/*
  	 * Find or create a proclock entry with this tag
  	 */
! 	proclockTable = lockMethodTable->proclockHash;
  	proclock = (PROCLOCK *) hash_search(proclockTable,
  										(void *) &proclocktag,
  										HASH_ENTER, &found);
--- 501,507 ----
  	/*
  	 * Find or create a proclock entry with this tag
  	 */
! 	proclockTable = LockMethodProcLockHash[lockmethodid];
  	proclock = (PROCLOCK *) hash_search(proclockTable,
  										(void *) &proclocktag,
  										HASH_ENTER, &found);
***************
*** 988,995 ****
  	/*
  	 * Find a lock with this tag
  	 */
! 	Assert(lockMethodTable->lockHash->hash == tag_hash);
! 	lock = (LOCK *) hash_search(lockMethodTable->lockHash,
  								(void *) locktag,
  								HASH_FIND, NULL);
  
--- 992,999 ----
  	/*
  	 * Find a lock with this tag
  	 */
! 	Assert(LockMethodLockHash[lockmethodid]->hash == tag_hash);
! 	lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
  								(void *) locktag,
  								HASH_FIND, NULL);
  
***************
*** 1014,1020 ****
  	proclocktag.proc = MAKE_OFFSET(MyProc);
  	TransactionIdStore(xid, &proclocktag.xid);
  
! 	proclockTable = lockMethodTable->proclockHash;
  	proclock = (PROCLOCK *) hash_search(proclockTable,
  										(void *) &proclocktag,
  										HASH_FIND_SAVE, NULL);
--- 1018,1024 ----
  	proclocktag.proc = MAKE_OFFSET(MyProc);
  	TransactionIdStore(xid, &proclocktag.xid);
  
! 	proclockTable = LockMethodProcLockHash[lockmethodid];
  	proclock = (PROCLOCK *) hash_search(proclockTable,
  										(void *) &proclocktag,
  										HASH_FIND_SAVE, NULL);
***************
*** 1086,1093 ****
  		 * if there's no one waiting in the queue, we just released the
  		 * last lock on this object. Delete it from the lock table.
  		 */
! 		Assert(lockMethodTable->lockHash->hash == tag_hash);
! 		lock = (LOCK *) hash_search(lockMethodTable->lockHash,
  									(void *) &(lock->tag),
  									HASH_REMOVE,
  									NULL);
--- 1090,1097 ----
  		 * if there's no one waiting in the queue, we just released the
  		 * last lock on this object. Delete it from the lock table.
  		 */
! 		Assert(LockMethodLockHash[lockmethodid]->hash == tag_hash);
! 		lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
  									(void *) &(lock->tag),
  									HASH_REMOVE,
  									NULL);
***************
*** 1269,1275 ****
  		/*
  		 * remove the proclock entry from the hashtable
  		 */
! 		proclock = (PROCLOCK *) hash_search(lockMethodTable->proclockHash,
  											(void *) proclock,
  											HASH_REMOVE,
  											NULL);
--- 1273,1279 ----
  		/*
  		 * remove the proclock entry from the hashtable
  		 */
! 		proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
  											(void *) proclock,
  											HASH_REMOVE,
  											NULL);
***************
*** 1287,1294 ****
  			 * lock object.
  			 */
  			LOCK_PRINT("LockReleaseAll: deleting", lock, 0);
! 			Assert(lockMethodTable->lockHash->hash == tag_hash);
! 			lock = (LOCK *) hash_search(lockMethodTable->lockHash,
  										(void *) &(lock->tag),
  										HASH_REMOVE, NULL);
  			if (!lock)
--- 1291,1298 ----
  			 * lock object.
  			 */
  			LOCK_PRINT("LockReleaseAll: deleting", lock, 0);
! 			Assert(LockMethodLockHash[lockmethodid]->hash == tag_hash);
! 			lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
  										(void *) &(lock->tag),
  										HASH_REMOVE, NULL);
  			if (!lock)
***************
*** 1367,1373 ****
  
  	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
  
! 	proclockTable = LockMethods[DEFAULT_LOCKMETHOD]->proclockHash;
  
  	data->nelements = i = proclockTable->hctl->nentries;
  
--- 1371,1377 ----
  
  	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
  
! 	proclockTable = LockMethodProcLockHash[DEFAULT_LOCKMETHOD];
  
  	data->nelements = i = proclockTable->hctl->nentries;
  
***************
*** 1480,1486 ****
  	if (!lockMethodTable)
  		return;
  
! 	proclockTable = lockMethodTable->proclockHash;
  
  	if (proc->waitLock)
  		LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
--- 1484,1490 ----
  	if (!lockMethodTable)
  		return;
  
! 	proclockTable = LockMethodProcLockHash[lockmethodid];
  
  	if (proc->waitLock)
  		LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
Index: src/backend/storage/lmgr/lwlock.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/lmgr/lwlock.c,v
retrieving revision 1.18
diff -c -w -r1.18 lwlock.c
*** src/backend/storage/lmgr/lwlock.c	29 Nov 2003 19:51:57 -0000	1.18
--- src/backend/storage/lmgr/lwlock.c	13 Dec 2003 06:18:40 -0000
***************
*** 43,49 ****
   * the pointer by fork from the postmaster.  LWLockIds are indexes into
   * the array.
   */
! static LWLock *LWLockArray = NULL;
  
  /* shared counter for dynamic allocation of LWLockIds */
  static int *LWLockCounter;
--- 43,49 ----
   * the pointer by fork from the postmaster.  LWLockIds are indexes into
   * the array.
   */
! NON_EXEC_STATIC LWLock *LWLockArray = NULL;
  
  /* shared counter for dynamic allocation of LWLockIds */
  static int *LWLockCounter;
Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.140
diff -c -w -r1.140 proc.c
*** src/backend/storage/lmgr/proc.c	12 Dec 2003 18:45:09 -0000	1.140
--- src/backend/storage/lmgr/proc.c	13 Dec 2003 06:18:41 -0000
***************
*** 66,72 ****
   * relatively infrequently (only at backend startup or shutdown) and not for
   * very long, so a spinlock is okay.
   */
! static slock_t *ProcStructLock = NULL;
  
  static PROC_HDR *ProcGlobal = NULL;
  
--- 66,72 ----
   * relatively infrequently (only at backend startup or shutdown) and not for
   * very long, so a spinlock is okay.
   */
! NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
  
  static PROC_HDR *ProcGlobal = NULL;
  
***************
*** 247,252 ****
--- 247,253 ----
  	MyProc->waitLock = NULL;
  	MyProc->waitHolder = NULL;
  	SHMQueueInit(&(MyProc->procHolders));
+ 
  
  	/*
  	 * Arrange to clean up at backend exit.
Index: src/backend/tcop/postgres.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/tcop/postgres.c,v
retrieving revision 1.379
diff -c -w -r1.379 postgres.c
*** src/backend/tcop/postgres.c	1 Dec 2003 22:15:37 -0000	1.379
--- src/backend/tcop/postgres.c	13 Dec 2003 06:18:44 -0000
***************
*** 68,73 ****
--- 68,77 ----
  extern int	optind;
  extern char *optarg;
  
+ #ifdef EXEC_BACKEND
+ extern int BackendInit(Port*);
+ extern void read_backend_variables(pid_t, Port*);
+ #endif
  
  /* ----------------
   *		global variables
***************
*** 2052,2058 ****
  	 * initialize globals (already done if under postmaster, but not if
  	 * standalone; cheap enough to do over)
  	 */
- 
  	MyProcPid = getpid();
  
  	/*
--- 2056,2061 ----
***************
*** 2060,2066 ****
  	 *
  	 * If we are running under the postmaster, this is done already.
  	 */
! 	if (!IsUnderPostmaster)
  		MemoryContextInit();
  
  	set_ps_display("startup");
--- 2063,2069 ----
  	 *
  	 * If we are running under the postmaster, this is done already.
  	 */
! 	if (!IsUnderPostmaster /* when exec || ExecBackend */)
  		MemoryContextInit();
  
  	set_ps_display("startup");
***************
*** 2133,2139 ****
  			case 'D':			/* PGDATA directory */
  				if (secure)
  					potential_DataDir = optarg;
- 				break;
  
  			case 'd':			/* debug level */
  				{
--- 2136,2141 ----
***************
*** 2268,2274 ****
  				break;
  
  			case 'p':
- 
  				/*
  				 * p - special flag passed if backend was forked by a
  				 * postmaster.
--- 2270,2275 ----
***************
*** 2276,2298 ****
  				if (secure)
  				{
  #ifdef EXEC_BACKEND
! 					char	   *p;
! 					int			i;
! 					int			PMcanAcceptConnections; /* will eventually be
! 														 * global or static,
! 														 * when fork */
! 
! 					sscanf(optarg, "%d,%d,%lu,%p,",
! 						   &MyProcPort->sock, &PMcanAcceptConnections,
! 						   &UsedShmemSegID, &UsedShmemSegAddr);
! 					/* Grab dbname as last param */
! 					for (i = 0, p = optarg - 1; i < 4 && p; i++)
! 						p = strchr(p + 1, ',');
! 					if (i == 4 && p)
! 						dbname = strdup(p + 1);
  #else
  					dbname = strdup(optarg);
  #endif
  					secure = false;		/* subsequent switches are NOT
  										 * secure */
  					ctx = PGC_BACKEND;
--- 2277,2287 ----
  				if (secure)
  				{
  #ifdef EXEC_BACKEND
! 					IsUnderPostmaster = true;
  #else
  					dbname = strdup(optarg);
  #endif
+ 
  					secure = false;		/* subsequent switches are NOT
  										 * secure */
  					ctx = PGC_BACKEND;
***************
*** 2477,2483 ****
  		SetConfigOption("log_statement_stats", "false", ctx, gucsource);
  	}
  
! 	if (!IsUnderPostmaster)
  	{
  		if (!potential_DataDir)
  		{
--- 2466,2472 ----
  		SetConfigOption("log_statement_stats", "false", ctx, gucsource);
  	}
  
! 	if (!IsUnderPostmaster || ExecBackend)
  	{
  		if (!potential_DataDir)
  		{
***************
*** 2497,2506 ****
  	if (IsUnderPostmaster)
  	{
  #ifdef EXEC_BACKEND
  		read_nondefault_variables();
  #endif
! 	}
! 	else
  		ProcessConfigFile(PGC_POSTMASTER);
  
  	/*
--- 2486,2512 ----
  	if (IsUnderPostmaster)
  	{
  #ifdef EXEC_BACKEND
+ 		Port *port =(Port*)malloc(sizeof(Port));
+ 		if (port == NULL)
+ 			ereport(ERROR,
+ 				(errcode(ERRCODE_OUT_OF_MEMORY),
+ 				errmsg("insufficient memory to allocate port")));
+ 
  		read_nondefault_variables();
+ 		read_backend_variables(getpid(),port);
+ 
+ 		/* FIXME: [fork/exec] Ugh */
+ 		load_hba();
+ 		load_ident();
+ 		load_user();
+ 		load_group();
+ 
+ 		if (!BackendInit(port))
+ 			return -1;
+ 
+ 		dbname = port->database_name;
  #endif
! 	} else
  		ProcessConfigFile(PGC_POSTMASTER);
  
  	/*
***************
*** 2517,2523 ****
  	 * course, this isn't an issue for signals that are locally generated,
  	 * such as SIGALRM and SIGPIPE.)
  	 */
- 
  	pqsignal(SIGHUP, SigHupHandler);	/* set flag to read config file */
  	pqsignal(SIGINT, StatementCancelHandler);	/* cancel current query */
  	pqsignal(SIGTERM, die);		/* cancel current query and exit */
--- 2523,2528 ----
***************
*** 2565,2574 ****
  					 errmsg("invalid command-line arguments for server process"),
  					 errhint("Try \"%s --help\" for more information.", argv[0])));
  		}
! 		BaseInit();
! #ifdef EXECBACKEND
  		AttachSharedMemoryAndSemaphores();
  #endif
  	}
  	else
  	{
--- 2570,2581 ----
  					 errmsg("invalid command-line arguments for server process"),
  					 errhint("Try \"%s --help\" for more information.", argv[0])));
  		}
! #ifdef EXEC_BACKEND
  		AttachSharedMemoryAndSemaphores();
  #endif
+ 		XLOGPathInit();
+ 
+ 		BaseInit();
  	}
  	else
  	{
***************
*** 2845,2851 ****
--- 2852,2862 ----
  		if (got_SIGHUP)
  		{
  			got_SIGHUP = false;
+ #ifdef EXEC_BACKEND
+ 			read_nondefault_variables();
+ #else
  			ProcessConfigFile(PGC_SIGHUP);
+ #endif
  		}
  
  		/*
***************
*** 3199,3202 ****
  
  	pfree(str.data);
  }
- 
--- 3210,3212 ----
Index: src/include/c.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/c.h,v
retrieving revision 1.157
diff -c -w -r1.157 c.h
*** src/include/c.h	29 Nov 2003 22:40:53 -0000	1.157
--- src/include/c.h	13 Dec 2003 06:18:45 -0000
***************
*** 793,798 ****
--- 793,805 ----
  #define HAVE_STRTOULL 1
  #endif
  
+ /* EXEC_BACKEND defines */
+ #ifdef EXEC_BACKEND
+ #define NON_EXEC_STATIC
+ #else
+ #define NON_EXEC_STATIC static
+ #endif
+ 
  /* /port compatibility functions */
  #include "port.h"
  
Index: src/include/access/xlogdefs.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/access/xlogdefs.h,v
retrieving revision 1.10
diff -c -w -r1.10 xlogdefs.h
*** src/include/access/xlogdefs.h	29 Nov 2003 22:40:55 -0000	1.10
--- src/include/access/xlogdefs.h	13 Dec 2003 06:18:46 -0000
***************
*** 33,38 ****
--- 33,45 ----
  	uint32		xrecoff;		/* byte offset of location in log file */
  } XLogRecPtr;
  
+ typedef struct XLogwrtResult
+ {
+ 	XLogRecPtr	Write;			/* last byte + 1 written out */
+ 	XLogRecPtr	Flush;			/* last byte + 1 flushed */
+ } XLogwrtResult;
+ 
+ 
  /*
   * Macros for comparing XLogRecPtrs
   *
Index: src/include/libpq/libpq-be.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/libpq/libpq-be.h,v
retrieving revision 1.38
diff -c -w -r1.38 libpq-be.h
*** src/include/libpq/libpq-be.h	29 Nov 2003 22:41:03 -0000	1.38
--- src/include/libpq/libpq-be.h	13 Dec 2003 06:18:46 -0000
***************
*** 27,32 ****
--- 27,37 ----
  #endif
  
  
+ typedef enum CAC_state
+ {
+ 	CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY
+ } CAC_state;
+ 
  /*
   * This is used by the postmaster in its communication with frontends.	It
   * contains all state information needed during this communication before the
***************
*** 42,47 ****
--- 47,53 ----
  	ProtocolVersion proto;		/* FE/BE protocol version */
  	SockAddr	laddr;			/* local addr (postmaster) */
  	SockAddr	raddr;			/* remote addr (client) */
+ 	CAC_state	canAcceptConnections;	/* postmaster connection status */
  
  	/*
  	 * Information that needs to be saved from the startup packet and
Index: src/include/storage/fd.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/storage/fd.h,v
retrieving revision 1.40
diff -c -w -r1.40 fd.h
*** src/include/storage/fd.h	29 Nov 2003 22:41:13 -0000	1.40
--- src/include/storage/fd.h	13 Dec 2003 06:18:46 -0000
***************
*** 77,80 ****
--- 77,84 ----
  extern int	pg_fsync(int fd);
  extern int	pg_fdatasync(int fd);
  
+ /* Filename components for OpenTemporaryFile */
+ #define PG_TEMP_FILES_DIR "pgsql_tmp"
+ #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+ 
  #endif   /* FD_H */
Index: src/include/storage/ipc.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/storage/ipc.h,v
retrieving revision 1.63
diff -c -w -r1.63 ipc.h
*** src/include/storage/ipc.h	12 Dec 2003 18:45:10 -0000	1.63
--- src/include/storage/ipc.h	13 Dec 2003 06:18:46 -0000
***************
*** 32,37 ****
--- 32,39 ----
  extern void CreateSharedMemoryAndSemaphores(bool makePrivate,
  								int maxBackends,
  								int port);
+ #ifdef EXEC_BACKEND
  extern void AttachSharedMemoryAndSemaphores(void);
+ #endif
  
  #endif   /* IPC_H */
Index: src/include/storage/lock.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/storage/lock.h,v
retrieving revision 1.75
diff -c -w -r1.75 lock.h
*** src/include/storage/lock.h	1 Dec 2003 21:59:25 -0000	1.75
--- src/include/storage/lock.h	13 Dec 2003 06:18:46 -0000
***************
*** 86,93 ****
   */
  typedef struct LockMethodData
  {
- 	HTAB		   *lockHash;
- 	HTAB		   *proclockHash;
  	LOCKMETHODID	lockmethodid;
  	int				numLockModes;
  	LOCKMASK		conflictTab[MAX_LOCKMODES];
--- 86,91 ----
Index: src/include/storage/lwlock.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/storage/lwlock.h,v
retrieving revision 1.9
diff -c -w -r1.9 lwlock.h
*** src/include/storage/lwlock.h	29 Nov 2003 22:41:13 -0000	1.9
--- src/include/storage/lwlock.h	13 Dec 2003 06:18:47 -0000
***************
*** 29,35 ****
  	LockMgrLock,
  	OidGenLock,
  	XidGenLock,
- 	ShmemIndexLock,
  	SInvalLock,
  	FreeSpaceLock,
  	MMCacheLock,
--- 29,34 ----
Index: src/include/storage/shmem.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/storage/shmem.h,v
retrieving revision 1.40
diff -c -w -r1.40 shmem.h
*** src/include/storage/shmem.h	29 Nov 2003 22:41:13 -0000	1.40
--- src/include/storage/shmem.h	13 Dec 2003 06:18:47 -0000
***************
*** 61,67 ****
  } SHM_QUEUE;
  
  /* shmem.c */
! extern void InitShmemAllocation(void *seghdr);
  extern void *ShmemAlloc(Size size);
  extern bool ShmemIsValid(unsigned long addr);
  extern void InitShmemIndex(void);
--- 61,67 ----
  } SHM_QUEUE;
  
  /* shmem.c */
! extern void InitShmemAllocation(void *seghdr, bool init);
  extern void *ShmemAlloc(Size size);
  extern bool ShmemIsValid(unsigned long addr);
  extern void InitShmemIndex(void);

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Claudio Natoli (#1)

Re: fork/exec patch

Let me provide a summary of this patch because I reviewed the first
version.

The patch basically is a slight rearrangement of the code to allow
fork/exec on Unix, with the ultimate goal of doing CreateProcess on
Win32. The changes are:

o Write out postmaster global variables and per-backend
variables to be read by the exec'ed backend

o Mark some static variables as global when exec is used so
then can be dumped from postmaster.c, marked NON_EXEC_STATIC

o Remove value passing with -p now that we have per-backend
file

o Move some pointer storage out of shared memory for easier
dumping.

o Modified pgsql_temp directory cleanup to handle per-database
directories and the backend exec directory under datadir.

Let me add that Claudio is doing a fantastic job on this. The changes
are minimal and clean. I think the writing of a per-backend temp file
has allowed this patch to be smaller than it might have been.

---------------------------------------------------------------------------

Claudio Natoli wrote:

This patch is the next step towards (re)allowing fork/exec.

Bruce, I've cleaned up the parts we discussed, and, pending objections from
anyone else, it is ready for application to HEAD.

Cheers,
Claudio
--- 
Certain disclaimers and policies apply to all email sent from Memetrics.
For the full text of these disclaimers and policies see 
<a
href="http://www.memetrics.com/emailpolicy.html">http://www.memetrics.com/em
ailpolicy.html</a>

[ Attachment, skipping... ]

---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Neil Conway

neilc@samurai.com

about 22 years ago

In reply to: Bruce Momjian (#2)

Re: fork/exec patch

Bruce Momjian <pgman@candle.pha.pa.us> writes:

Let me add that Claudio is doing a fantastic job on this. The
changes are minimal and clean. I think the writing of a per-backend
temp file has allowed this patch to be smaller than it might have
been.

Did we REALLY conclude that the best way to work around the lack of
fork() on Win32 is by writing variables out to disk and reading them
back in? Frankly, that seems like an enormous kludge.

For example, couldn't we write this data into a particular location in
shared memory, and then pass that location to the child? That is still
ugly, slow, and prone to failure (shmem being statically sized), but
ISTM that the proposed implementation already possesses those
attributes :-)

(/me goes off to re-read the archives on this issue...)

-Neil

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Neil Conway (#3)

Re: fork/exec patch

Neil Conway wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

Let me add that Claudio is doing a fantastic job on this. The
changes are minimal and clean. I think the writing of a per-backend
temp file has allowed this patch to be smaller than it might have
been.

Did we REALLY conclude that the best way to work around the lack of
fork() on Win32 is by writing variables out to disk and reading them
back in? Frankly, that seems like an enormous kludge.

For example, couldn't we write this data into a particular location in
shared memory, and then pass that location to the child? That is still
ugly, slow, and prone to failure (shmem being statically sized), but
ISTM that the proposed implementation already possesses those
attributes :-)

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that. Of course, this one is
per-backend.

Yea, we could use shared memory for this too, but I don't see a problem
with using the file system. Older releases of PostgreSQL read from
postgresql.conf or pg_hba.conf or other files for every connection so I
don't see that using the file system is going to be that slow. Of
course, we removed those file reads because they were slow, but they
were also much larger and more complex in requiring parsing and stuff,
while this is just a list of binary values. We also have a GUC dump
file that is read by exec too.

The downside of shared memory is that you would need the postmaster to
write into shared memory and you have to allocate enough shared memory
for all backends, but clearly it could be done. You could just pass the
backend slot number to the child and the child could read from the
offset. Not sure how to cleanly do the GUC dump file because it is of
variable length depending on the number of GUC variables changed.

I guess the big question is whether it is worth doing in shared memory.
We also would have to pass the shared memory address to the child, and
make sure the child knew the proper offset in shared memory to find its
values.

Of course, stuff that is variable length would be a problem in shared
memory, and we have some of those for the GUC defaults.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Neil Conway

neilc@samurai.com

about 22 years ago

In reply to: Bruce Momjian (#4)

Re: fork/exec patch

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that.

For the record, I think that is ugly as well :-)

Anyway, I'm not necessarily arguing that using shmem is the right way
to go here -- that was merely an off-the-cuff suggestion. I'm just
saying that whatever solution we end up with, ISTM we can do better
than writing out + reading in a file for /every/ new connection.

-Neil

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Neil Conway (#5)

Re: [PATCHES] fork/exec patch

Neil Conway wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that.

For the record, I think that is ugly as well :-)

Anyway, I'm not necessarily arguing that using shmem is the right way
to go here -- that was merely an off-the-cuff suggestion. I'm just
saying that whatever solution we end up with, ISTM we can do better
than writing out + reading in a file for /every/ new connection.

[ Moved to hackers and win32. Discussion is writing postmaster-constant
and per-backend variables to a file for exec'ed backends to read.]

Sure --- I am all ears. I am looking for suggestions. I couldn't think
of anything better. I did ask a month ago for ideas on how to do this,
but got no reply.

One idea I had was to write the postmaster-constant values into one
file, and the per-backend values into another so you would write less
data for every backend, but then every backend has to read two files.
Is that a win?

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Bruce Momjian (#6)

Re: [PATCHES] fork/exec patch

pgman wrote:

Neil Conway wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that.

For the record, I think that is ugly as well :-)

Anyway, I'm not necessarily arguing that using shmem is the right way
to go here -- that was merely an off-the-cuff suggestion. I'm just
saying that whatever solution we end up with, ISTM we can do better
than writing out + reading in a file for /every/ new connection.

[ Moved to hackers and win32. Discussion is writing postmaster-constant
and per-backend variables to a file for exec'ed backends to read.]

Sure --- I am all ears. I am looking for suggestions. I couldn't think
of anything better. I did ask a month ago for ideas on how to do this,
but got no reply.

One idea I had was to write the postmaster-constant values into one
file, and the per-backend values into another so you would write less
data for every backend, but then every backend has to read two files.
Is that a win?

OK, new idea! Let's write the postmaster-constant values to a file, and
pass the per-backend variables on the command line using -p like we have
in the current code in CVS. However, those values include the cancel
key, which has to be a secret. Maybe we need a per-backend array in
shared memory just for those keys. The postmaster has to keep those
keys anyway, so moving into shared memory might be the right solution.
Right now the cancel key is stored in the Backend struct along with the
backend pid but that is only in the postmaster.

Does this make sense? It does remove the pgsql_temp directory we were
going to need in the datadir.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Import Notes

Reply to msg id not found:  | Resolved by subject fallback

Dennis Bjorklund

db@zigo.dhs.org

about 22 years ago

In reply to: Bruce Momjian (#4)

Re: fork/exec patch

On Sun, 14 Dec 2003, Bruce Momjian wrote:

change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that. Of course, this one is
per-backend.

Yea, we could use shared memory for this too, but I don't see a problem
with using the file system.

Why not use an anonymous pipe to send data from the parent to the child
process? That is a common way to handle this problem in win32 (and in unix
by the way). The parent sets up the pipe and the child process inherits
the handle, and after that the child and parent can excange information in
private.

--
/Dennis

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Dennis Bjorklund (#8)

Re: fork/exec patch

Dennis Bjorklund wrote:

On Sun, 14 Dec 2003, Bruce Momjian wrote:

change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that. Of course, this one is
per-backend.

Yea, we could use shared memory for this too, but I don't see a problem
with using the file system.

Why not use an anonymous pipe to send data from the parent to the child
process? That is a common way to handle this problem in win32 (and in unix
by the way). The parent sets up the pipe and the child process inherits
the handle, and after that the child and parent can excange information in
private.

Doesn't that require the postmaster to stay around to feed that
information into the pipe or can the postmaster just shove the data and
continue on, and how do the old pipes get cleaned up? Seems messy.
Also has to work on Unix too for testing.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#10

Dennis Bjorklund

db@zigo.dhs.org

about 22 years ago

In reply to: Bruce Momjian (#9)

Re: fork/exec patch

On Sun, 14 Dec 2003, Bruce Momjian wrote:

Why not use an anonymous pipe to send data from the parent to the child
process?

Doesn't that require the postmaster to stay around to feed that
information into the pipe or can the postmaster just shove the data and
continue on, and how do the old pipes get cleaned up?

I think that one can just output the data and close that end of the pipe.
But i've not looked at win32 the last 5 years or so, I could be wrong.

Seems messy.

Maybe, but to me the solution where you write to files are much more ugly.
If one does not like pipes, there are other ipc mechanisms that does not
involve creating, reading and deleting a file on each connect.

Does windows have a temp filesystem where the temp files are not actually
written out on disk? It's still ugly but better then hitting a disk all
the time.

Also has to work on Unix too for testing.

Everything can not work in unix, CreateProcess() and fork() are different.
However, the pipe solution can be mimiced in unix, but it will not be the
same code since the api's are different. So that does not give much.

--
/Dennis

#11

Claudio Natoli

claudio.natoli@memetrics.com

about 22 years ago

In reply to: Dennis Bjorklund (#10)

Re: fork/exec patch

Hi all,

Dennis Bjorklund wrote:

Also has to work on Unix too for testing.

Everything can not work in unix, CreateProcess() and fork()
are different.

True (but CreateProcess and "fork followed by exec" are pretty close). I
think what Bruce is implying is that, ideally, we'd like to keep things as
close as possible between Unix fork/exec and Windows code bases, so that:
* it remains possible to advance the Windows port under a *nix dev
environment and
* should (when!) issues arise in the Windows implementation, it will
be easier to identify and debug

Neil Conway wrote:

For example, couldn't we write this data into a particular location in
shared memory, and then pass that location to the child? That is still
ugly, slow, and prone to failure (shmem being statically sized), but
ISTM that the proposed implementation already possesses those
attributes :-)

I agree that this is a better implementation.

Bruce, do we implement this now, or just hold it as something to revisit
down the track? I'm all for leaving it as is.

Moreover, in general, how do we handle things like this? IMHO, I'd rather
live with a few kludges (that don't impact the *nix code) until the Windows
port is actually a reality, and then reiterate (having the discussions as we
go along, however, is necessary). Perhaps adding a TO_REVISIT section to
your Win32 Status Report page?

Or do people have strong leanings towards "fix as you go along"? Just feels
like that way could see us getting bogged down making things "perfect"
instead of advancing the port...

Comments?

Cheers,
Claudio

--- 
Certain disclaimers and policies apply to all email sent from Memetrics.
For the full text of these disclaimers and policies see 
<a
href="http://www.memetrics.com/emailpolicy.html">http://www.memetrics.com/em
ailpolicy.html</a>

Import Notes

Resolved by subject fallback

#12

Dennis Bjorklund

db@zigo.dhs.org

about 22 years ago

In reply to: Claudio Natoli (#11)

Re: fork/exec patch

On Mon, 15 Dec 2003, Claudio Natoli wrote:

Moreover, in general, how do we handle things like this? IMHO, I'd rather
live with a few kludges (that don't impact the *nix code) until the Windows
port is actually a reality

As long as it does not hurt the unix code it's not a big problem as I see
it. The usual open source solution is that since no one else writes the
code, you can do it the way you think works the best. To change this in
the future does not mean that everything else has to be rewritten which is
good.

It does also not mean that one can not discuss the implementation. A fair
amount of discussion is always good.

--
/Dennis

#13

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Claudio Natoli (#11)

Re: [PATCHES] fork/exec patch

[moved to hackers / win32]

Claudio Natoli wrote:

Or do people have strong leanings towards "fix as you go along"? Just feels
like that way could see us getting bogged down making things "perfect"
instead of advancing the port...

w.r.t. Win32, I think the way to proceed is (in this order):
. make it work
. make it elegant
. make it fast

BTW, I agree with Bruce, you're doing excellent stuff. Now for the fun
part (signals).

cheers

andrew

#14

Tom Lane

tgl@sss.pgh.pa.us

about 22 years ago

In reply to: Bruce Momjian (#4)

Re: fork/exec patch

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that.

You can hardly claim that "no one had issues with that". I complained
about it and I think other people did too. It's a messy, ugly approach;
moreover we have no field experience that says it's reliable.

It may be the least messy, ugly approach available, but I concur with
Neil's wish to at least look for other answers.

regards, tom lane

#15

Tom Lane

tgl@sss.pgh.pa.us

about 22 years ago

In reply to: Bruce Momjian (#7)

Re: [PATCHES] fork/exec patch

Bruce Momjian <pgman@candle.pha.pa.us> writes:

... Maybe we need a per-backend array in
shared memory just for those keys. The postmaster has to keep those
keys anyway, so moving into shared memory might be the right solution.

The postmaster's dependence on the contents of shared memory should
ideally be zero (and it is zero, or nearly so, at the moment).
Otherwise a backend crash that clobbers shared memory poses the risk of
taking down the postmaster as well. We can't go in that direction.

regards, tom lane

#16

Alvaro Herrera

alvherre@dcc.uchile.cl

about 22 years ago

In reply to: Tom Lane (#14)

Re: fork/exec patch

On Sun, Dec 14, 2003 at 06:53:22PM -0500, Tom Lane wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that.

You can hardly claim that "no one had issues with that". I complained
about it and I think other people did too. It's a messy, ugly approach;
moreover we have no field experience that says it's reliable.

Don't the FSM and the system catalog cache use a similar mechanism?

--
Alvaro Herrera (<alvherre[a]dcc.uchile.cl>)
"Limï¿½tate a mirar... y algun dï¿½a veras"

#17

Tom Lane

tgl@sss.pgh.pa.us

about 22 years ago

In reply to: Alvaro Herrera (#16)

Re: fork/exec patch

Alvaro Herrera <alvherre@dcc.uchile.cl> writes:

On Sun, Dec 14, 2003 at 06:53:22PM -0500, Tom Lane wrote:

You can hardly claim that "no one had issues with that".

Don't the FSM and the system catalog cache use a similar mechanism?

FSM uses a backing file to hold information over a database shutdown
(write once during shutdown, read once during startup). That's a little
different from once per backend fork. Also, there are no race
conditions to worry about, and finally the system does not *require* the
backing file to be either present or correct.

The catalog cache uses a file that typically gets updated once per
VACUUM. Again, the file does not have to be present, nor correct.
There are mechanisms in place to deal with the cases (including race
conditions) where it's broken or obsolete.

I have not looked at the proposed fork/exec code in any detail, but
IIUC it will be *necessary* that the intermediate file be present, and
correct. I think a minimum requirement for accepting this solution is a
sketch of how race conditions will be dealt with (ie, postmaster changes
its own value of some variable immediately after making the temp file).
I don't necessarily say that the first-cut patch has to get it right,
but we'd better understand how we will get to where it is right.

regards, tom lane

#18

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Tom Lane (#17)

Re: fork/exec patch

Tom Lane wrote:

Alvaro Herrera <alvherre@dcc.uchile.cl> writes:

On Sun, Dec 14, 2003 at 06:53:22PM -0500, Tom Lane wrote:

You can hardly claim that "no one had issues with that".

Don't the FSM and the system catalog cache use a similar mechanism?

FSM uses a backing file to hold information over a database shutdown
(write once during shutdown, read once during startup). That's a little
different from once per backend fork. Also, there are no race
conditions to worry about, and finally the system does not *require* the
backing file to be either present or correct.

The catalog cache uses a file that typically gets updated once per
VACUUM. Again, the file does not have to be present, nor correct.
There are mechanisms in place to deal with the cases (including race
conditions) where it's broken or obsolete.

I have not looked at the proposed fork/exec code in any detail, but
IIUC it will be *necessary* that the intermediate file be present, and
correct. I think a minimum requirement for accepting this solution is a
sketch of how race conditions will be dealt with (ie, postmaster changes
its own value of some variable immediately after making the temp file).
I don't necessarily say that the first-cut patch has to get it right,
but we'd better understand how we will get to where it is right.

Agreed, added to the Win32 status page:

* remove per-backend parameter file and move into shared memory

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#19

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Claudio Natoli (#11)

Re: [PATCHES] fork/exec patch

Claudio Natoli wrote:

For example, couldn't we write this data into a particular location in
shared memory, and then pass that location to the child? That is still
ugly, slow, and prone to failure (shmem being statically sized), but
ISTM that the proposed implementation already possesses those
attributes :-)

I agree that this is a better implementation.

Bruce, do we implement this now, or just hold it as something to revisit
down the track? I'm all for leaving it as is.

Moreover, in general, how do we handle things like this? IMHO, I'd rather
live with a few kludges (that don't impact the *nix code) until the Windows
port is actually a reality, and then reiterate (having the discussions as we
go along, however, is necessary). Perhaps adding a TO_REVISIT section to
your Win32 Status Report page?

Or do people have strong leanings towards "fix as you go along"? Just feels
like that way could see us getting bogged down making things "perfect"
instead of advancing the port...

Let's get it working first. I have added an item to the Win32 status
page so we will not forget it.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#20

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Tom Lane (#14)

Re: fork/exec patch

Tom Lane wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

I don't think we ever discussed it, but it seemed logical and a minimal
change to the code. We already have a GUC write of non-default values
for exec and no one had issues with that.

You can hardly claim that "no one had issues with that". I complained
about it and I think other people did too. It's a messy, ugly approach;
moreover we have no field experience that says it's reliable.

It may be the least messy, ugly approach available, but I concur with
Neil's wish to at least look for other answers.

Absolutely. I am not happy with the GUC file either, but can't see a
better way right now. I have already documented your concern about the
GUC race condition issue on the Win32 status page so we will not forget
about it.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#21

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Tom Lane (#15)

Re: [PATCHES] fork/exec patch

Tom Lane wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

... Maybe we need a per-backend array in
shared memory just for those keys. The postmaster has to keep those
keys anyway, so moving into shared memory might be the right solution.

The postmaster's dependence on the contents of shared memory should
ideally be zero (and it is zero, or nearly so, at the moment).
Otherwise a backend crash that clobbers shared memory poses the risk of
taking down the postmaster as well. We can't go in that direction.

OK, but I think we are going to need shared memory to do signals on
Win32. Perhaps we should create a second shared memory areas only for
fork/exec to hold the per-backend parameters and the signal stuff ---
that might be the cleanest solution. Also, we could pass all the exec
parameters on the command line _except_ the cancel key, which must be
secret.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#22

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Andrew Dunstan (#13)

Re: [HACKERS] [PATCHES] fork/exec patch

Andrew Dunstan wrote:

[moved to hackers / win32]

Claudio Natoli wrote:

Or do people have strong leanings towards "fix as you go along"? Just feels
like that way could see us getting bogged down making things "perfect"
instead of advancing the port...

w.r.t. Win32, I think the way to proceed is (in this order):
. make it work
. make it elegant
. make it fast

BTW, I agree with Bruce, you're doing excellent stuff. Now for the fun
part (signals).

Actually, no. I thought fork/exec would be a real mess (as did Tom),
but Claudio has done an excellent job of producing a minimal patch. The
work isn't done yet, but this small patch has taken us much closer, so I
assume signals will be even easier.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#23

Tom Lane

tgl@sss.pgh.pa.us

about 22 years ago

In reply to: Bruce Momjian (#18)

Re: fork/exec patch

Bruce Momjian <pgman@candle.pha.pa.us> writes:

Agreed, added to the Win32 status page:
* remove per-backend parameter file and move into shared memory

[itch] I'm not sure that's an answer either; see my comments about how
the postmaster shouldn't depend on the contents of shared memory being
valid.

We could get away with the postmaster having a write-only relationship
to shared memory (put value of variable X into predetermined location
Y), but I don't think that helps. It doesn't work for variable-size
values --- we certainly don't want the postmaster dependent on memory
allocation structures being valid within shared memory --- and what
about locks? Do you want the postmaster writing shared values without
taking a lock, or relying on shared-memory lock structures to be valid
enough to not lock it up or crash it? My answer to either of those is
"no way, Jose" ...

Writing temp files may actually be a cleaner solution than writing
shared memory, once we take these considerations into account. My gripe
about race conditions was "I want to see how you solve this", and wasn't
intended to mean "I don't think that is soluble".

regards, tom lane

#24

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Tom Lane (#23)

Re: fork/exec patch

Tom Lane wrote:

Bruce Momjian <pgman@candle.pha.pa.us> writes:

Agreed, added to the Win32 status page:
* remove per-backend parameter file and move into shared memory

[itch] I'm not sure that's an answer either; see my comments about how
the postmaster shouldn't depend on the contents of shared memory being
valid.

We could get away with the postmaster having a write-only relationship
to shared memory (put value of variable X into predetermined location
Y), but I don't think that helps. It doesn't work for variable-size
values --- we certainly don't want the postmaster dependent on memory
allocation structures being valid within shared memory --- and what
about locks? Do you want the postmaster writing shared values without
taking a lock, or relying on shared-memory lock structures to be valid
enough to not lock it up or crash it? My answer to either of those is
"no way, Jose" ...

Writing temp files may actually be a cleaner solution than writing
shared memory, once we take these considerations into account. My gripe
about race conditions was "I want to see how you solve this", and wasn't
intended to mean "I don't think that is soluble".

Read my idea that shared memory for signals might be required, and a
separate shared memory segment might be used for parameter passing too.

I added a question mark to the win32 TODO item, so we can keep as an
open item.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#25

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Bruce Momjian (#22)

Re: [HACKERS] [PATCHES] fork/exec patch

Bruce Momjian wrote:

Andrew Dunstan wrote:

Now for the fun
part (signals).

Actually, no. I thought fork/exec would be a real mess (as did Tom),
but Claudio has done an excellent job of producing a minimal patch. The
work isn't done yet, but this small patch has taken us much closer, so I
assume signals will be even easier.

Well, it's speculation on both our parts :-). ISTM we'll need an
explicit event loop to check the shmem (or whatever we use to simulate
signals) every so often - maybe that will be easy, I don't know - I'm
interested to see what turns up. (Of course, if we were threaded we'd
just need a thread to watch for the event ...)

cheers

andrew

#26

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Andrew Dunstan (#25)

Re: [HACKERS] [PATCHES] fork/exec patch

Andrew Dunstan wrote:

Bruce Momjian wrote:

Andrew Dunstan wrote:

Now for the fun
part (signals).

Actually, no. I thought fork/exec would be a real mess (as did Tom),
but Claudio has done an excellent job of producing a minimal patch. The
work isn't done yet, but this small patch has taken us much closer, so I
assume signals will be even easier.

Well, it's speculation on both our parts :-). ISTM we'll need an
explicit event loop to check the shmem (or whatever we use to simulate
signals) every so often - maybe that will be easy, I don't know - I'm
interested to see what turns up. (Of course, if we were threaded we'd
just need a thread to watch for the event ...)

Have you looked at the CONNX signal code on the Win32 page:

http://momjian.postgresql.org/main/writings/pgsql/win32.html

It uses shared memory and events.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#27

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Bruce Momjian (#26)

Re: [HACKERS] [PATCHES] fork/exec patch

Bruce Momjian wrote:

Have you looked at the CONNX signal code on the Win32 page:

http://momjian.postgresql.org/main/writings/pgsql/win32.html

It uses shared memory and events.

Yes, and I just did again. I guess I must be missing something, though -
I don't see what in that code causes the signalled process to call the
handler corresponding to the signal. Maybe I'm just a little brain dead
today ...

andrew

#28

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Andrew Dunstan (#27)

Re: [HACKERS] [PATCHES] fork/exec patch

Andrew Dunstan wrote:

Bruce Momjian wrote:

Have you looked at the CONNX signal code on the Win32 page:

http://momjian.postgresql.org/main/writings/pgsql/win32.html

It uses shared memory and events.

Yes, and I just did again. I guess I must be missing something, though -
I don't see what in that code causes the signalled process to call the
handler corresponding to the signal. Maybe I'm just a little brain dead
today ...

In the CONNX code for kill() I see:

sprintf(szEventName, "CONNX_SIGNAL_%x", (int) lPID);
sprintf(szSharedMemoryName, "CONNX_SMEM_%x", (int) lPID);
hSharedMemory = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, szSharedMemoryName);
if (hSharedMemory) {
/* Call the signal handle for that process.. */
void *pData = MapViewOfFile(hSharedMemory, FILE_MAP_ALL_ACCESS, 0, 0, sizeof(int));
if (pData) {
int nReturn;
HANDLE hProcessHandle;
DWORD ExitCode;
*(int *) pData = nSignal;
UnmapViewOfFile(pData);
/* Open the event handle of the other process */
hEvent = OpenEvent(EVENT_MODIFY_STATE | SYNCHRONIZE, FALSE, szEventName);
hProcessHandle = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, lPID);
if (hEvent) {
SetEvent(hEvent);
/* Wait for Event to be processed. */
do {
nReturn = WaitForSingleObject(hEvent, 0);

Now, I am no Win32 programmer, but the mixture of OpenFileMapping() and
OpenEvent() looked promising. :-)

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#29

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Claudio Natoli (#1)

Re: fork/exec patch

Your patch has been added to the PostgreSQL unapplied patches list at:

http://momjian.postgresql.org/cgi-bin/pgpatches

I will try to apply it within the next 48 hours.

---------------------------------------------------------------------------

Claudio Natoli wrote:

This patch is the next step towards (re)allowing fork/exec.

Bruce, I've cleaned up the parts we discussed, and, pending objections from
anyone else, it is ready for application to HEAD.

Cheers,
Claudio
--- 
Certain disclaimers and policies apply to all email sent from Memetrics.
For the full text of these disclaimers and policies see 
<a
href="http://www.memetrics.com/emailpolicy.html">http://www.memetrics.com/em
ailpolicy.html</a>

[ Attachment, skipping... ]

---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#30

Neil Conway

neilc@samurai.com

about 22 years ago

In reply to: Claudio Natoli (#1)

Re: fork/exec patch

Claudio Natoli <claudio.natoli@memetrics.com> writes:

This patch is the next step towards (re)allowing fork/exec.

I've included a few comments on the patch below. Unfortunately I only
had time to briefly look over the code...

Why did you change ShmemIndexLock from an LWLock to a spinlock?

The number of "FIXME" or "This is ugly" comments doesn't ease my mind
about this patch :-) How many of these issues do you plan to resolve?

Index: src/backend/tcop/postgres.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/tcop/postgres.c,v
retrieving revision 1.379
diff -c -w -r1.379 postgres.c
*** src/backend/tcop/postgres.c	1 Dec 2003 22:15:37 -0000	1.379
--- src/backend/tcop/postgres.c	13 Dec 2003 06:18:44 -0000
***************
*** 2133,2139 ****
  			case 'D':			/* PGDATA directory */
  				if (secure)
  					potential_DataDir = optarg;
- 				break;

case 'd': /* debug level */
{

Why was this change made? If you actually intend to fall through the
case here, please make it clear via a comment.

+ /*
+  * The following need to be available to the read/write_backend_variables
+  * functions
+  */
+ extern XLogRecPtr RedoRecPtr;
+ extern XLogwrtResult LogwrtResult;
+ extern slock_t *ShmemLock;
+ extern slock_t *ShmemIndexLock;
+ extern void *ShmemIndexAlloc;
+ typedef struct LWLock LWLock;
+ extern LWLock *LWLockArray;
+ extern slock_t  *ProcStructLock;
+ extern int	pgStatSock;

I wonder whether it is cleaner to make these properly public, rather
than using the NON_EXEC_STATIC #ifdef... (I'm not necessarily saying
it is, I'm just tossing it out there).

+ #define get_tmp_backend_var_file_name(buf,id)		\
+ 		Assert(DataDir);			\
+ 		sprintf((buf),				\
+ 			"%s/%s/%s.backend_var.%d",	\
+ 			DataDir,			\
+ 			PG_TEMP_FILES_DIR,		\
+ 			PG_TEMP_FILE_PREFIX,		\
+ 			(id))

This macro needs to be enclosed in a do {} while (0) block. Also,
what does the "var" in the name of this macro refer to?

+ #define get_tmp_backend_var_dir(buf)		\
+ 		sprintf((buf),"%s/%s",DataDir,PG_TEMP_FILES_DIR)

This seems a little pointless, IMHO.

+ static void
+ write_backend_variables(pid_t pid, Port *port)
+ {
+ 	char	filename[MAXPGPATH];
+ 	FILE	*fp;
+ 	get_tmp_backend_var_file_name(filename,pid);
+ 
+ 	/* Open file */
+ 	fp = AllocateFile(filename, PG_BINARY_W);
+ 	if (!fp)
+ 	{
+ 		/* As per OpenTemporaryFile... */
+ 		char dirname[MAXPGPATH];
+ 		get_tmp_backend_var_dir(dirname);
+ 		mkdir(dirname, S_IRWXU);
+ 
+ 		fp = AllocateFile(filename, PG_BINARY_W);
+ 		if (!fp)
+ 		{
+ 			ereport(FATAL,
+ 				(errcode_for_file_access(),
+ 				errmsg("could not write to file \"%s\": %m", filename)));
+ 			return;
+ 		}
+ 	}

ereport(FATAL) seems too strong, doesn't it?

+ 	read_var(LWLockArray,fp);
+ 	read_var(ProcStructLock,fp);
+ 	read_var(pgStatSock,fp);
+ 
+ 	/* Release file */
+ 	FreeFile(fp);
+ 	unlink(filename);

You should probably check the return value of unlink().

(circa line 335 of ipc/shmem.c:)

/*
* If the shmem index doesn't exist, we are bootstrapping: we must
* be trying to init the shmem index itself.
*
! * Notice that the ShmemLock is held until the shmem index has
* been completely initialized.
*/

Doesn't this function still acquire ShmemIndexLock? (i.e. why was this
comment changed?)

-Neil

#31

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Neil Conway (#30)

Re: fork/exec patch

Neil Conway wrote:

+ /*
+  * The following need to be available to the read/write_backend_variables
+  * functions
+  */
+ extern XLogRecPtr RedoRecPtr;
+ extern XLogwrtResult LogwrtResult;
+ extern slock_t *ShmemLock;
+ extern slock_t *ShmemIndexLock;
+ extern void *ShmemIndexAlloc;
+ typedef struct LWLock LWLock;
+ extern LWLock *LWLockArray;
+ extern slock_t  *ProcStructLock;
+ extern int	pgStatSock;
I wonder whether it is cleaner to make these properly public, rather
than using the NON_EXEC_STATIC #ifdef... (I'm not necessarily saying
it is, I'm just tossing it out there).

This was my idea. Rather than making statics as globals, they are
global only for exec() builds. This seemed safest. They need to be
extern for exec() because we need to reference them in a function that
writes all the postmaster globals to a file. We could have had a write
function in every C file that needed it and call the write function from
postmaster.c, but that seems like too much bloat. If we make them
extern in all builds we lose the safety of a static.

Your other comments are good and I will wait for Claudio to respond.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#32

Tom Lane

tgl@sss.pgh.pa.us

about 22 years ago

In reply to: Neil Conway (#30)

Re: fork/exec patch

Neil Conway <neilc@samurai.com> writes:

Why did you change ShmemIndexLock from an LWLock to a spinlock?

That one I can answer --- it's a bootstrapping issue: we can't use
LWLocks until we have a PGProc (*MyProc), and we can't set that up
without looking in the ShmemIndex for the related data structures.
So ShmemIndex needs to use a more primitive lock type. This is actually
the way the code used to do it; I changed the lock type when the
opportunity presented itself, but if we're going to support fork/exec
again then we have to go back to how it was done before.

Your other comments seem pretty germane to me, except for

I wonder whether it is cleaner to make these properly public, rather
than using the NON_EXEC_STATIC #ifdef... (I'm not necessarily saying
it is, I'm just tossing it out there).

I don't want to make these things public, because we don't really want
any other modules accessing them. NON_EXEC_STATIC is pretty ugly,
but it does guarantee that we will soon find out about any unintended
cross-module references, because they won't compile on Unix systems.
If you've got an idea about a cleaner way to do it, I'm all ears ...

regards, tom lane

#33

Neil Conway

neilc@samurai.com

about 22 years ago

In reply to: Tom Lane (#32)

Re: fork/exec patch

Tom Lane <tgl@sss.pgh.pa.us> writes:

That one I can answer --- it's a bootstrapping issue: we can't use
LWLocks until we have a PGProc (*MyProc), and we can't set that up
without looking in the ShmemIndex for the related data structures.
So ShmemIndex needs to use a more primitive lock type.

Fair enough.

My next question would have been to ask whether switching to a
spinlock here will be a performance problem. In looking at the code,
it seems we only hold the ShmemIndexLock for a long time (hundreds of
instructions & multiple system calls) while bootstrapping the shmem
index hash table itself. Otherwise, the lock is acquired and released
quickly, and even then it is only done during backend initialization,
so there shouldn't be much contention for it. Is this analysis
correct?

I don't want to make these things public, because we don't really
want any other modules accessing them.

I agree, both ways are non-optimal for different reasons. Can anyone
else see a better way to do this?

-Neil

#34

Magnus Hagander

mha@sollentuna.net

about 22 years ago

In reply to: Neil Conway (#33)

Re: [HACKERS] [PATCHES] fork/exec patch

Bruce Momjian wrote:

Have you looked at the CONNX signal code on the Win32 page:

http://momjian.postgresql.org/main/writings/pgsql/win32.html

It uses shared memory and events.

Yes, and I just did again. I guess I must be missing
something, though -
I don't see what in that code causes the signalled process to
call the
handler corresponding to the signal. Maybe I'm just a little
brain dead
today ...

Can't find that part either, but a few questions for the implementation
regardless of wether that code is around somewhere:

At what times do signals actually *need* to be delivered? And at what
points do the calling process need to be notified?

Actually interrupting a running process to execute a procedure in a
thread can be pretty darn tricky - AFAIK you have to manually switch
thread context and create an exception which you then catch, call
handler, reset and continue.

However, if it's acceptable to have delivery only when the thread is in
"alertable state" this should not be necessary. Then you can basically
take two approaches depending on when you need the response:

If you just need a response that the receiving process has queued the
thread handler, then create a separate thread that receives the signal
and queues a user APC on the main thread. This will then execute when
the thread enters alertable state.

If you need a response once it has actually run, then the main thread
needs to do signal polling now and then. This has the bad sideeffect
that the main thread will block completely until the signal is
delivered, which might be a while.

I don't know what the semantics are for kill() on unix there? And if it
is sync, does postgresql actually need that property?

Of course, I may have missed something completely as well :-)

//Magnus

Import Notes

Resolved by subject fallback

#35

Tom Lane

tgl@sss.pgh.pa.us

about 22 years ago

In reply to: Neil Conway (#33)

Re: fork/exec patch

Neil Conway <neilc@samurai.com> writes:

My next question would have been to ask whether switching to a
spinlock here will be a performance problem. In looking at the code,
it seems we only hold the ShmemIndexLock for a long time (hundreds of
instructions & multiple system calls) while bootstrapping the shmem
index hash table itself. Otherwise, the lock is acquired and released
quickly, and even then it is only done during backend initialization,
so there shouldn't be much contention for it. Is this analysis
correct?

Yes, at least that was the theory I was working from when I suggested
Claudio do it this way ...

regards, tom lane

#36

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Magnus Hagander (#34)

Re: [HACKERS] [PATCHES] fork/exec patch

Magnus Hagander wrote:

Bruce Momjian wrote:

Have you looked at the CONNX signal code on the Win32 page:

http://momjian.postgresql.org/main/writings/pgsql/win32.html

It uses shared memory and events.

Yes, and I just did again. I guess I must be missing
something, though -
I don't see what in that code causes the signalled process to
call the
handler corresponding to the signal. Maybe I'm just a little
brain dead
today ...

Can't find that part either, but a few questions for the implementation
regardless of wether that code is around somewhere:

At what times do signals actually *need* to be delivered? And at what
points do the calling process need to be notified?

Actually interrupting a running process to execute a procedure in a
thread can be pretty darn tricky - AFAIK you have to manually switch
thread context and create an exception which you then catch, call
handler, reset and continue.

However, if it's acceptable to have delivery only when the thread is in
"alertable state" this should not be necessary. Then you can basically
take two approaches depending on when you need the response:

If you just need a response that the receiving process has queued the
thread handler, then create a separate thread that receives the signal
and queues a user APC on the main thread. This will then execute when
the thread enters alertable state.

Actually, I see that in os-fix2.cpp there is code that sets up a thread
that just polls for the event and then calls the corresponding handler.

If you need a response once it has actually run, then the main thread
needs to do signal polling now and then. This has the bad sideeffect
that the main thread will block completely until the signal is
delivered, which might be a while.

I don't know what the semantics are for kill() on unix there? And if it
is sync, does postgresql actually need that property?

kill() should return success upon the signal being queued, as I
understand it - i.e. no sync.

All this kind of answers my original question, by pointing out the need
to poll one way or another, which is why I suggested that signal
emulation might be messy, and more complicated than the fork/exec case.

In effect, the runtime in Unix provides the signal polling for you for
free, which is why this method of IPC is so commonly used.

cheers

andrew

#37

Magnus Hagander

mha@sollentuna.net

about 22 years ago

In reply to: Andrew Dunstan (#36)

Re: [HACKERS] [PATCHES] fork/exec patch

If you need a response once it has actually run, then the

main thread

needs to do signal polling now and then. This has the bad

sideeffect

that the main thread will block completely until the signal is
delivered, which might be a while.

I don't know what the semantics are for kill() on unix

there? And if

it is sync, does postgresql actually need that property?

kill() should return success upon the signal being queued, as I
understand it - i.e. no sync.

Ok. That makes things a lot easier. THen it's just the question of how
fast we need to react.

All this kind of answers my original question, by pointing
out the need
to poll one way or another, which is why I suggested that signal
emulation might be messy, and more complicated than the
fork/exec case.

That definitly makes it more complicated. However, IIRC the thread will
enter an "alerable state" at least for network I/O, and probably for
disk I/O. Can't seem to find a reference to this, though. If you use
queued user APCs (using QueueUserAPC()), from a separate signal-handling
thread, this should dispatch the signal handlers *fairly* fast. The
question is if "fairly fast" is good enough, or if "really fast" is
needed?

In that case, you might have to work either with poll-really-often
(ickk), or using manual thread exceptions (raelly messy).

It shouldn't need to be *too* messy, if you can live with possible
slowdowns (assuming the thread does go alertable on blocking I/O).
Possibly add a WaitForSingleObject() at some place in a loop to force it
there in some cases.

Looking some more on the os-fix2.cpp file (I read the thing as OS2 fix,
and thus ignored it), it seems they fire all signal handlers on a thread
of their own. Is the backend threadsafe enough that that is safe? If so,
that would do away with the nede to use QueueUserAPC() to make the call
execute on the main thread.

//Magnus

Import Notes

Resolved by subject fallback

#38

Dann Corbit

DCorbit@connx.com

about 22 years ago

In reply to: Magnus Hagander (#37)

Re: [HACKERS] [PATCHES] fork/exec patch

-----Original Message-----
From: Magnus Hagander [mailto:mha@sollentuna.net]
Sent: Tuesday, December 16, 2003 7:53 AM
To: Andrew Dunstan; PostgreSQL-development; pgsql-hackers-win32
Subject: Re: [pgsql-hackers-win32] [HACKERS] [PATCHES] fork/exec patch

If you need a response once it has actually run, then the

main thread

needs to do signal polling now and then. This has the bad

sideeffect

that the main thread will block completely until the signal is
delivered, which might be a while.

I don't know what the semantics are for kill() on unix

there? And if

it is sync, does postgresql actually need that property?

kill() should return success upon the signal being queued, as I
understand it - i.e. no sync.

Ok. That makes things a lot easier. THen it's just the
question of how fast we need to react.

All this kind of answers my original question, by pointing
out the need
to poll one way or another, which is why I suggested that signal
emulation might be messy, and more complicated than the
fork/exec case.

That definitly makes it more complicated. However, IIRC the
thread will
enter an "alerable state" at least for network I/O, and probably for
disk I/O. Can't seem to find a reference to this, though. If you use
queued user APCs (using QueueUserAPC()), from a separate
signal-handling
thread, this should dispatch the signal handlers *fairly* fast. The
question is if "fairly fast" is good enough, or if "really fast" is
needed?

In that case, you might have to work either with poll-really-often
(ickk), or using manual thread exceptions (raelly messy).

It shouldn't need to be *too* messy, if you can live with possible
slowdowns (assuming the thread does go alertable on blocking I/O).
Possibly add a WaitForSingleObject() at some place in a loop
to force it
there in some cases.

Looking some more on the os-fix2.cpp file (I read the thing
as OS2 fix,
and thus ignored it), it seems they fire all signal handlers
on a thread
of their own. Is the backend threadsafe enough that that is
safe? If so,
that would do away with the nede to use QueueUserAPC() to
make the call
execute on the main thread.

By using events you don't have to poll at all. You are waiting on the
event. A signal fires the event. It is also possible to add as many
signal types as you like, even beyond the standard UNIX batch if it
suits your fancy.

Import Notes

Resolved by subject fallback

#39

Magnus Hagander

mha@sollentuna.net

about 22 years ago

In reply to: Dann Corbit (#38)

Re: [HACKERS] [PATCHES] fork/exec patch

We'd probably need a tool like "pgsignal" or something that
would send signals to the backends. Since you can't use the
kill commandline tool (that one only supports what would be
called kill -9 on unix)

When the user says "net stop postmaster", an entry point in
postmaster will be called by the Service Control Manager. Then
postmaster will have to send the signals to the other processes
asking them to stop and presumably wait for them to stop before
terminating itself.

Absolutely, but there are other signals to send, no? Or you might want
to send a signal directly to a backend (to cancel for example), as you
can do on Unix.

//Magnus

Import Notes

Resolved by subject fallback

#40

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Dann Corbit (#38)

Re: [HACKERS] [PATCHES] fork/exec patch

Dann Corbit wrote:

By using events you don't have to poll at all. You are waiting on the
event. A signal fires the event. It is also possible to add as many
signal types as you like, even beyond the standard UNIX batch if it
suits your fancy.

Isn't WaitForSingleObject() in effect a polling call?

cheers

andrew

#41

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Magnus Hagander (#39)

Re: [pgsql-hackers-win32] [PATCHES] fork/exec patch

Magnus Hagander wrote:

Absolutely, but there are other signals to send, no? Or you might want
to send a signal directly to a backend (to cancel for example), as you
can do on Unix.

In normal operation the only thing that should be signalling a backend
is the postmaster.

cheers

andrew

#42

Neil Conway

neilc@samurai.com

about 22 years ago

In reply to: Andrew Dunstan (#41)

Re: [pgsql-hackers-win32] [PATCHES] fork/exec patch

Andrew Dunstan <andrew@dunslane.net> writes:

In normal operation the only thing that should be signalling a
backend is the postmaster.

Oh? What about LISTEN/NOTIFY?

-Neil

#43

Hannu Krosing

hannu@tm.ee

about 22 years ago

In reply to: Neil Conway (#42)

Re: [pgsql-hackers-win32] [PATCHES] fork/exec patch

Neil Conway kirjutas K, 17.12.2003 kell 00:37:

Andrew Dunstan <andrew@dunslane.net> writes:

In normal operation the only thing that should be signalling a
backend is the postmaster.

Oh? What about LISTEN/NOTIFY?

IIRC cancelling queries is done by making a connection to a new backend
and then this backend signalls the one needing to have its query
cancelled. Or is the cancelling done by postmaster without starting a
new backend ?

--------------
Hannu

#44

Bruce Momjian

pgman@candle.pha.pa.us

about 22 years ago

In reply to: Claudio Natoli (#1)

Re: fork/exec patch

Patch withdrawn. Author will resubmit new version.

---------------------------------------------------------------------------

Claudio Natoli wrote:

This patch is the next step towards (re)allowing fork/exec.

Bruce, I've cleaned up the parts we discussed, and, pending objections from
anyone else, it is ready for application to HEAD.

Cheers,
Claudio
--- 
Certain disclaimers and policies apply to all email sent from Memetrics.
For the full text of these disclaimers and policies see 
<a
href="http://www.memetrics.com/emailpolicy.html">http://www.memetrics.com/em
ailpolicy.html</a>

[ Attachment, skipping... ]

---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

#45

Andrew Dunstan

andrew@dunslane.net

about 22 years ago

In reply to: Bruce Momjian (#44)

Re: [pgsql-hackers-win32] [PATCHES] fork/exec patch

Neil Conway said:

Andrew Dunstan <andrew@dunslane.net> writes:

In normal operation the only thing that should be signalling a
backend is the postmaster.

Oh? What about LISTEN/NOTIFY?

er, yeah. *self-lart*

+" ... or another backend"

cheers

andrew

Import Notes