Index location patch for review

Started by Jim Buttafuocoover 24 years ago18 messages

Jim Buttafuoco

jim@buttafuoco.net

over 24 years ago

1 attachment(s)

Hi all,

Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.

I believe this patch is complete, but it is my first .

Thanks
Jim

Attachments:

location.diffsapplication/octet-stream; name=location.diffsDownload

diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/doc/src/sgml/ref/create_database.sgml a/doc/src/sgml/ref/create_database.sgml
*** pgsql/doc/src/sgml/ref/create_database.sgml	Mon Sep  3 08:57:49 2001
--- a/doc/src/sgml/ref/create_database.sgml	Tue Sep 11 14:04:35 2001
***************
*** 25,30 ****
--- 25,31 ----
    <synopsis>
  CREATE DATABASE <replaceable class="PARAMETER">name</replaceable>
      [ WITH [ LOCATION = '<replaceable class="parameter">dbpath</replaceable>' ]
+            [ INDEX_LOCATION = '<replaceable class="parameter">idxpath</replaceable>' ]
             [ TEMPLATE = <replaceable class="parameter">template</replaceable> ]
             [ ENCODING = <replaceable class="parameter">encoding</replaceable> ] ]
    </synopsis>
***************
*** 52,57 ****
--- 53,69 ----
        <listitem>
         <para>
  	An alternate filesystem location in which to store the new database,
+ 	specified as a string literal;
+ 	or <literal>DEFAULT</literal> to use the default location.
+        </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry>
+      <varlistentry>
+       <term><replaceable class="parameter">idxpath</replaceable></term>
+       <listitem>
+        <para>
+ 	An alternate filesystem location in which to store the new database indexes,
  	specified as a string literal;
  	or <literal>DEFAULT</literal> to use the default location.
         </para>
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/catalog/catalog.c a/src/backend/catalog/catalog.c
*** pgsql/src/backend/catalog/catalog.c	Fri Aug 10 14:57:33 2001
--- a/src/backend/catalog/catalog.c	Sun Sep  9 16:18:26 2001
***************
*** 46,51 ****
--- 46,77 ----
  	}
  	return path;
  }
+ /*
+  * relidxpath			- construct path to a relation's file
+  *
+  * Result is a palloc'd string.
+  */
+ 
+ char *
+ relidxpath(RelFileNode rnode)
+ {
+ 	char	   *path;
+ 
+ 	if (rnode.tblNode == (Oid) 0)		/* "global tablespace" */
+ 	{
+ 		/* Shared system relations live in {datadir}/global */
+ 		path = (char *) palloc(strlen(DataDir) + 8 + sizeof(NameData) + 1);
+ 		sprintf(path, "%s/global/%u", DataDir, rnode.relNode);
+ 	}
+ 	else
+ 	{
+ 		path = (char *) palloc(strlen(DataDir) + 6 + 2 * sizeof(NameData) + 3);
+ 		sprintf(path, "%s/base/%u_index/%u", DataDir,
+ 				rnode.tblNode,rnode.relNode);
+ 	}
+ 	return path;
+ }
+ 
  
  /*
   * GetDatabasePath			- construct path to a database dir
***************
*** 70,75 ****
--- 96,121 ----
  		sprintf(path, "%s/base/%u", DataDir, tblNode);
  	}
  	return path;
+ }
+ 
+ 
+ char *
+ GetIndexPath(Oid tblNode)
+ {
+   char	   *path;
+  
+   if (tblNode == (Oid) 0)		/* "global tablespace" */
+   {
+       /* Shared system relations live in {datadir}/global */
+       path = (char *) palloc(strlen(DataDir) + 8);
+       sprintf(path, "%s/global", DataDir);
+    }
+   else
+   {
+       path = (char *) palloc(strlen(DataDir) + 6 + sizeof(NameData) + 1);
+       sprintf(path, "%s/base/%u_index", DataDir, tblNode);
+   }
+   return path;
  }
  
  
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/commands/dbcommands.c a/src/backend/commands/dbcommands.c
*** pgsql/src/backend/commands/dbcommands.c	Thu Sep  6 00:57:28 2001
--- a/src/backend/commands/dbcommands.c	Mon Sep 10 16:24:08 2001
***************
*** 45,53 ****
  static bool get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
  			int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
  			TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! 			char *dbpath);
  static bool get_user_info(Oid use_sysid, bool *use_super, bool *use_createdb);
  static char *resolve_alt_dbpath(const char *dbpath, Oid dboid);
  static bool remove_dbdirs(const char *real_loc, const char *altloc);
  
  /*
--- 45,54 ----
  static bool get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
  			int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
  			TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! 			char *dbpath,char *idxpath);
  static bool get_user_info(Oid use_sysid, bool *use_super, bool *use_createdb);
  static char *resolve_alt_dbpath(const char *dbpath, Oid dboid);
+ static char *resolve_alt_idxpath(const char *dbpath, Oid dboid);
  static bool remove_dbdirs(const char *real_loc, const char *altloc);
  
  /*
***************
*** 56,67 ****
  
  void
  createdb(const char *dbname, const char *dbpath,
! 		 const char *dbtemplate, int encoding)
  {
  	char	   *nominal_loc;
  	char	   *alt_loc;
  	char	   *target_dir;
  	char		src_loc[MAXPGPATH];
  	char		buf[2 * MAXPGPATH + 100];
  	bool		use_super,
  				use_createdb;
--- 57,72 ----
  
  void
  createdb(const char *dbname, const char *dbpath,
! 		 const char *dbtemplate, int encoding,const char *idxpath)
  {
  	char	   *nominal_loc;
+ 	char       *nominal_idx_loc;
  	char	   *alt_loc;
+ 	char       *alt_idx_loc;
  	char	   *target_dir;
+ 	char	   *target_idx_dir;
  	char		src_loc[MAXPGPATH];
+ 	char		src_idxloc[MAXPGPATH];
  	char		buf[2 * MAXPGPATH + 100];
  	bool		use_super,
  				use_createdb;
***************
*** 73,78 ****
--- 78,84 ----
  	TransactionId src_vacuumxid;
  	TransactionId src_frozenxid;
  	char		src_dbpath[MAXPGPATH];
+ 	char		src_idxpath[MAXPGPATH];
  	Relation	pg_database_rel;
  	HeapTuple	tuple;
  	TupleDesc	pg_database_dsc;
***************
*** 80,85 ****
--- 86,92 ----
  	char		new_record_nulls[Natts_pg_database];
  	Oid			dboid;
  
+ 
  	if (!get_user_info(GetUserId(), &use_super, &use_createdb))
  		elog(ERROR, "current user name is invalid");
  
***************
*** 98,104 ****
  	 * idea, so accept possibility of race to create.  We will check again
  	 * after we grab the exclusive lock.
  	 */
! 	if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
  		elog(ERROR, "CREATE DATABASE: database \"%s\" already exists", dbname);
  
  	/*
--- 105,111 ----
  	 * idea, so accept possibility of race to create.  We will check again
  	 * after we grab the exclusive lock.
  	 */
! 	if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,NULL))
  		elog(ERROR, "CREATE DATABASE: database \"%s\" already exists", dbname);
  
  	/*
***************
*** 110,116 ****
  	if (!get_db_info(dbtemplate, &src_dboid, &src_owner, &src_encoding,
  					 &src_istemplate, &src_lastsysoid,
  					 &src_vacuumxid, &src_frozenxid,
! 					 src_dbpath))
  		elog(ERROR, "CREATE DATABASE: template \"%s\" does not exist",
  			 dbtemplate);
  
--- 117,123 ----
  	if (!get_db_info(dbtemplate, &src_dboid, &src_owner, &src_encoding,
  					 &src_istemplate, &src_lastsysoid,
  					 &src_vacuumxid, &src_frozenxid,
! 					 src_dbpath,src_idxpath))
  		elog(ERROR, "CREATE DATABASE: template \"%s\" does not exist",
  			 dbtemplate);
  
***************
*** 126,138 ****
  	}
  
  	/*
! 	 * Determine physical path of source database
  	 */
  	alt_loc = resolve_alt_dbpath(src_dbpath, src_dboid);
  	if (!alt_loc)
  		alt_loc = GetDatabasePath(src_dboid);
  	strcpy(src_loc, alt_loc);
  
  	/*
  	 * The source DB can't have any active backends, except this one
  	 * (exception is to allow CREATE DB while connected to template1).
--- 133,150 ----
  	}
  
  	/*
! 	 * Determine physical path of source database and indexes
  	 */
  	alt_loc = resolve_alt_dbpath(src_dbpath, src_dboid);
  	if (!alt_loc)
  		alt_loc = GetDatabasePath(src_dboid);
  	strcpy(src_loc, alt_loc);
  
+ 	alt_loc = resolve_alt_dbpath(src_idxpath, src_dboid);
+ 	if (!alt_loc)
+ 		alt_loc = GetIndexPath(src_dboid);
+ 	strcpy(src_idxloc, alt_loc);
+ 
  	/*
  	 * The source DB can't have any active backends, except this one
  	 * (exception is to allow CREATE DB while connected to template1).
***************
*** 165,170 ****
--- 177,185 ----
  	 * specified.
  	 */
  	nominal_loc = GetDatabasePath(dboid);
+ 	nominal_idx_loc = GetIndexPath(dboid);
+ 
+ 
  	alt_loc = resolve_alt_dbpath(dbpath, dboid);
  
  	if (strchr(nominal_loc, '\''))
***************
*** 207,215 ****
--- 222,252 ----
  				 nominal_loc, alt_loc);
  	}
  
+ 	/* create symlink for default index space 
+            if idxpath is null then just symlink 
+            datpath to datpath_index
+ 	*/
+ 
+ 	alt_idx_loc = resolve_alt_idxpath(idxpath, dboid);
+ 	target_idx_dir = alt_idx_loc ? alt_idx_loc : nominal_idx_loc;
+ 
+ 	if (mkdir(target_idx_dir, S_IRWXU) != 0)
+ 	  elog(ERROR, "CREATE DATABASE: unable to create index directory '%s': %m",
+ 	       target_dir);
+ 	
+ 	if(alt_idx_loc)
+ 	{
+ 	    if (symlink(alt_idx_loc, nominal_idx_loc) != 0)
+ 	      elog(ERROR, "CREATE DATABASE: could not link '%s' to '%s': %m",
+ 		   nominal_loc, alt_loc);
+ 
+ 	}
+ 
  	/* Copy the template database to the new location */
  	snprintf(buf, sizeof(buf), "cp -r '%s' '%s'", src_loc, target_dir);
  
+ 	elog(NOTICE, buf);
+ 
  	if (system(buf) != 0)
  	{
  		if (remove_dbdirs(nominal_loc, alt_loc))
***************
*** 218,230 ****
  			elog(ERROR, "CREATE DATABASE: could not initialize database directory; delete failed as well");
  	}
  
  	/*
  	 * Now OK to grab exclusive lock on pg_database.
  	 */
  	pg_database_rel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
  
  	/* Check to see if someone else created same DB name meanwhile. */
! 	if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
  	{
  		/* Don't hold lock while doing recursive remove */
  		heap_close(pg_database_rel, AccessExclusiveLock);
--- 255,280 ----
  			elog(ERROR, "CREATE DATABASE: could not initialize database directory; delete failed as well");
  	}
  
+  	/* Copy the template database index files to the new location */
+  	snprintf(buf, sizeof(buf), "cp -r '%s'/* '%s'", src_idxloc, target_idx_dir);
+  
+ 	if (system(buf) != 0)
+  	{
+  		if (remove_dbdirs(nominal_idx_loc, alt_idx_loc))
+  			elog(ERROR, "CREATE DATABASE: could not initialize index directory");
+  		else
+  			elog(ERROR, "CREATE DATABASE: could not initialize index directory; delete failed as well");
+  		if (! remove_dbdirs(nominal_loc, alt_loc))
+  		        elog(ERROR, "CREATE DATABASE: could not initialize index directory; data dir delete failed as well");
+  	}
+  
  	/*
  	 * Now OK to grab exclusive lock on pg_database.
  	 */
  	pg_database_rel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
  
  	/* Check to see if someone else created same DB name meanwhile. */
! 	if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,NULL))
  	{
  		/* Don't hold lock while doing recursive remove */
  		heap_close(pg_database_rel, AccessExclusiveLock);
***************
*** 250,255 ****
--- 300,307 ----
  	/* no nulls here, GetRawDatabaseInfo doesn't like them */
  	new_record[Anum_pg_database_datpath - 1] =
  		DirectFunctionCall1(textin, CStringGetDatum(dbpath ? dbpath : ""));
+ 	new_record[Anum_pg_database_idxpath - 1] =
+ 		DirectFunctionCall1(textin, CStringGetDatum(idxpath ? idxpath : ""));
  
  	memset(new_record_nulls, ' ', sizeof(new_record_nulls));
  
***************
*** 298,304 ****
--- 350,359 ----
  	Oid			db_id;
  	char	   *alt_loc;
  	char	   *nominal_loc;
+ 	char	   *alt_idx_loc;
+ 	char	   *nominal_idx_loc;
  	char		dbpath[MAXPGPATH];
+ 	char		idxpath[MAXPGPATH];
  	Relation	pgdbrel;
  	HeapScanDesc pgdbscan;
  	ScanKeyData key;
***************
*** 327,333 ****
  	pgdbrel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
  
  	if (!get_db_info(dbname, &db_id, &db_owner, NULL,
! 					 &db_istemplate, NULL, NULL, NULL, dbpath))
  		elog(ERROR, "DROP DATABASE: database \"%s\" does not exist", dbname);
  
  	if (!use_super && GetUserId() != db_owner)
--- 382,388 ----
  	pgdbrel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
  
  	if (!get_db_info(dbname, &db_id, &db_owner, NULL,
! 					 &db_istemplate, NULL, NULL, NULL, dbpath,idxpath))
  		elog(ERROR, "DROP DATABASE: database \"%s\" does not exist", dbname);
  
  	if (!use_super && GetUserId() != db_owner)
***************
*** 344,349 ****
--- 399,407 ----
  	nominal_loc = GetDatabasePath(db_id);
  	alt_loc = resolve_alt_dbpath(dbpath, db_id);
  
+ 	nominal_idx_loc = GetIndexPath(db_id);
+ 	alt_idx_loc = resolve_alt_idxpath(idxpath, db_id);
+ 	
  	/*
  	 * Check for active backends in the target database.
  	 */
***************
*** 399,404 ****
--- 457,463 ----
  	 * Remove the database's subdirectory and everything in it.
  	 */
  	remove_dbdirs(nominal_loc, alt_loc);
+ 	remove_dbdirs(nominal_idx_loc, alt_idx_loc);
  
  	/*
  	 * Force dirty buffers out to disk, so that newly-connecting backends
***************
*** 419,425 ****
  get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
  			int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
  			TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! 			char *dbpath)
  {
  	Relation	relation;
  	ScanKeyData scanKey;
--- 478,484 ----
  get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
  			int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
  			TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! 			char *dbpath,char *idxpath)
  {
  	Relation	relation;
  	ScanKeyData scanKey;
***************
*** 484,489 ****
--- 543,567 ----
  			else
  				strcpy(dbpath, "");
  		}
+ 
+ 		/* idx path (as registered in pg_database) */
+ 		if (idxpath)
+ 		{
+ 			tmptext = DatumGetTextP(heap_getattr(tuple,
+ 												 Anum_pg_database_idxpath,
+ 											  RelationGetDescr(relation),
+ 												 &isnull));
+ 			if (!isnull)
+ 			{
+ 				Assert(VARSIZE(tmptext) - VARHDRSZ < MAXPGPATH);
+ 
+ 				strncpy(idxpath, VARDATA(tmptext), VARSIZE(tmptext) - VARHDRSZ);
+ 				*(idxpath + VARSIZE(tmptext) - VARHDRSZ) = '\0';
+ 			}
+ 			else
+ 				strcpy(idxpath, "");
+ 		}
+ 
  	}
  
  	heap_endscan(scan);
***************
*** 553,591 ****
  	return ret;
  }
  
  
  static bool
  remove_dbdirs(const char *nominal_loc, const char *alt_loc)
  {
- 	const char *target_dir;
  	char		buf[MAXPGPATH + 100];
  	bool		success = true;
  
- 	target_dir = alt_loc ? alt_loc : nominal_loc;
- 
  	/*
  	 * Close virtual file descriptors so the kernel has more available for
  	 * the system() call below.
  	 */
  	closeAllVfds();
  
! 	if (alt_loc)
  	{
- 		/* remove symlink */
- 		if (unlink(nominal_loc) != 0)
- 		{
- 			elog(NOTICE, "could not remove '%s': %m", nominal_loc);
- 			success = false;
- 		}
- 	}
  
! 	snprintf(buf, sizeof(buf), "rm -rf '%s'", target_dir);
  
! 	if (system(buf) != 0)
  	{
! 		elog(NOTICE, "database directory '%s' could not be removed",
! 			 target_dir);
! 		success = false;
  	}
  
  	return success;
--- 631,711 ----
  	return ret;
  }
  
+ static char *
+ resolve_alt_idxpath(const char *dbpath, Oid dboid)
+ {
+ 	const char *prefix;
+ 	char	   *ret;
+ 	size_t		len;
+ 
+ 	if (dbpath == NULL || dbpath[0] == '\0')
+ 		return NULL;
+ 
+ 	if (strchr(dbpath, '/'))
+ 	{
+ 		if (dbpath[0] != '/')
+ 			elog(ERROR, "Relative paths are not allowed as database locations");
+ #ifndef ALLOW_ABSOLUTE_DBPATHS
+ 		elog(ERROR, "Absolute paths are not allowed as database locations");
+ #endif
+ 		prefix = dbpath;
+ 	}
+ 	else
+ 	{
+ 		/* must be environment variable */
+ 		char	   *var = getenv(dbpath);
+ 
+ 		if (!var)
+ 			elog(ERROR, "Postmaster environment variable '%s' not set", dbpath);
+ 		if (var[0] != '/')
+ 			elog(ERROR, "Postmaster environment variable '%s' must be absolute path", dbpath);
+ 		prefix = var;
+ 	}
+ 
+ 	len = strlen(prefix) + 6 + sizeof(Oid) * 8 + 1;
+ 	ret = palloc(len);
+ 	snprintf(ret, len, "%s/base/%u_index", prefix, dboid);
+ 
+ 	return ret;
+ }
+ 
  
  static bool
  remove_dbdirs(const char *nominal_loc, const char *alt_loc)
  {
  	char		buf[MAXPGPATH + 100];
  	bool		success = true;
  
  	/*
  	 * Close virtual file descriptors so the kernel has more available for
  	 * the system() call below.
  	 */
  	closeAllVfds();
  
! 	if(alt_loc)
  	{
  
! 	  snprintf(buf, sizeof(buf), "rm -rf '%s'", alt_loc);
! 	  
! 	  
! 	  if (system(buf) != 0)
! 	  {
! 	    elog(NOTICE, "database directory '%s' could not be removed",
! 		 alt_loc);
! 	    success = false;
! 	  }
! 	}
  
! 	if(nominal_loc)
  	{
! 	  snprintf(buf, sizeof(buf), "rm -rf '%s'", nominal_loc);
! 	  
! 	  if (system(buf) != 0)
! 	  {
! 	    elog(NOTICE, "database directory '%s' could not be removed",
! 		 nominal_loc);
! 	    success = false;
! 	 }
  	}
  
  	return success;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/nodes/copyfuncs.c a/src/backend/nodes/copyfuncs.c
*** pgsql/src/backend/nodes/copyfuncs.c	Sun Aug 26 12:55:59 2001
--- a/src/backend/nodes/copyfuncs.c	Sun Sep  9 14:53:36 2001
***************
*** 2220,2225 ****
--- 2220,2227 ----
  		newnode->dbname = pstrdup(from->dbname);
  	if (from->dbpath)
  		newnode->dbpath = pstrdup(from->dbpath);
+ 	if (from->idxpath)
+ 		newnode->idxpath = pstrdup(from->idxpath);
  	if (from->dbtemplate)
  		newnode->dbtemplate = pstrdup(from->dbtemplate);
  	newnode->encoding = from->encoding;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/parser/gram.y a/src/backend/parser/gram.y
*** pgsql/src/backend/parser/gram.y	Thu Sep  6 00:57:28 2001
--- a/src/backend/parser/gram.y	Sun Sep  9 15:01:23 2001
***************
*** 347,353 ****
  		DATABASE, DELIMITERS, DO,
  		EACH, ENCODING, EXCLUSIVE, EXPLAIN,
  		FORCE, FORWARD, FREEZE, FUNCTION, HANDLER,
! 		ILIKE, INCREMENT, INDEX, INHERITS, INSTEAD, ISNULL,
  		LANCOMPILER, LIMIT, LISTEN, LOAD, LOCATION, LOCK_P,
  		MAXVALUE, MINVALUE, MODE, MOVE,
  		NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
--- 347,353 ----
  		DATABASE, DELIMITERS, DO,
  		EACH, ENCODING, EXCLUSIVE, EXPLAIN,
  		FORCE, FORWARD, FREEZE, FUNCTION, HANDLER,
! 		ILIKE, INCREMENT, INDEX, INDEX_LOCATION, INHERITS, INSTEAD, ISNULL,
  		LANCOMPILER, LIMIT, LISTEN, LOAD, LOCATION, LOCK_P,
  		MAXVALUE, MINVALUE, MODE, MOVE,
  		NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
***************
*** 2969,2974 ****
--- 2969,2977 ----
  							case 3:
  								n->encoding = lfirsti(lnext(optitem));
  								break;
+ 							case 4:
+ 								n->idxpath = (char *) lsecond(optitem);
+ 								break;
  						}
  					}
  					$$ = (Node *)n;
***************
*** 2978,2983 ****
--- 2981,2987 ----
  					CreatedbStmt *n = makeNode(CreatedbStmt);
  					n->dbname = $3;
  					n->dbpath = NULL;
+ 					n->idxpath = NULL;
  					n->dbtemplate = NULL;
  					n->encoding = -1;
  					$$ = (Node *)n;
***************
*** 3002,3007 ****
--- 3006,3019 ----
  				{
  					$$ = lconsi(1, makeList1(NULL));
  				}
+                 | INDEX_LOCATION '=' Sconst
+ 				{
+ 					$$ = lconsi(4, makeList1($3));
+ 				}
+ 		| INDEX_LOCATION '=' DEFAULT
+ 				{
+ 					$$ = lconsi(4, makeList1(NULL));
+ 				}
  		| TEMPLATE '=' name
  				{
  					$$ = lconsi(2, makeList1($3));
***************
*** 5637,5642 ****
--- 5649,5655 ----
  		| IMMEDIATE						{ $$ = "immediate"; }
  		| INCREMENT						{ $$ = "increment"; }
  		| INDEX							{ $$ = "index"; }
+ 		| INDEX_LOCATION					{ $$ = "index_location"; }
  		| INHERITS						{ $$ = "inherits"; }
  		| INSENSITIVE					{ $$ = "insensitive"; }
  		| INSERT						{ $$ = "insert"; }
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/parser/keywords.c a/src/backend/parser/keywords.c
*** pgsql/src/backend/parser/keywords.c	Sun Aug 26 12:56:00 2001
--- a/src/backend/parser/keywords.c	Sun Sep  9 14:53:42 2001
***************
*** 133,138 ****
--- 133,139 ----
  	{"in", IN},
  	{"increment", INCREMENT},
  	{"index", INDEX},
+ 	{"index_location", INDEX_LOCATION},
  	{"inherits", INHERITS},
  	{"initially", INITIALLY},
  	{"inner", INNER_P},
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/storage/smgr/md.c a/src/backend/storage/smgr/md.c
*** pgsql/src/backend/storage/smgr/md.c	Fri Aug 24 10:07:49 2001
--- a/src/backend/storage/smgr/md.c	Sun Sep  9 14:53:46 2001
***************
*** 18,23 ****
--- 18,24 ----
  #include <unistd.h>
  #include <fcntl.h>
  #include <sys/file.h>
+ #include <sys/stat.h>
  
  #include "catalog/catalog.h"
  #include "miscadmin.h"
***************
*** 126,134 ****
  	int			fd,
  				vfd;
  
  	Assert(reln->rd_fd < 0);
  
! 	path = relpath(reln->rd_node);
  
  	fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
  
--- 127,143 ----
  	int			fd,
  				vfd;
  
+ 	
  	Assert(reln->rd_fd < 0);
  
! 	if(reln->rd_rel->relkind == RELKIND_INDEX)
! 	{
! 	  path = relidxpath(reln->rd_node);
! 	}
! 	else
! 	{
! 	  path = relpath(reln->rd_node);
! 	}
  
  	fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
  
***************
*** 179,186 ****
--- 188,201 ----
  	int			status = SM_SUCCESS;
  	int			save_errno = 0;
  	char	   *path;
+ 	struct stat buf;
  
  	path = relpath(rnode);
+ 	if(stat(path,&buf) == -1)  /* lets try for an index... */
+ 	{
+ 	  pfree(path);
+ 	  path = relidxpath(rnode);
+ 	}
  
  	/* Delete the first segment, or only segment if not doing segmenting */
  	if (unlink(path) < 0)
***************
*** 303,309 ****
  
  	Assert(reln->rd_fd < 0);
  
! 	path = relpath(reln->rd_node);
  
  	fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
  
--- 318,331 ----
  
  	Assert(reln->rd_fd < 0);
  
! 	if(reln->rd_rel->relkind == RELKIND_INDEX)
! 	{
! 	  path = relidxpath(reln->rd_node);
! 	}
! 	else
! 	{
! 	  path = relpath(reln->rd_node);
! 	}
  
  	fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
  
***************
*** 941,947 ****
  			   *fullpath;
  
  	/* be sure we have enough space for the '.segno', if any */
! 	path = relpath(reln->rd_node);
  
  	if (segno > 0)
  	{
--- 963,976 ----
  			   *fullpath;
  
  	/* be sure we have enough space for the '.segno', if any */
! 	if(reln->rd_rel->relkind == RELKIND_INDEX)
! 	{
! 	  path = relidxpath(reln->rd_node);
! 	}
! 	else
! 	{
! 	  path = relpath(reln->rd_node);
! 	}
  
  	if (segno > 0)
  	{
***************
*** 1066,1074 ****
  #ifndef LET_OS_MANAGE_FILESIZE
  	BlockNumber	segno;
  #endif
  
  	path = relpath(rnode);
! 
  #ifndef LET_OS_MANAGE_FILESIZE
  	/* append the '.segno', if needed */
  	segno = blkno / ((BlockNumber) RELSEG_SIZE);
--- 1095,1109 ----
  #ifndef LET_OS_MANAGE_FILESIZE
  	BlockNumber	segno;
  #endif
+ 	struct stat buf;
  
  	path = relpath(rnode);
! 	if(stat(path,&buf) == -1)  /* lets try for an index... */
! 	{
! 	  pfree(path);
! 	  path = relidxpath(rnode);
! 	}
! 	    
  #ifndef LET_OS_MANAGE_FILESIZE
  	/* append the '.segno', if needed */
  	segno = blkno / ((BlockNumber) RELSEG_SIZE);
***************
*** 1085,1092 ****
  	/* call fd.c to allow other FDs to be closed if needed */
  	fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
  	if (fd < 0)
  		elog(DEBUG, "_mdfd_blind_getseg: couldn't open %s: %m", path);
! 
  	pfree(path);
  
  	return fd;
--- 1120,1128 ----
  	/* call fd.c to allow other FDs to be closed if needed */
  	fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
  	if (fd < 0)
+ 	{
  		elog(DEBUG, "_mdfd_blind_getseg: couldn't open %s: %m", path);
! 	}
  	pfree(path);
  
  	return fd;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/storage/smgr/smgr.c a/src/backend/storage/smgr/smgr.c
*** pgsql/src/backend/storage/smgr/smgr.c	Mon Jul  2 16:50:46 2001
--- a/src/backend/storage/smgr/smgr.c	Sun Sep  9 14:53:46 2001
***************
*** 171,177 ****
  int
  smgrcreate(int16 which, Relation reln)
  {
! 	int			fd;
  	PendingRelDelete *pending;
  
  	if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
--- 171,177 ----
  int
  smgrcreate(int16 which, Relation reln)
  {
!         int			fd;
  	PendingRelDelete *pending;
  
  	if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
***************
*** 267,273 ****
  		return -1;
  	if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
  		if (!failOK)
! 			elog(ERROR, "cannot open %s: %m", RelationGetRelationName(reln));
  
  	return fd;
  }
--- 267,273 ----
  		return -1;
  	if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
  		if (!failOK)
! 			elog(ERROR, "smgropen: cannot open %s: %m", RelationGetRelationName(reln));
  
  	return fd;
  }
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/tcop/utility.c a/src/backend/tcop/utility.c
*** pgsql/src/backend/tcop/utility.c	Fri Sep  7 21:10:20 2001
--- a/src/backend/tcop/utility.c	Sun Sep  9 14:53:46 2001
***************
*** 620,626 ****
  				set_ps_display(commandTag = "CREATE DATABASE");
  
  				createdb(stmt->dbname, stmt->dbpath,
! 						 stmt->dbtemplate, stmt->encoding);
  			}
  			break;
  
--- 620,626 ----
  				set_ps_display(commandTag = "CREATE DATABASE");
  
  				createdb(stmt->dbname, stmt->dbpath,
! 						 stmt->dbtemplate, stmt->encoding,stmt->idxpath);
  			}
  			break;
  
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/catalog.h a/src/include/catalog/catalog.h
*** pgsql/src/include/catalog/catalog.h	Wed May 30 16:52:34 2001
--- a/src/include/catalog/catalog.h	Sun Sep  9 14:53:49 2001
***************
*** 19,25 ****
--- 19,27 ----
  #include "storage/relfilenode.h"
  
  extern char *relpath(RelFileNode rnode);
+ extern char *relidxpath(RelFileNode rnode);
  extern char *GetDatabasePath(Oid tblNode);
+ extern char *GetIndexPath(Oid tblNode);
  
  extern bool IsSystemRelationName(const char *relname);
  extern bool IsSharedSystemRelationName(const char *relname);
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/pg_attribute.h a/src/include/catalog/pg_attribute.h
*** pgsql/src/include/catalog/pg_attribute.h	Sun Aug 26 12:56:00 2001
--- a/src/include/catalog/pg_attribute.h	Sun Sep  9 15:05:23 2001
***************
*** 280,285 ****
--- 280,286 ----
  DATA(insert ( 1262 datfrozenxid		28 0  4   8 0 -1 -1 t p f i f f));
  /* do not mark datpath as toastable; GetRawDatabaseInfo won't cope */
  DATA(insert ( 1262 datpath			25 0 -1   9 0 -1 -1 f p f i f f));
+ DATA(insert ( 1262 idxpath			25 0 -1  10 0 -1 -1 f p f i f f));
  DATA(insert ( 1262 ctid				27 0  6  -1 0 -1 -1 f p f i f f));
  DATA(insert ( 1262 oid				26 0  4  -2 0 -1 -1 t p f i f f));
  DATA(insert ( 1262 xmin				28 0  4  -3 0 -1 -1 t p f i f f));
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/pg_class.h a/src/include/catalog/pg_class.h
*** pgsql/src/include/catalog/pg_class.h	Sun Aug 26 12:56:01 2001
--- a/src/include/catalog/pg_class.h	Sun Sep  9 15:04:48 2001
***************
*** 142,148 ****
  DESCR("");
  DATA(insert OID = 1261 (  pg_group		87	PGUID 0 1261 0 0 0 0 f t r 3  0 0 0 0 0 f f f f _null_ ));
  DESCR("");
! DATA(insert OID = 1262 (  pg_database	88	PGUID 0 1262 0 0 0 0 f t r 9  0 0 0 0 0 t f f f _null_ ));
  DESCR("");
  DATA(insert OID = 376  (  pg_xactlock	0	PGUID 0    0 0 0 0 0 f t s 1  0 0 0 0 0 f f f f _null_ ));
  DESCR("");
--- 142,148 ----
  DESCR("");
  DATA(insert OID = 1261 (  pg_group		87	PGUID 0 1261 0 0 0 0 f t r 3  0 0 0 0 0 f f f f _null_ ));
  DESCR("");
! DATA(insert OID = 1262 (  pg_database	88	PGUID 0 1262 0 0 0 0 f t r 10  0 0 0 0 0 t f f f _null_ ));
  DESCR("");
  DATA(insert OID = 376  (  pg_xactlock	0	PGUID 0    0 0 0 0 0 f t s 1  0 0 0 0 0 f f f f _null_ ));
  DESCR("");
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/pg_database.h a/src/include/catalog/pg_database.h
*** pgsql/src/include/catalog/pg_database.h	Sun Aug 26 12:56:02 2001
--- a/src/include/catalog/pg_database.h	Sun Sep  9 16:37:43 2001
***************
*** 42,47 ****
--- 42,48 ----
  	TransactionId datvacuumxid;	/* all XIDs before this are vacuumed */
  	TransactionId datfrozenxid;	/* all XIDs before this are frozen */
  	text		datpath;		/* VARIABLE LENGTH FIELD */
+ 	text		idxpath;		/* VARIABLE LENGTH FIELD */
  } FormData_pg_database;
  
  /* ----------------
***************
*** 55,61 ****
   *		compiler constants for pg_database
   * ----------------
   */
! #define Natts_pg_database				9
  #define Anum_pg_database_datname		1
  #define Anum_pg_database_datdba			2
  #define Anum_pg_database_encoding		3
--- 56,62 ----
   *		compiler constants for pg_database
   * ----------------
   */
! #define Natts_pg_database				10
  #define Anum_pg_database_datname		1
  #define Anum_pg_database_datdba			2
  #define Anum_pg_database_encoding		3
***************
*** 65,72 ****
  #define Anum_pg_database_datvacuumxid	7
  #define Anum_pg_database_datfrozenxid	8
  #define Anum_pg_database_datpath		9
  
! DATA(insert OID = 1 (  template1 PGUID ENCODING t t 0 0 0 "" ));
  DESCR("Default template database");
  
  #define TemplateDbOid			1
--- 66,74 ----
  #define Anum_pg_database_datvacuumxid	7
  #define Anum_pg_database_datfrozenxid	8
  #define Anum_pg_database_datpath		9
+ #define Anum_pg_database_idxpath		10
  
! DATA(insert OID = 1 (  template1 PGUID ENCODING t t 0 0 0 "" ""));
  DESCR("Default template database");
  
  #define TemplateDbOid			1
***************
*** 80,82 ****
--- 82,88 ----
  #undef DATAMARKOID
  
  #endif	 /* PG_DATABASE_H */
+ 
+ 
+ 
+ 
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/commands/dbcommands.h a/src/include/commands/dbcommands.h
*** pgsql/src/include/commands/dbcommands.h	Wed Mar 21 23:00:42 2001
--- a/src/include/commands/dbcommands.h	Sun Sep  9 14:53:49 2001
***************
*** 15,21 ****
  #define DBCOMMANDS_H
  
  extern void createdb(const char *dbname, const char *dbpath,
! 		 const char *dbtemplate, int encoding);
  extern void dropdb(const char *dbname);
  
  #endif	 /* DBCOMMANDS_H */
--- 15,21 ----
  #define DBCOMMANDS_H
  
  extern void createdb(const char *dbname, const char *dbpath,
! 		 const char *dbtemplate, int encoding,const char *idxpath);
  extern void dropdb(const char *dbname);
  
  #endif	 /* DBCOMMANDS_H */
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/nodes/parsenodes.h a/src/include/nodes/parsenodes.h
*** pgsql/src/include/nodes/parsenodes.h	Sun Aug 26 12:56:02 2001
--- a/src/include/nodes/parsenodes.h	Sun Sep  9 14:53:49 2001
***************
*** 656,661 ****
--- 656,662 ----
  	NodeTag		type;
  	char	   *dbname;			/* name of database to create */
  	char	   *dbpath;			/* location of database (NULL = default) */
+         char       *idxpath;                    /* locaiton of database INDEXES (NULL = default) */
  	char	   *dbtemplate;		/* template to use (NULL = default) */
  	int			encoding;		/* MULTIBYTE encoding (-1 = use default) */
  } CreatedbStmt;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/interfaces/ecpg/preproc/keywords.c a/src/interfaces/ecpg/preproc/keywords.c
*** pgsql/src/interfaces/ecpg/preproc/keywords.c	Thu Aug 16 16:38:55 2001
--- a/src/interfaces/ecpg/preproc/keywords.c	Sun Sep  9 14:53:49 2001
***************
*** 131,136 ****
--- 131,137 ----
  	{"in", IN},
  	{"increment", INCREMENT},
  	{"index", INDEX},
+ 	{"index_location", INDEX_LOCATION},
  	{"inherits", INHERITS},
  	{"initially", INITIALLY},
  	{"inner", INNER_P},
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/interfaces/ecpg/preproc/preproc.y a/src/interfaces/ecpg/preproc/preproc.y
*** pgsql/src/interfaces/ecpg/preproc/preproc.y	Sun Aug 19 05:21:44 2001
--- a/src/interfaces/ecpg/preproc/preproc.y	Sun Sep  9 15:02:09 2001
***************
*** 219,225 ****
  		COMMENT, COPY, CREATEDB, CREATEUSER, CYCLE, DATABASE,
  		DELIMITERS, DO, EACH, ENCODING, EXCLUSIVE, EXPLAIN, EXTEND,
  		FORCE, FORWARD, FUNCTION, HANDLER, INCREMENT,
! 		INDEX, INHERITS, INSTEAD, ISNULL, LANCOMPILER, LIMIT,
  		LISTEN, UNLISTEN, LOAD, LOCATION, LOCK_P, MAXVALUE,
  		MINVALUE, MODE, MOVE, NEW, NOCREATEDB, NOCREATEUSER,
  		NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
--- 219,225 ----
  		COMMENT, COPY, CREATEDB, CREATEUSER, CYCLE, DATABASE,
  		DELIMITERS, DO, EACH, ENCODING, EXCLUSIVE, EXPLAIN, EXTEND,
  		FORCE, FORWARD, FUNCTION, HANDLER, INCREMENT,
! 		INDEX, INDEX_LOCATION, INHERITS, INSTEAD, ISNULL, LANCOMPILER, LIMIT,
  		LISTEN, UNLISTEN, LOAD, LOCATION, LOCK_P, MAXVALUE,
  		MINVALUE, MODE, MOVE, NEW, NOCREATEDB, NOCREATEUSER,
  		NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
***************
*** 2231,2236 ****
--- 2231,2238 ----
  
  createdb_opt_item:  LOCATION '=' StringConst	{ $$ = cat2_str(make_str("location ="), $3); }
  		| LOCATION '=' DEFAULT		{ $$ = make_str("location = default"); }
+                 | INDEX_LOCATION '=' StringConst	{ $$ = cat2_str(make_str("index_location ="), $3); }
+ 		| INDEX_LOCATION '=' DEFAULT		{ $$ = make_str("index_location = default"); }
  		| TEMPLATE '=' name  		{ $$ = cat2_str(make_str("template ="), $3); }
  		| TEMPLATE '=' DEFAULT		{ $$ = make_str("template = default"); }
  		| ENCODING '=' PosIntStringConst  
***************
*** 5022,5027 ****
--- 5024,5030 ----
  	| IMMEDIATE			{ $$ = make_str("immediate"); } 
  	| INCREMENT			{ $$ = make_str("increment"); }
  	| INDEX				{ $$ = make_str("index"); }
+ 	| INDEX_LOCATION      	        { $$ = make_str("index_location"); }
  	| INHERITS			{ $$ = make_str("inherits"); }
  	| INSENSITIVE			{ $$ = make_str("insensitive"); }
  	| INSERT			{ $$ = make_str("insert"); }

Bruce Momjian

pgman@candle.pha.pa.us

over 24 years ago

In reply to: Jim Buttafuoco (#1)

Re: Index location patch for review

Hi all,

Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.

I believe this patch is complete, but it is my first .

This patch allows index locations to be specified as different from data
locations. Is this a feature direction we want to go in? Comments?

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Thomas Lockhart

lockhart@fourpalms.org

over 24 years ago

In reply to: Bruce Momjian (#2)

Re: Index location patch for review

Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.

This patch allows index locations to be specified as different from data
locations. Is this a feature direction we want to go in? Comments?

I have not looked at the patch, either implementation or proposed
syntax, but in general we certainly want to head in a direction where we
have full control over placement of storage resources.

- Thomas

Darren King

DarrenK@Routescape.com

over 24 years ago

In reply to: Thomas Lockhart (#3)

Re: Index location patch for review

Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.

I believe this patch is complete, but it is my first .

This patch allows index locations to be specified as
different from data locations. Is this a feature direction
we want to go in? Comments?

Having the table and index on separate drives can do wonders for i/o
performance. :)

darrenk

Import Notes

Resolved by subject fallback

Stefan Rindeskar

sr@globecom.net

over 24 years ago

In reply to: Bruce Momjian (#2)

Re: Index location patch for review

I am very new to this mailinglist so I apologize if I start talking early but
I've been working as a sysadmin and that kind of problems for a long while
now and my suggestion is that it is a start but I think that we should aim a
little higher than this and use something more like the Oracle approach
instead. Where they introduce an abstraction layer in the form of a
tablespace. And this tablespace is then referenced from the create table or
create index instead.
eg:
table -> tablespace -> path to physical storage
index -> tablespace -> path to physical storage

Advantages:
Changes can be done to storage whithout need to change create scripts for db,
tables and so on.
Designers can specify in which tablespace tables/indexes should reside based
on usage.
Sysadmins can work with tablespaces and change paths without changing
anything in the database/table/index definitions.

The alternative is symlinks to distribute the load and that is not a pretty
sight dba-wise.

Hope you can bare with me on this, since I think it is an very important
issue.
I'm unfortunately not a fast coder yet (but I'm getting faster :-) ). But I
could start writing a spec if someone is interrested.

Bruce Momjian wrote:

Show quoted text

Hi all,

Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.

I believe this patch is complete, but it is my first .

This patch allows index locations to be specified as different from data
locations. Is this a feature direction we want to go in? Comments?
--
Bruce Momjian                        |  http://candle.pha.pa.us
pgman@candle.pha.pa.us               |  (610) 853-3000
+  If your life is a hard drive,     |  830 Blythe Avenue
+  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
---------------------------(end of broadcast)---------------------------
TIP 6: Have you searched our list archives?

http://www.postgresql.org/search.mpl

Mikheev, Vadim

vmikheev@SECTORBASE.COM

over 24 years ago

In reply to: Stefan Rindeskar (#5)

Re: Index location patch for review

Attached is a patch that adds support for specifying a
location for indexes via the "create database" command.

I believe this patch is complete, but it is my first .

This patch allows index locations to be specified as
different from data locations. Is this a feature direction
we want to go in? Comments?

The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them: we could use new "create
database" syntax to specify default tablespace for indices.
Unfortunately I removed message with patch, can you send it
to me, Bruce?

Vadim

Import Notes

Resolved by subject fallback

Tom Lane

tgl@sss.pgh.pa.us

over 24 years ago

In reply to: Mikheev, Vadim (#6)

Re: Index location patch for review

"Mikheev, Vadim" <vmikheev@SECTORBASE.COM> writes:

The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:

Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.

At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.

regards, tom lane

Jim Buttafuoco

jim@buttafuoco.net

over 24 years ago

In reply to: Tom Lane (#7)

Re: Index location patch for review

I agree that groups of objects in separate data storage areas are needed
and that is what I am trying to get to. Don't you think that Postgresql
with locations/files is the same as Oracle tablespaces. I don't think
we want to invent our own filesystem (which is what a tablespace really
is...).

Jim

Show quoted text

Attached is a patch that adds support for specifying a
location for indexes via the "create database" command.

I believe this patch is complete, but it is my first .

This patch allows index locations to be specified as
different from data locations. Is this a feature direction
we want to go in? Comments?

The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them: we could use new "create
database" syntax to specify default tablespace for indices.
Unfortunately I removed message with patch, can you send it
to me, Bruce?

Vadim

Import Notes

Resolved by subject fallback

Bruce Momjian

pgman@candle.pha.pa.us

over 24 years ago

In reply to: Tom Lane (#7)

Re: Index location patch for review

"Mikheev, Vadim" <vmikheev@SECTORBASE.COM> writes:

The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:

Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.

At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.

If that is your only concern, I can tell you for sure that if the
locations are on different drives, there will be a performance benefit.
It is standard database practice to put indexes on different drives than
data. In fact, sometimes you want to put two tables that are frequently
joined on separate drives.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

#10

Thomas Lockhart

lockhart@fourpalms.org

over 24 years ago

In reply to: Mikheev, Vadim (#6)

Re: Index location patch for review

...

At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.

Clearly there can be a *storage management* benefit to having control
over what gets put where, so this does not need to be justified strictly
on a performance basis.

For features like this, we will feel free to evolve them or
revolutionize them with further development, so I'm not worried about
the backward compatibility issue for cases like this.

Comments?

- Thomas

#11

Jim Buttafuoco

jim@buttafuoco.net

over 24 years ago

In reply to: Thomas Lockhart (#10)

Re: Index location patch for review

just change the work tablespace below to location and that is exactly
what this patch is trying to do. You can think of the LOCATION and
INDEX_LOCATION provided to the create database command as the default
storage locations for these objects. In the future, I want to enable
the DBA to specify LOCATIONS any object just like Oracle. I am also
planning on a pg_locations table and "create location" command which
will do what the current initlocation script does and more.

Jim

I am very new to this mailinglist so I apologize if I start talking

early but

I've been working as a sysadmin and that kind of problems for a long

while

now and my suggestion is that it is a start but I think that we should

aim a

little higher than this and use something more like the Oracle

approach

instead. Where they introduce an abstraction layer in the form of a
tablespace. And this tablespace is then referenced from the create

table or

create index instead.
eg:
table -> tablespace -> path to physical storage
index -> tablespace -> path to physical storage

Advantages:
Changes can be done to storage whithout need to change create scripts

for db,

tables and so on.
Designers can specify in which tablespace tables/indexes should reside

based

on usage.
Sysadmins can work with tablespaces and change paths without changing
anything in the database/table/index definitions.

The alternative is symlinks to distribute the load and that is not a

pretty

sight dba-wise.

Hope you can bare with me on this, since I think it is an very

important

issue.
I'm unfortunately not a fast coder yet (but I'm getting faster :-) ).

But I

could start writing a spec if someone is interrested.

Bruce Momjian wrote:

Hi all,

Attached is a patch that adds support for specifying a location

for

indexes via the "create database" command.

I believe this patch is complete, but it is my first .

This patch allows index locations to be specified as different from

data

locations. Is this a feature direction we want to go in? Comments?

--
Bruce Momjian                        |  http://candle.pha.pa.us
pgman@candle.pha.pa.us               |  (610) 853-3000
+  If your life is a hard drive,     |  830 Blythe Avenue
+  Christ can be your backup.        |  Drexel Hill, Pennsylvania

19026

---------------------------(end of

broadcast)---------------------------

TIP 6: Have you searched our list archives?

http://www.postgresql.org/search.mpl

---------------------------(end of

broadcast)---------------------------

TIP 2: you can get off all lists at once with the unregister command
(send "unregister YourEmailAddressHere" to

majordomo@postgresql.org)

Show quoted text

Import Notes

Resolved by subject fallback

#12

Bruce Momjian

pgman@candle.pha.pa.us

over 24 years ago

In reply to: Thomas Lockhart (#10)

Re: Index location patch for review

...

At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.

Clearly there can be a *storage management* benefit to having control
over what gets put where, so this does not need to be justified strictly
on a performance basis.

For features like this, we will feel free to evolve them or
revolutionize them with further development, so I'm not worried about
the backward compatibility issue for cases like this.

Comments?

Agreed.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

#13

Mikheev, Vadim

vmikheev@SECTORBASE.COM

over 24 years ago

In reply to: Bruce Momjian (#12)

Re: Index location patch for review

The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:

Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.

As I said, INDEX_LOCATION in CREATE DATABASE could mean location
of default tablespace for indices in future and one will be able
to override tablespace for particular index with TABLESPACE
clause in CREATE INDEX command.

At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.

Agreed. He mentioned significant performance difference but it would
be great to see results of pgbench tests with scaling factor of >= 10.
Jim?

Also, after reviewing patch I have to say that it will NOT work
with WAL. Jim, please do not name index' dir as "<TBL_NODE>_index".
Instead, just use different TBL_NODE for indices (different number).
It's not good to put if(reln->rd_rel->relkind == RELKIND_INDEX)
stuff into storage manager - only two numbers (tblnode & relnode)
must be used to identify file, no any other logical information
totally unrelated to storage issues.

Vadim

Import Notes

Resolved by subject fallback

#14

Jim Buttafuoco

jim@buttafuoco.net

over 24 years ago

In reply to: Mikheev, Vadim (#13)

Re: Index location patch for review

Vadim,

I don't understand the WAL issue below, can you explain. The dir name
is the same name as the database with _index added to it. This is how
the current datpath stuff works. I really just copied the datpath code
to get this patch to work...

Also I have been running this patch (both 7.1.3 and 7.2devel) against
some of my companies applications. I have loaded a small database 10G
data and 15G indexes both with and without the patch. There seems to be
between 5% and 10% performance gain doing most common db commands
(selects, selects with joins and inserts). The system is a DUAL P3 733
with 3 IDE disks. One for PGDATA, second for APPDATA and third for
APPIDX. As you can see I have seperated WAL files, GLOBAL, Application
data and application indexes over 3 disks. Our production systems have
around 50k queries/day ( not including data loads), so I believe that
when this patch get put into production, with 20 disks and 10 database
the performance increase should go up.

I should also add, that I have been working on the second part of this
patch, which will allow tables and indexes to be put into LOCATIONS
also. I am going planning on having a PG_LOCATIONS table and
CREATE|DROP|ALTER location SQL command instead of the initlocation shell
script we currently have. The only thing stopping me now is 7.2 testing
I am planning on doing once the beta begins and problems adding a
location column to the pg_class table with the necessary support code in
heap.c...

Thanks for all the comments (keep them comming)

Jim

The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:

Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.

As I said, INDEX_LOCATION in CREATE DATABASE could mean location
of default tablespace for indices in future and one will be able
to override tablespace for particular index with TABLESPACE
clause in CREATE INDEX command.

At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.

Agreed. He mentioned significant performance difference but it would
be great to see results of pgbench tests with scaling factor of >= 10.
Jim?

Also, after reviewing patch I have to say that it will NOT work
with WAL. Jim, please do not name index' dir as "<TBL_NODE>_index".
Instead, just use different TBL_NODE for indices (different number).
It's not good to put if(reln->rd_rel->relkind == RELKIND_INDEX)
stuff into storage manager - only two numbers (tblnode & relnode)
must be used to identify file, no any other logical information
totally unrelated to storage issues.

Vadim

---------------------------(end of

broadcast)---------------------------

Show quoted text

TIP 5: Have you checked our extensive FAQ?

http://www.postgresql.org/users-lounge/docs/faq.html

Import Notes

Resolved by subject fallback

#15

Mikheev, Vadim

vmikheev@SECTORBASE.COM

over 24 years ago

In reply to: Jim Buttafuoco (#14)

Re: Index location patch for review

I don't understand the WAL issue below, can you explain. The dir name
is the same name as the database with _index added to it. This is how
the current datpath stuff works. I really just copied the datpath
code to get this patch to work...

At the time of after crash recovery WAL is not able to read relation
description from catalog and so only relfilenode is provided for
storage manager in relation structure (look backend/access/transam/
xlogutils.c:XLogOpenRelation). Well, we could add Index/Table
file type identifier to RmgrData (rmgr.c in the same dir) to set
relkind in relation structure, but I don't see any reason to
do so when we can just use different tblnode number for indices and
name index dirs just like other dirs under 'base' named - ie
only tblnode number is used for dir names, without any additions
unrelated to storage issues.

Vadim

Import Notes

Resolved by subject fallback

#16

Mikheev, Vadim

vmikheev@SECTORBASE.COM

over 24 years ago

In reply to: Mikheev, Vadim (#15)

Re: Index location patch for review

Also I have been running this patch (both 7.1.3 and 7.2devel) against
some of my companies applications. I have loaded a small database 10G

We are not familiar with your applications. It would be better to see
results of test suit available to the community. pgbench is first to
come in mind. Such tests would be more valuable.

Vadim

Import Notes

Resolved by subject fallback

#17

Jim Buttafuoco

jim@buttafuoco.net

over 24 years ago

In reply to: Mikheev, Vadim (#16)

Re: Index location patch for review

I could also symlink all index files back to the tblnode directory?

I don't understand the WAL issue below, can you explain. The dir

name

is the same name as the database with _index added to it. This is

how

the current datpath stuff works. I really just copied the datpath
code to get this patch to work...

At the time of after crash recovery WAL is not able to read relation
description from catalog and so only relfilenode is provided for
storage manager in relation structure (look backend/access/transam/
xlogutils.c:XLogOpenRelation). Well, we could add Index/Table
file type identifier to RmgrData (rmgr.c in the same dir) to set
relkind in relation structure, but I don't see any reason to
do so when we can just use different tblnode number for indices and
name index dirs just like other dirs under 'base' named - ie
only tblnode number is used for dir names, without any additions
unrelated to storage issues.

Vadim

---------------------------(end of

broadcast)---------------------------

TIP 1: subscribe and unsubscribe commands go to

majordomo@postgresql.org

Show quoted text

Import Notes

Resolved by subject fallback

#18

Jim Buttafuoco

jim@buttafuoco.net

over 24 years ago

In reply to: Jim Buttafuoco (#17)

Re: Index location patch for review

Here is my pgbench results. As you can see the I am getting 2X tps with
the 2 directories. I believe this is a BIG win for Postgresql if we can
figure out the WAL recovery issues.

Can someone other than me apply the patch and verify the pgbench
results.

My hardward setup is a dual processor P3/733 running Redhat 7.1 with 512
megs of memory. The postgresql.conf file is the installed version with
NO changes.

Jim

template1=# create database one_dir with location='PGDATA1';
template1=# create database two_dir with location='PGDATA1'
index_location='PGIDX1';
for X in 1 2 3 4 5 6 7 8 9 10
do
pgbench -i -s 10 one_dir >>one_dir.log
pgbench -i -s 10 two_dir >>two_dir.log
done

bash-2.04$ grep 'excluding' one_dir.log
tps = 44.319306(excluding connections establishing)
tps = 34.641020(excluding connections establishing)
tps = 50.516889(excluding connections establishing)
tps = 52.747039(excluding connections establishing)
tps = 16.203821(excluding connections establishing)
tps = 36.902861(excluding connections establishing)
tps = 52.511769(excluding connections establishing)
tps = 53.479882(excluding connections establishing)
tps = 54.599429(excluding connections establishing)
tps = 36.780419(excluding connections establishing)
tps = 48.048279(excluding connections establishing)

bash-2.04$ grep 'excluding' two_dir.log
tps = 58.739049(excluding connections establishing)
tps = 100.259270(excluding connections establishing)
tps = 103.156166(excluding connections establishing)
tps = 110.829358(excluding connections establishing)
tps = 111.929690(excluding connections establishing)
tps = 106.840118(excluding connections establishing)
tps = 101.563159(excluding connections establishing)
tps = 102.877060(excluding connections establishing)
tps = 103.784717(excluding connections establishing)
tps = 53.056309(excluding connections establishing)
tps = 73.842428(excluding connections establishing)

Also I have been running this patch (both 7.1.3 and 7.2devel)

against

some of my companies applications. I have loaded a small database

10G

We are not familiar with your applications. It would be better to see
results of test suit available to the community. pgbench is first to
come in mind. Such tests would be more valuable.

Vadim

---------------------------(end of

broadcast)---------------------------

Show quoted text

TIP 4: Don't 'kill -9' the postmaster

Import Notes

Resolved by subject fallback