Index location patch for review
Hi all,
Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.
I believe this patch is complete, but it is my first .
Thanks
Jim
Attachments:
location.diffsapplication/octet-stream; name=location.diffsDownload
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/doc/src/sgml/ref/create_database.sgml a/doc/src/sgml/ref/create_database.sgml
*** pgsql/doc/src/sgml/ref/create_database.sgml Mon Sep 3 08:57:49 2001
--- a/doc/src/sgml/ref/create_database.sgml Tue Sep 11 14:04:35 2001
***************
*** 25,30 ****
--- 25,31 ----
<synopsis>
CREATE DATABASE <replaceable class="PARAMETER">name</replaceable>
[ WITH [ LOCATION = '<replaceable class="parameter">dbpath</replaceable>' ]
+ [ INDEX_LOCATION = '<replaceable class="parameter">idxpath</replaceable>' ]
[ TEMPLATE = <replaceable class="parameter">template</replaceable> ]
[ ENCODING = <replaceable class="parameter">encoding</replaceable> ] ]
</synopsis>
***************
*** 52,57 ****
--- 53,69 ----
<listitem>
<para>
An alternate filesystem location in which to store the new database,
+ specified as a string literal;
+ or <literal>DEFAULT</literal> to use the default location.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <varlistentry>
+ <term><replaceable class="parameter">idxpath</replaceable></term>
+ <listitem>
+ <para>
+ An alternate filesystem location in which to store the new database indexes,
specified as a string literal;
or <literal>DEFAULT</literal> to use the default location.
</para>
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/catalog/catalog.c a/src/backend/catalog/catalog.c
*** pgsql/src/backend/catalog/catalog.c Fri Aug 10 14:57:33 2001
--- a/src/backend/catalog/catalog.c Sun Sep 9 16:18:26 2001
***************
*** 46,51 ****
--- 46,77 ----
}
return path;
}
+ /*
+ * relidxpath - construct path to a relation's file
+ *
+ * Result is a palloc'd string.
+ */
+
+ char *
+ relidxpath(RelFileNode rnode)
+ {
+ char *path;
+
+ if (rnode.tblNode == (Oid) 0) /* "global tablespace" */
+ {
+ /* Shared system relations live in {datadir}/global */
+ path = (char *) palloc(strlen(DataDir) + 8 + sizeof(NameData) + 1);
+ sprintf(path, "%s/global/%u", DataDir, rnode.relNode);
+ }
+ else
+ {
+ path = (char *) palloc(strlen(DataDir) + 6 + 2 * sizeof(NameData) + 3);
+ sprintf(path, "%s/base/%u_index/%u", DataDir,
+ rnode.tblNode,rnode.relNode);
+ }
+ return path;
+ }
+
/*
* GetDatabasePath - construct path to a database dir
***************
*** 70,75 ****
--- 96,121 ----
sprintf(path, "%s/base/%u", DataDir, tblNode);
}
return path;
+ }
+
+
+ char *
+ GetIndexPath(Oid tblNode)
+ {
+ char *path;
+
+ if (tblNode == (Oid) 0) /* "global tablespace" */
+ {
+ /* Shared system relations live in {datadir}/global */
+ path = (char *) palloc(strlen(DataDir) + 8);
+ sprintf(path, "%s/global", DataDir);
+ }
+ else
+ {
+ path = (char *) palloc(strlen(DataDir) + 6 + sizeof(NameData) + 1);
+ sprintf(path, "%s/base/%u_index", DataDir, tblNode);
+ }
+ return path;
}
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/commands/dbcommands.c a/src/backend/commands/dbcommands.c
*** pgsql/src/backend/commands/dbcommands.c Thu Sep 6 00:57:28 2001
--- a/src/backend/commands/dbcommands.c Mon Sep 10 16:24:08 2001
***************
*** 45,53 ****
static bool get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! char *dbpath);
static bool get_user_info(Oid use_sysid, bool *use_super, bool *use_createdb);
static char *resolve_alt_dbpath(const char *dbpath, Oid dboid);
static bool remove_dbdirs(const char *real_loc, const char *altloc);
/*
--- 45,54 ----
static bool get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! char *dbpath,char *idxpath);
static bool get_user_info(Oid use_sysid, bool *use_super, bool *use_createdb);
static char *resolve_alt_dbpath(const char *dbpath, Oid dboid);
+ static char *resolve_alt_idxpath(const char *dbpath, Oid dboid);
static bool remove_dbdirs(const char *real_loc, const char *altloc);
/*
***************
*** 56,67 ****
void
createdb(const char *dbname, const char *dbpath,
! const char *dbtemplate, int encoding)
{
char *nominal_loc;
char *alt_loc;
char *target_dir;
char src_loc[MAXPGPATH];
char buf[2 * MAXPGPATH + 100];
bool use_super,
use_createdb;
--- 57,72 ----
void
createdb(const char *dbname, const char *dbpath,
! const char *dbtemplate, int encoding,const char *idxpath)
{
char *nominal_loc;
+ char *nominal_idx_loc;
char *alt_loc;
+ char *alt_idx_loc;
char *target_dir;
+ char *target_idx_dir;
char src_loc[MAXPGPATH];
+ char src_idxloc[MAXPGPATH];
char buf[2 * MAXPGPATH + 100];
bool use_super,
use_createdb;
***************
*** 73,78 ****
--- 78,84 ----
TransactionId src_vacuumxid;
TransactionId src_frozenxid;
char src_dbpath[MAXPGPATH];
+ char src_idxpath[MAXPGPATH];
Relation pg_database_rel;
HeapTuple tuple;
TupleDesc pg_database_dsc;
***************
*** 80,85 ****
--- 86,92 ----
char new_record_nulls[Natts_pg_database];
Oid dboid;
+
if (!get_user_info(GetUserId(), &use_super, &use_createdb))
elog(ERROR, "current user name is invalid");
***************
*** 98,104 ****
* idea, so accept possibility of race to create. We will check again
* after we grab the exclusive lock.
*/
! if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
elog(ERROR, "CREATE DATABASE: database \"%s\" already exists", dbname);
/*
--- 105,111 ----
* idea, so accept possibility of race to create. We will check again
* after we grab the exclusive lock.
*/
! if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,NULL))
elog(ERROR, "CREATE DATABASE: database \"%s\" already exists", dbname);
/*
***************
*** 110,116 ****
if (!get_db_info(dbtemplate, &src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_lastsysoid,
&src_vacuumxid, &src_frozenxid,
! src_dbpath))
elog(ERROR, "CREATE DATABASE: template \"%s\" does not exist",
dbtemplate);
--- 117,123 ----
if (!get_db_info(dbtemplate, &src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_lastsysoid,
&src_vacuumxid, &src_frozenxid,
! src_dbpath,src_idxpath))
elog(ERROR, "CREATE DATABASE: template \"%s\" does not exist",
dbtemplate);
***************
*** 126,138 ****
}
/*
! * Determine physical path of source database
*/
alt_loc = resolve_alt_dbpath(src_dbpath, src_dboid);
if (!alt_loc)
alt_loc = GetDatabasePath(src_dboid);
strcpy(src_loc, alt_loc);
/*
* The source DB can't have any active backends, except this one
* (exception is to allow CREATE DB while connected to template1).
--- 133,150 ----
}
/*
! * Determine physical path of source database and indexes
*/
alt_loc = resolve_alt_dbpath(src_dbpath, src_dboid);
if (!alt_loc)
alt_loc = GetDatabasePath(src_dboid);
strcpy(src_loc, alt_loc);
+ alt_loc = resolve_alt_dbpath(src_idxpath, src_dboid);
+ if (!alt_loc)
+ alt_loc = GetIndexPath(src_dboid);
+ strcpy(src_idxloc, alt_loc);
+
/*
* The source DB can't have any active backends, except this one
* (exception is to allow CREATE DB while connected to template1).
***************
*** 165,170 ****
--- 177,185 ----
* specified.
*/
nominal_loc = GetDatabasePath(dboid);
+ nominal_idx_loc = GetIndexPath(dboid);
+
+
alt_loc = resolve_alt_dbpath(dbpath, dboid);
if (strchr(nominal_loc, '\''))
***************
*** 207,215 ****
--- 222,252 ----
nominal_loc, alt_loc);
}
+ /* create symlink for default index space
+ if idxpath is null then just symlink
+ datpath to datpath_index
+ */
+
+ alt_idx_loc = resolve_alt_idxpath(idxpath, dboid);
+ target_idx_dir = alt_idx_loc ? alt_idx_loc : nominal_idx_loc;
+
+ if (mkdir(target_idx_dir, S_IRWXU) != 0)
+ elog(ERROR, "CREATE DATABASE: unable to create index directory '%s': %m",
+ target_dir);
+
+ if(alt_idx_loc)
+ {
+ if (symlink(alt_idx_loc, nominal_idx_loc) != 0)
+ elog(ERROR, "CREATE DATABASE: could not link '%s' to '%s': %m",
+ nominal_loc, alt_loc);
+
+ }
+
/* Copy the template database to the new location */
snprintf(buf, sizeof(buf), "cp -r '%s' '%s'", src_loc, target_dir);
+ elog(NOTICE, buf);
+
if (system(buf) != 0)
{
if (remove_dbdirs(nominal_loc, alt_loc))
***************
*** 218,230 ****
elog(ERROR, "CREATE DATABASE: could not initialize database directory; delete failed as well");
}
/*
* Now OK to grab exclusive lock on pg_database.
*/
pg_database_rel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
/* Check to see if someone else created same DB name meanwhile. */
! if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
{
/* Don't hold lock while doing recursive remove */
heap_close(pg_database_rel, AccessExclusiveLock);
--- 255,280 ----
elog(ERROR, "CREATE DATABASE: could not initialize database directory; delete failed as well");
}
+ /* Copy the template database index files to the new location */
+ snprintf(buf, sizeof(buf), "cp -r '%s'/* '%s'", src_idxloc, target_idx_dir);
+
+ if (system(buf) != 0)
+ {
+ if (remove_dbdirs(nominal_idx_loc, alt_idx_loc))
+ elog(ERROR, "CREATE DATABASE: could not initialize index directory");
+ else
+ elog(ERROR, "CREATE DATABASE: could not initialize index directory; delete failed as well");
+ if (! remove_dbdirs(nominal_loc, alt_loc))
+ elog(ERROR, "CREATE DATABASE: could not initialize index directory; data dir delete failed as well");
+ }
+
/*
* Now OK to grab exclusive lock on pg_database.
*/
pg_database_rel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
/* Check to see if someone else created same DB name meanwhile. */
! if (get_db_info(dbname, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,NULL))
{
/* Don't hold lock while doing recursive remove */
heap_close(pg_database_rel, AccessExclusiveLock);
***************
*** 250,255 ****
--- 300,307 ----
/* no nulls here, GetRawDatabaseInfo doesn't like them */
new_record[Anum_pg_database_datpath - 1] =
DirectFunctionCall1(textin, CStringGetDatum(dbpath ? dbpath : ""));
+ new_record[Anum_pg_database_idxpath - 1] =
+ DirectFunctionCall1(textin, CStringGetDatum(idxpath ? idxpath : ""));
memset(new_record_nulls, ' ', sizeof(new_record_nulls));
***************
*** 298,304 ****
--- 350,359 ----
Oid db_id;
char *alt_loc;
char *nominal_loc;
+ char *alt_idx_loc;
+ char *nominal_idx_loc;
char dbpath[MAXPGPATH];
+ char idxpath[MAXPGPATH];
Relation pgdbrel;
HeapScanDesc pgdbscan;
ScanKeyData key;
***************
*** 327,333 ****
pgdbrel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
if (!get_db_info(dbname, &db_id, &db_owner, NULL,
! &db_istemplate, NULL, NULL, NULL, dbpath))
elog(ERROR, "DROP DATABASE: database \"%s\" does not exist", dbname);
if (!use_super && GetUserId() != db_owner)
--- 382,388 ----
pgdbrel = heap_openr(DatabaseRelationName, AccessExclusiveLock);
if (!get_db_info(dbname, &db_id, &db_owner, NULL,
! &db_istemplate, NULL, NULL, NULL, dbpath,idxpath))
elog(ERROR, "DROP DATABASE: database \"%s\" does not exist", dbname);
if (!use_super && GetUserId() != db_owner)
***************
*** 344,349 ****
--- 399,407 ----
nominal_loc = GetDatabasePath(db_id);
alt_loc = resolve_alt_dbpath(dbpath, db_id);
+ nominal_idx_loc = GetIndexPath(db_id);
+ alt_idx_loc = resolve_alt_idxpath(idxpath, db_id);
+
/*
* Check for active backends in the target database.
*/
***************
*** 399,404 ****
--- 457,463 ----
* Remove the database's subdirectory and everything in it.
*/
remove_dbdirs(nominal_loc, alt_loc);
+ remove_dbdirs(nominal_idx_loc, alt_idx_loc);
/*
* Force dirty buffers out to disk, so that newly-connecting backends
***************
*** 419,425 ****
get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! char *dbpath)
{
Relation relation;
ScanKeyData scanKey;
--- 478,484 ----
get_db_info(const char *name, Oid *dbIdP, int4 *ownerIdP,
int *encodingP, bool *dbIsTemplateP, Oid *dbLastSysOidP,
TransactionId *dbVacuumXidP, TransactionId *dbFrozenXidP,
! char *dbpath,char *idxpath)
{
Relation relation;
ScanKeyData scanKey;
***************
*** 484,489 ****
--- 543,567 ----
else
strcpy(dbpath, "");
}
+
+ /* idx path (as registered in pg_database) */
+ if (idxpath)
+ {
+ tmptext = DatumGetTextP(heap_getattr(tuple,
+ Anum_pg_database_idxpath,
+ RelationGetDescr(relation),
+ &isnull));
+ if (!isnull)
+ {
+ Assert(VARSIZE(tmptext) - VARHDRSZ < MAXPGPATH);
+
+ strncpy(idxpath, VARDATA(tmptext), VARSIZE(tmptext) - VARHDRSZ);
+ *(idxpath + VARSIZE(tmptext) - VARHDRSZ) = '\0';
+ }
+ else
+ strcpy(idxpath, "");
+ }
+
}
heap_endscan(scan);
***************
*** 553,591 ****
return ret;
}
static bool
remove_dbdirs(const char *nominal_loc, const char *alt_loc)
{
- const char *target_dir;
char buf[MAXPGPATH + 100];
bool success = true;
- target_dir = alt_loc ? alt_loc : nominal_loc;
-
/*
* Close virtual file descriptors so the kernel has more available for
* the system() call below.
*/
closeAllVfds();
! if (alt_loc)
{
- /* remove symlink */
- if (unlink(nominal_loc) != 0)
- {
- elog(NOTICE, "could not remove '%s': %m", nominal_loc);
- success = false;
- }
- }
! snprintf(buf, sizeof(buf), "rm -rf '%s'", target_dir);
! if (system(buf) != 0)
{
! elog(NOTICE, "database directory '%s' could not be removed",
! target_dir);
! success = false;
}
return success;
--- 631,711 ----
return ret;
}
+ static char *
+ resolve_alt_idxpath(const char *dbpath, Oid dboid)
+ {
+ const char *prefix;
+ char *ret;
+ size_t len;
+
+ if (dbpath == NULL || dbpath[0] == '\0')
+ return NULL;
+
+ if (strchr(dbpath, '/'))
+ {
+ if (dbpath[0] != '/')
+ elog(ERROR, "Relative paths are not allowed as database locations");
+ #ifndef ALLOW_ABSOLUTE_DBPATHS
+ elog(ERROR, "Absolute paths are not allowed as database locations");
+ #endif
+ prefix = dbpath;
+ }
+ else
+ {
+ /* must be environment variable */
+ char *var = getenv(dbpath);
+
+ if (!var)
+ elog(ERROR, "Postmaster environment variable '%s' not set", dbpath);
+ if (var[0] != '/')
+ elog(ERROR, "Postmaster environment variable '%s' must be absolute path", dbpath);
+ prefix = var;
+ }
+
+ len = strlen(prefix) + 6 + sizeof(Oid) * 8 + 1;
+ ret = palloc(len);
+ snprintf(ret, len, "%s/base/%u_index", prefix, dboid);
+
+ return ret;
+ }
+
static bool
remove_dbdirs(const char *nominal_loc, const char *alt_loc)
{
char buf[MAXPGPATH + 100];
bool success = true;
/*
* Close virtual file descriptors so the kernel has more available for
* the system() call below.
*/
closeAllVfds();
! if(alt_loc)
{
! snprintf(buf, sizeof(buf), "rm -rf '%s'", alt_loc);
!
!
! if (system(buf) != 0)
! {
! elog(NOTICE, "database directory '%s' could not be removed",
! alt_loc);
! success = false;
! }
! }
! if(nominal_loc)
{
! snprintf(buf, sizeof(buf), "rm -rf '%s'", nominal_loc);
!
! if (system(buf) != 0)
! {
! elog(NOTICE, "database directory '%s' could not be removed",
! nominal_loc);
! success = false;
! }
}
return success;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/nodes/copyfuncs.c a/src/backend/nodes/copyfuncs.c
*** pgsql/src/backend/nodes/copyfuncs.c Sun Aug 26 12:55:59 2001
--- a/src/backend/nodes/copyfuncs.c Sun Sep 9 14:53:36 2001
***************
*** 2220,2225 ****
--- 2220,2227 ----
newnode->dbname = pstrdup(from->dbname);
if (from->dbpath)
newnode->dbpath = pstrdup(from->dbpath);
+ if (from->idxpath)
+ newnode->idxpath = pstrdup(from->idxpath);
if (from->dbtemplate)
newnode->dbtemplate = pstrdup(from->dbtemplate);
newnode->encoding = from->encoding;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/parser/gram.y a/src/backend/parser/gram.y
*** pgsql/src/backend/parser/gram.y Thu Sep 6 00:57:28 2001
--- a/src/backend/parser/gram.y Sun Sep 9 15:01:23 2001
***************
*** 347,353 ****
DATABASE, DELIMITERS, DO,
EACH, ENCODING, EXCLUSIVE, EXPLAIN,
FORCE, FORWARD, FREEZE, FUNCTION, HANDLER,
! ILIKE, INCREMENT, INDEX, INHERITS, INSTEAD, ISNULL,
LANCOMPILER, LIMIT, LISTEN, LOAD, LOCATION, LOCK_P,
MAXVALUE, MINVALUE, MODE, MOVE,
NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
--- 347,353 ----
DATABASE, DELIMITERS, DO,
EACH, ENCODING, EXCLUSIVE, EXPLAIN,
FORCE, FORWARD, FREEZE, FUNCTION, HANDLER,
! ILIKE, INCREMENT, INDEX, INDEX_LOCATION, INHERITS, INSTEAD, ISNULL,
LANCOMPILER, LIMIT, LISTEN, LOAD, LOCATION, LOCK_P,
MAXVALUE, MINVALUE, MODE, MOVE,
NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
***************
*** 2969,2974 ****
--- 2969,2977 ----
case 3:
n->encoding = lfirsti(lnext(optitem));
break;
+ case 4:
+ n->idxpath = (char *) lsecond(optitem);
+ break;
}
}
$$ = (Node *)n;
***************
*** 2978,2983 ****
--- 2981,2987 ----
CreatedbStmt *n = makeNode(CreatedbStmt);
n->dbname = $3;
n->dbpath = NULL;
+ n->idxpath = NULL;
n->dbtemplate = NULL;
n->encoding = -1;
$$ = (Node *)n;
***************
*** 3002,3007 ****
--- 3006,3019 ----
{
$$ = lconsi(1, makeList1(NULL));
}
+ | INDEX_LOCATION '=' Sconst
+ {
+ $$ = lconsi(4, makeList1($3));
+ }
+ | INDEX_LOCATION '=' DEFAULT
+ {
+ $$ = lconsi(4, makeList1(NULL));
+ }
| TEMPLATE '=' name
{
$$ = lconsi(2, makeList1($3));
***************
*** 5637,5642 ****
--- 5649,5655 ----
| IMMEDIATE { $$ = "immediate"; }
| INCREMENT { $$ = "increment"; }
| INDEX { $$ = "index"; }
+ | INDEX_LOCATION { $$ = "index_location"; }
| INHERITS { $$ = "inherits"; }
| INSENSITIVE { $$ = "insensitive"; }
| INSERT { $$ = "insert"; }
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/parser/keywords.c a/src/backend/parser/keywords.c
*** pgsql/src/backend/parser/keywords.c Sun Aug 26 12:56:00 2001
--- a/src/backend/parser/keywords.c Sun Sep 9 14:53:42 2001
***************
*** 133,138 ****
--- 133,139 ----
{"in", IN},
{"increment", INCREMENT},
{"index", INDEX},
+ {"index_location", INDEX_LOCATION},
{"inherits", INHERITS},
{"initially", INITIALLY},
{"inner", INNER_P},
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/storage/smgr/md.c a/src/backend/storage/smgr/md.c
*** pgsql/src/backend/storage/smgr/md.c Fri Aug 24 10:07:49 2001
--- a/src/backend/storage/smgr/md.c Sun Sep 9 14:53:46 2001
***************
*** 18,23 ****
--- 18,24 ----
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
+ #include <sys/stat.h>
#include "catalog/catalog.h"
#include "miscadmin.h"
***************
*** 126,134 ****
int fd,
vfd;
Assert(reln->rd_fd < 0);
! path = relpath(reln->rd_node);
fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
--- 127,143 ----
int fd,
vfd;
+
Assert(reln->rd_fd < 0);
! if(reln->rd_rel->relkind == RELKIND_INDEX)
! {
! path = relidxpath(reln->rd_node);
! }
! else
! {
! path = relpath(reln->rd_node);
! }
fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
***************
*** 179,186 ****
--- 188,201 ----
int status = SM_SUCCESS;
int save_errno = 0;
char *path;
+ struct stat buf;
path = relpath(rnode);
+ if(stat(path,&buf) == -1) /* lets try for an index... */
+ {
+ pfree(path);
+ path = relidxpath(rnode);
+ }
/* Delete the first segment, or only segment if not doing segmenting */
if (unlink(path) < 0)
***************
*** 303,309 ****
Assert(reln->rd_fd < 0);
! path = relpath(reln->rd_node);
fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
--- 318,331 ----
Assert(reln->rd_fd < 0);
! if(reln->rd_rel->relkind == RELKIND_INDEX)
! {
! path = relidxpath(reln->rd_node);
! }
! else
! {
! path = relpath(reln->rd_node);
! }
fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
***************
*** 941,947 ****
*fullpath;
/* be sure we have enough space for the '.segno', if any */
! path = relpath(reln->rd_node);
if (segno > 0)
{
--- 963,976 ----
*fullpath;
/* be sure we have enough space for the '.segno', if any */
! if(reln->rd_rel->relkind == RELKIND_INDEX)
! {
! path = relidxpath(reln->rd_node);
! }
! else
! {
! path = relpath(reln->rd_node);
! }
if (segno > 0)
{
***************
*** 1066,1074 ****
#ifndef LET_OS_MANAGE_FILESIZE
BlockNumber segno;
#endif
path = relpath(rnode);
!
#ifndef LET_OS_MANAGE_FILESIZE
/* append the '.segno', if needed */
segno = blkno / ((BlockNumber) RELSEG_SIZE);
--- 1095,1109 ----
#ifndef LET_OS_MANAGE_FILESIZE
BlockNumber segno;
#endif
+ struct stat buf;
path = relpath(rnode);
! if(stat(path,&buf) == -1) /* lets try for an index... */
! {
! pfree(path);
! path = relidxpath(rnode);
! }
!
#ifndef LET_OS_MANAGE_FILESIZE
/* append the '.segno', if needed */
segno = blkno / ((BlockNumber) RELSEG_SIZE);
***************
*** 1085,1092 ****
/* call fd.c to allow other FDs to be closed if needed */
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
if (fd < 0)
elog(DEBUG, "_mdfd_blind_getseg: couldn't open %s: %m", path);
!
pfree(path);
return fd;
--- 1120,1128 ----
/* call fd.c to allow other FDs to be closed if needed */
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
if (fd < 0)
+ {
elog(DEBUG, "_mdfd_blind_getseg: couldn't open %s: %m", path);
! }
pfree(path);
return fd;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/storage/smgr/smgr.c a/src/backend/storage/smgr/smgr.c
*** pgsql/src/backend/storage/smgr/smgr.c Mon Jul 2 16:50:46 2001
--- a/src/backend/storage/smgr/smgr.c Sun Sep 9 14:53:46 2001
***************
*** 171,177 ****
int
smgrcreate(int16 which, Relation reln)
{
! int fd;
PendingRelDelete *pending;
if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
--- 171,177 ----
int
smgrcreate(int16 which, Relation reln)
{
! int fd;
PendingRelDelete *pending;
if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
***************
*** 267,273 ****
return -1;
if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
if (!failOK)
! elog(ERROR, "cannot open %s: %m", RelationGetRelationName(reln));
return fd;
}
--- 267,273 ----
return -1;
if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
if (!failOK)
! elog(ERROR, "smgropen: cannot open %s: %m", RelationGetRelationName(reln));
return fd;
}
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/backend/tcop/utility.c a/src/backend/tcop/utility.c
*** pgsql/src/backend/tcop/utility.c Fri Sep 7 21:10:20 2001
--- a/src/backend/tcop/utility.c Sun Sep 9 14:53:46 2001
***************
*** 620,626 ****
set_ps_display(commandTag = "CREATE DATABASE");
createdb(stmt->dbname, stmt->dbpath,
! stmt->dbtemplate, stmt->encoding);
}
break;
--- 620,626 ----
set_ps_display(commandTag = "CREATE DATABASE");
createdb(stmt->dbname, stmt->dbpath,
! stmt->dbtemplate, stmt->encoding,stmt->idxpath);
}
break;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/catalog.h a/src/include/catalog/catalog.h
*** pgsql/src/include/catalog/catalog.h Wed May 30 16:52:34 2001
--- a/src/include/catalog/catalog.h Sun Sep 9 14:53:49 2001
***************
*** 19,25 ****
--- 19,27 ----
#include "storage/relfilenode.h"
extern char *relpath(RelFileNode rnode);
+ extern char *relidxpath(RelFileNode rnode);
extern char *GetDatabasePath(Oid tblNode);
+ extern char *GetIndexPath(Oid tblNode);
extern bool IsSystemRelationName(const char *relname);
extern bool IsSharedSystemRelationName(const char *relname);
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/pg_attribute.h a/src/include/catalog/pg_attribute.h
*** pgsql/src/include/catalog/pg_attribute.h Sun Aug 26 12:56:00 2001
--- a/src/include/catalog/pg_attribute.h Sun Sep 9 15:05:23 2001
***************
*** 280,285 ****
--- 280,286 ----
DATA(insert ( 1262 datfrozenxid 28 0 4 8 0 -1 -1 t p f i f f));
/* do not mark datpath as toastable; GetRawDatabaseInfo won't cope */
DATA(insert ( 1262 datpath 25 0 -1 9 0 -1 -1 f p f i f f));
+ DATA(insert ( 1262 idxpath 25 0 -1 10 0 -1 -1 f p f i f f));
DATA(insert ( 1262 ctid 27 0 6 -1 0 -1 -1 f p f i f f));
DATA(insert ( 1262 oid 26 0 4 -2 0 -1 -1 t p f i f f));
DATA(insert ( 1262 xmin 28 0 4 -3 0 -1 -1 t p f i f f));
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/pg_class.h a/src/include/catalog/pg_class.h
*** pgsql/src/include/catalog/pg_class.h Sun Aug 26 12:56:01 2001
--- a/src/include/catalog/pg_class.h Sun Sep 9 15:04:48 2001
***************
*** 142,148 ****
DESCR("");
DATA(insert OID = 1261 ( pg_group 87 PGUID 0 1261 0 0 0 0 f t r 3 0 0 0 0 0 f f f f _null_ ));
DESCR("");
! DATA(insert OID = 1262 ( pg_database 88 PGUID 0 1262 0 0 0 0 f t r 9 0 0 0 0 0 t f f f _null_ ));
DESCR("");
DATA(insert OID = 376 ( pg_xactlock 0 PGUID 0 0 0 0 0 0 f t s 1 0 0 0 0 0 f f f f _null_ ));
DESCR("");
--- 142,148 ----
DESCR("");
DATA(insert OID = 1261 ( pg_group 87 PGUID 0 1261 0 0 0 0 f t r 3 0 0 0 0 0 f f f f _null_ ));
DESCR("");
! DATA(insert OID = 1262 ( pg_database 88 PGUID 0 1262 0 0 0 0 f t r 10 0 0 0 0 0 t f f f _null_ ));
DESCR("");
DATA(insert OID = 376 ( pg_xactlock 0 PGUID 0 0 0 0 0 0 f t s 1 0 0 0 0 0 f f f f _null_ ));
DESCR("");
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/catalog/pg_database.h a/src/include/catalog/pg_database.h
*** pgsql/src/include/catalog/pg_database.h Sun Aug 26 12:56:02 2001
--- a/src/include/catalog/pg_database.h Sun Sep 9 16:37:43 2001
***************
*** 42,47 ****
--- 42,48 ----
TransactionId datvacuumxid; /* all XIDs before this are vacuumed */
TransactionId datfrozenxid; /* all XIDs before this are frozen */
text datpath; /* VARIABLE LENGTH FIELD */
+ text idxpath; /* VARIABLE LENGTH FIELD */
} FormData_pg_database;
/* ----------------
***************
*** 55,61 ****
* compiler constants for pg_database
* ----------------
*/
! #define Natts_pg_database 9
#define Anum_pg_database_datname 1
#define Anum_pg_database_datdba 2
#define Anum_pg_database_encoding 3
--- 56,62 ----
* compiler constants for pg_database
* ----------------
*/
! #define Natts_pg_database 10
#define Anum_pg_database_datname 1
#define Anum_pg_database_datdba 2
#define Anum_pg_database_encoding 3
***************
*** 65,72 ****
#define Anum_pg_database_datvacuumxid 7
#define Anum_pg_database_datfrozenxid 8
#define Anum_pg_database_datpath 9
! DATA(insert OID = 1 ( template1 PGUID ENCODING t t 0 0 0 "" ));
DESCR("Default template database");
#define TemplateDbOid 1
--- 66,74 ----
#define Anum_pg_database_datvacuumxid 7
#define Anum_pg_database_datfrozenxid 8
#define Anum_pg_database_datpath 9
+ #define Anum_pg_database_idxpath 10
! DATA(insert OID = 1 ( template1 PGUID ENCODING t t 0 0 0 "" ""));
DESCR("Default template database");
#define TemplateDbOid 1
***************
*** 80,82 ****
--- 82,88 ----
#undef DATAMARKOID
#endif /* PG_DATABASE_H */
+
+
+
+
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/commands/dbcommands.h a/src/include/commands/dbcommands.h
*** pgsql/src/include/commands/dbcommands.h Wed Mar 21 23:00:42 2001
--- a/src/include/commands/dbcommands.h Sun Sep 9 14:53:49 2001
***************
*** 15,21 ****
#define DBCOMMANDS_H
extern void createdb(const char *dbname, const char *dbpath,
! const char *dbtemplate, int encoding);
extern void dropdb(const char *dbname);
#endif /* DBCOMMANDS_H */
--- 15,21 ----
#define DBCOMMANDS_H
extern void createdb(const char *dbname, const char *dbpath,
! const char *dbtemplate, int encoding,const char *idxpath);
extern void dropdb(const char *dbname);
#endif /* DBCOMMANDS_H */
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/include/nodes/parsenodes.h a/src/include/nodes/parsenodes.h
*** pgsql/src/include/nodes/parsenodes.h Sun Aug 26 12:56:02 2001
--- a/src/include/nodes/parsenodes.h Sun Sep 9 14:53:49 2001
***************
*** 656,661 ****
--- 656,662 ----
NodeTag type;
char *dbname; /* name of database to create */
char *dbpath; /* location of database (NULL = default) */
+ char *idxpath; /* locaiton of database INDEXES (NULL = default) */
char *dbtemplate; /* template to use (NULL = default) */
int encoding; /* MULTIBYTE encoding (-1 = use default) */
} CreatedbStmt;
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/interfaces/ecpg/preproc/keywords.c a/src/interfaces/ecpg/preproc/keywords.c
*** pgsql/src/interfaces/ecpg/preproc/keywords.c Thu Aug 16 16:38:55 2001
--- a/src/interfaces/ecpg/preproc/keywords.c Sun Sep 9 14:53:49 2001
***************
*** 131,136 ****
--- 131,137 ----
{"in", IN},
{"increment", INCREMENT},
{"index", INDEX},
+ {"index_location", INDEX_LOCATION},
{"inherits", INHERITS},
{"initially", INITIALLY},
{"inner", INNER_P},
diff -r -c --exclude *.o --exclude *.a --exclude *.so pgsql/src/interfaces/ecpg/preproc/preproc.y a/src/interfaces/ecpg/preproc/preproc.y
*** pgsql/src/interfaces/ecpg/preproc/preproc.y Sun Aug 19 05:21:44 2001
--- a/src/interfaces/ecpg/preproc/preproc.y Sun Sep 9 15:02:09 2001
***************
*** 219,225 ****
COMMENT, COPY, CREATEDB, CREATEUSER, CYCLE, DATABASE,
DELIMITERS, DO, EACH, ENCODING, EXCLUSIVE, EXPLAIN, EXTEND,
FORCE, FORWARD, FUNCTION, HANDLER, INCREMENT,
! INDEX, INHERITS, INSTEAD, ISNULL, LANCOMPILER, LIMIT,
LISTEN, UNLISTEN, LOAD, LOCATION, LOCK_P, MAXVALUE,
MINVALUE, MODE, MOVE, NEW, NOCREATEDB, NOCREATEUSER,
NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
--- 219,225 ----
COMMENT, COPY, CREATEDB, CREATEUSER, CYCLE, DATABASE,
DELIMITERS, DO, EACH, ENCODING, EXCLUSIVE, EXPLAIN, EXTEND,
FORCE, FORWARD, FUNCTION, HANDLER, INCREMENT,
! INDEX, INDEX_LOCATION, INHERITS, INSTEAD, ISNULL, LANCOMPILER, LIMIT,
LISTEN, UNLISTEN, LOAD, LOCATION, LOCK_P, MAXVALUE,
MINVALUE, MODE, MOVE, NEW, NOCREATEDB, NOCREATEUSER,
NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
***************
*** 2231,2236 ****
--- 2231,2238 ----
createdb_opt_item: LOCATION '=' StringConst { $$ = cat2_str(make_str("location ="), $3); }
| LOCATION '=' DEFAULT { $$ = make_str("location = default"); }
+ | INDEX_LOCATION '=' StringConst { $$ = cat2_str(make_str("index_location ="), $3); }
+ | INDEX_LOCATION '=' DEFAULT { $$ = make_str("index_location = default"); }
| TEMPLATE '=' name { $$ = cat2_str(make_str("template ="), $3); }
| TEMPLATE '=' DEFAULT { $$ = make_str("template = default"); }
| ENCODING '=' PosIntStringConst
***************
*** 5022,5027 ****
--- 5024,5030 ----
| IMMEDIATE { $$ = make_str("immediate"); }
| INCREMENT { $$ = make_str("increment"); }
| INDEX { $$ = make_str("index"); }
+ | INDEX_LOCATION { $$ = make_str("index_location"); }
| INHERITS { $$ = make_str("inherits"); }
| INSENSITIVE { $$ = make_str("insensitive"); }
| INSERT { $$ = make_str("insert"); }
Hi all,
Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.I believe this patch is complete, but it is my first .
This patch allows index locations to be specified as different from data
locations. Is this a feature direction we want to go in? Comments?
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.This patch allows index locations to be specified as different from data
locations. Is this a feature direction we want to go in? Comments?
I have not looked at the patch, either implementation or proposed
syntax, but in general we certainly want to head in a direction where we
have full control over placement of storage resources.
- Thomas
Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.I believe this patch is complete, but it is my first .
This patch allows index locations to be specified as
different from data locations. Is this a feature direction
we want to go in? Comments?
Having the table and index on separate drives can do wonders for i/o
performance. :)
darrenk
Import Notes
Resolved by subject fallback
I am very new to this mailinglist so I apologize if I start talking early but
I've been working as a sysadmin and that kind of problems for a long while
now and my suggestion is that it is a start but I think that we should aim a
little higher than this and use something more like the Oracle approach
instead. Where they introduce an abstraction layer in the form of a
tablespace. And this tablespace is then referenced from the create table or
create index instead.
eg:
table -> tablespace -> path to physical storage
index -> tablespace -> path to physical storage
Advantages:
Changes can be done to storage whithout need to change create scripts for db,
tables and so on.
Designers can specify in which tablespace tables/indexes should reside based
on usage.
Sysadmins can work with tablespaces and change paths without changing
anything in the database/table/index definitions.
The alternative is symlinks to distribute the load and that is not a pretty
sight dba-wise.
Hope you can bare with me on this, since I think it is an very important
issue.
I'm unfortunately not a fast coder yet (but I'm getting faster :-) ). But I
could start writing a spec if someone is interrested.
Bruce Momjian wrote:
Show quoted text
Hi all,
Attached is a patch that adds support for specifying a location for
indexes via the "create database" command.I believe this patch is complete, but it is my first .
This patch allows index locations to be specified as different from data
locations. Is this a feature direction we want to go in? Comments?-- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026---------------------------(end of broadcast)---------------------------
TIP 6: Have you searched our list archives?
Attached is a patch that adds support for specifying a
location for indexes via the "create database" command.I believe this patch is complete, but it is my first .
This patch allows index locations to be specified as
different from data locations. Is this a feature direction
we want to go in? Comments?
The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them: we could use new "create
database" syntax to specify default tablespace for indices.
Unfortunately I removed message with patch, can you send it
to me, Bruce?
Vadim
Import Notes
Resolved by subject fallback
"Mikheev, Vadim" <vmikheev@SECTORBASE.COM> writes:
The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:
Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.
At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.
regards, tom lane
I agree that groups of objects in separate data storage areas are needed
and that is what I am trying to get to. Don't you think that Postgresql
with locations/files is the same as Oracle tablespaces. I don't think
we want to invent our own filesystem (which is what a tablespace really
is...).
Jim
Show quoted text
Attached is a patch that adds support for specifying a
location for indexes via the "create database" command.I believe this patch is complete, but it is my first .
This patch allows index locations to be specified as
different from data locations. Is this a feature direction
we want to go in? Comments?The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them: we could use new "create
database" syntax to specify default tablespace for indices.
Unfortunately I removed message with patch, can you send it
to me, Bruce?Vadim
Import Notes
Resolved by subject fallback
"Mikheev, Vadim" <vmikheev@SECTORBASE.COM> writes:
The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.
If that is your only concern, I can tell you for sure that if the
locations are on different drives, there will be a performance benefit.
It is standard database practice to put indexes on different drives than
data. In fact, sometimes you want to put two tables that are frequently
joined on separate drives.
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026
...
At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.
Clearly there can be a *storage management* benefit to having control
over what gets put where, so this does not need to be justified strictly
on a performance basis.
For features like this, we will feel free to evolve them or
revolutionize them with further development, so I'm not worried about
the backward compatibility issue for cases like this.
Comments?
- Thomas
just change the work tablespace below to location and that is exactly
what this patch is trying to do. You can think of the LOCATION and
INDEX_LOCATION provided to the create database command as the default
storage locations for these objects. In the future, I want to enable
the DBA to specify LOCATIONS any object just like Oracle. I am also
planning on a pg_locations table and "create location" command which
will do what the current initlocation script does and more.
Jim
I am very new to this mailinglist so I apologize if I start talking
early but
I've been working as a sysadmin and that kind of problems for a long
while
now and my suggestion is that it is a start but I think that we should
aim a
little higher than this and use something more like the Oracle
approach
instead. Where they introduce an abstraction layer in the form of a
tablespace. And this tablespace is then referenced from the create
table or
create index instead.
eg:
table -> tablespace -> path to physical storage
index -> tablespace -> path to physical storageAdvantages:
Changes can be done to storage whithout need to change create scripts
for db,
tables and so on.
Designers can specify in which tablespace tables/indexes should reside
based
on usage.
Sysadmins can work with tablespaces and change paths without changing
anything in the database/table/index definitions.The alternative is symlinks to distribute the load and that is not a
pretty
sight dba-wise.
Hope you can bare with me on this, since I think it is an very
important
issue.
I'm unfortunately not a fast coder yet (but I'm getting faster :-) ).
But I
could start writing a spec if someone is interrested.
Bruce Momjian wrote:
Hi all,
Attached is a patch that adds support for specifying a location
for
indexes via the "create database" command.
I believe this patch is complete, but it is my first .
This patch allows index locations to be specified as different from
data
locations. Is this a feature direction we want to go in? Comments?
-- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania
19026
---------------------------(end of
broadcast)---------------------------
TIP 6: Have you searched our list archives?
---------------------------(end of
broadcast)---------------------------
TIP 2: you can get off all lists at once with the unregister command
(send "unregister YourEmailAddressHere" to
majordomo@postgresql.org)
Show quoted text
Import Notes
Resolved by subject fallback
...
At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.Clearly there can be a *storage management* benefit to having control
over what gets put where, so this does not need to be justified strictly
on a performance basis.For features like this, we will feel free to evolve them or
revolutionize them with further development, so I'm not worried about
the backward compatibility issue for cases like this.Comments?
Agreed.
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026
The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.
As I said, INDEX_LOCATION in CREATE DATABASE could mean location
of default tablespace for indices in future and one will be able
to override tablespace for particular index with TABLESPACE
clause in CREATE INDEX command.
At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.
Agreed. He mentioned significant performance difference but it would
be great to see results of pgbench tests with scaling factor of >= 10.
Jim?
Also, after reviewing patch I have to say that it will NOT work
with WAL. Jim, please do not name index' dir as "<TBL_NODE>_index".
Instead, just use different TBL_NODE for indices (different number).
It's not good to put if(reln->rd_rel->relkind == RELKIND_INDEX)
stuff into storage manager - only two numbers (tblnode & relnode)
must be used to identify file, no any other logical information
totally unrelated to storage issues.
Vadim
Import Notes
Resolved by subject fallback
Vadim,
I don't understand the WAL issue below, can you explain. The dir name
is the same name as the database with _index added to it. This is how
the current datpath stuff works. I really just copied the datpath code
to get this patch to work...
Also I have been running this patch (both 7.1.3 and 7.2devel) against
some of my companies applications. I have loaded a small database 10G
data and 15G indexes both with and without the patch. There seems to be
between 5% and 10% performance gain doing most common db commands
(selects, selects with joins and inserts). The system is a DUAL P3 733
with 3 IDE disks. One for PGDATA, second for APPDATA and third for
APPIDX. As you can see I have seperated WAL files, GLOBAL, Application
data and application indexes over 3 disks. Our production systems have
around 50k queries/day ( not including data loads), so I believe that
when this patch get put into production, with 20 disks and 10 database
the performance increase should go up.
I should also add, that I have been working on the second part of this
patch, which will allow tables and indexes to be put into LOCATIONS
also. I am going planning on having a PG_LOCATIONS table and
CREATE|DROP|ALTER location SQL command instead of the initlocation shell
script we currently have. The only thing stopping me now is 7.2 testing
I am planning on doing once the beta begins and problems adding a
location column to the pg_class table with the necessary support code in
heap.c...
Thanks for all the comments (keep them comming)
Jim
The more general and "standard" way to go are TABLESPACEs.
But probably proposed feature will be compatible with
tablespaces, when we'll got them:Will it be? I'm afraid of creating a backwards-compatibility
problem for ourselves when it comes time to implement tablespaces.As I said, INDEX_LOCATION in CREATE DATABASE could mean location
of default tablespace for indices in future and one will be able
to override tablespace for particular index with TABLESPACE
clause in CREATE INDEX command.At the very least I'd like to see some information demonstrating
how much benefit there is to this proposed patch, before we
consider whether to adopt it. If there's a significant performance
benefit to splitting a PG database along the table-vs-index divide,
then it's interesting as a short-term improvement ... but Jim didn't
even make that assertion, let alone provide evidence to back it up.Agreed. He mentioned significant performance difference but it would
be great to see results of pgbench tests with scaling factor of >= 10.
Jim?Also, after reviewing patch I have to say that it will NOT work
with WAL. Jim, please do not name index' dir as "<TBL_NODE>_index".
Instead, just use different TBL_NODE for indices (different number).
It's not good to put if(reln->rd_rel->relkind == RELKIND_INDEX)
stuff into storage manager - only two numbers (tblnode & relnode)
must be used to identify file, no any other logical information
totally unrelated to storage issues.Vadim
---------------------------(end of
broadcast)---------------------------
Show quoted text
TIP 5: Have you checked our extensive FAQ?
Import Notes
Resolved by subject fallback
I don't understand the WAL issue below, can you explain. The dir name
is the same name as the database with _index added to it. This is how
the current datpath stuff works. I really just copied the datpath
code to get this patch to work...
At the time of after crash recovery WAL is not able to read relation
description from catalog and so only relfilenode is provided for
storage manager in relation structure (look backend/access/transam/
xlogutils.c:XLogOpenRelation). Well, we could add Index/Table
file type identifier to RmgrData (rmgr.c in the same dir) to set
relkind in relation structure, but I don't see any reason to
do so when we can just use different tblnode number for indices and
name index dirs just like other dirs under 'base' named - ie
only tblnode number is used for dir names, without any additions
unrelated to storage issues.
Vadim
Import Notes
Resolved by subject fallback
Also I have been running this patch (both 7.1.3 and 7.2devel) against
some of my companies applications. I have loaded a small database 10G
We are not familiar with your applications. It would be better to see
results of test suit available to the community. pgbench is first to
come in mind. Such tests would be more valuable.
Vadim
Import Notes
Resolved by subject fallback
I could also symlink all index files back to the tblnode directory?
I don't understand the WAL issue below, can you explain. The dir
name
is the same name as the database with _index added to it. This is
how
the current datpath stuff works. I really just copied the datpath
code to get this patch to work...At the time of after crash recovery WAL is not able to read relation
description from catalog and so only relfilenode is provided for
storage manager in relation structure (look backend/access/transam/
xlogutils.c:XLogOpenRelation). Well, we could add Index/Table
file type identifier to RmgrData (rmgr.c in the same dir) to set
relkind in relation structure, but I don't see any reason to
do so when we can just use different tblnode number for indices and
name index dirs just like other dirs under 'base' named - ie
only tblnode number is used for dir names, without any additions
unrelated to storage issues.Vadim
---------------------------(end of
broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to
majordomo@postgresql.org
Show quoted text
Import Notes
Resolved by subject fallback
Here is my pgbench results. As you can see the I am getting 2X tps with
the 2 directories. I believe this is a BIG win for Postgresql if we can
figure out the WAL recovery issues.
Can someone other than me apply the patch and verify the pgbench
results.
My hardward setup is a dual processor P3/733 running Redhat 7.1 with 512
megs of memory. The postgresql.conf file is the installed version with
NO changes.
Jim
template1=# create database one_dir with location='PGDATA1';
template1=# create database two_dir with location='PGDATA1'
index_location='PGIDX1';
for X in 1 2 3 4 5 6 7 8 9 10
do
pgbench -i -s 10 one_dir >>one_dir.log
pgbench -i -s 10 two_dir >>two_dir.log
done
bash-2.04$ grep 'excluding' one_dir.log
tps = 44.319306(excluding connections establishing)
tps = 34.641020(excluding connections establishing)
tps = 50.516889(excluding connections establishing)
tps = 52.747039(excluding connections establishing)
tps = 16.203821(excluding connections establishing)
tps = 36.902861(excluding connections establishing)
tps = 52.511769(excluding connections establishing)
tps = 53.479882(excluding connections establishing)
tps = 54.599429(excluding connections establishing)
tps = 36.780419(excluding connections establishing)
tps = 48.048279(excluding connections establishing)
bash-2.04$ grep 'excluding' two_dir.log
tps = 58.739049(excluding connections establishing)
tps = 100.259270(excluding connections establishing)
tps = 103.156166(excluding connections establishing)
tps = 110.829358(excluding connections establishing)
tps = 111.929690(excluding connections establishing)
tps = 106.840118(excluding connections establishing)
tps = 101.563159(excluding connections establishing)
tps = 102.877060(excluding connections establishing)
tps = 103.784717(excluding connections establishing)
tps = 53.056309(excluding connections establishing)
tps = 73.842428(excluding connections establishing)
Also I have been running this patch (both 7.1.3 and 7.2devel)
against
some of my companies applications. I have loaded a small database
10G
We are not familiar with your applications. It would be better to see
results of test suit available to the community. pgbench is first to
come in mind. Such tests would be more valuable.Vadim
---------------------------(end of
broadcast)---------------------------
Show quoted text
TIP 4: Don't 'kill -9' the postmaster
Import Notes
Resolved by subject fallback