rmgr hooks (v2)
Latest version of rmgr hooks patch for later review in current
commitfest.
(Minor update to CVS HEAD).
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Attachments:
rmgr_hook_serverside.v2.patchtext/x-patch; charset=UTF-8; name=rmgr_hook_serverside.v2.patchDownload
Index: src/backend/access/transam/rmgr.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/rmgr.c,v
retrieving revision 1.27
diff -c -r1.27 rmgr.c
*** src/backend/access/transam/rmgr.c 19 Nov 2008 10:34:50 -0000 1.27
--- src/backend/access/transam/rmgr.c 17 Dec 2008 19:05:51 -0000
***************
*** 7,12 ****
--- 7,13 ----
*/
#include "postgres.h"
+ #include "miscadmin.h"
#include "access/clog.h"
#include "access/gin.h"
#include "access/gist_private.h"
***************
*** 16,21 ****
--- 17,23 ----
#include "access/nbtree.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
+ #include "nodes/bitmapset.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/sequence.h"
***************
*** 23,29 ****
#include "storage/freespace.h"
! const RmgrData RmgrTable[RM_MAX_ID + 1] = {
{"XLOG", xlog_redo, xlog_desc, NULL, NULL, NULL},
{"Transaction", xact_redo, xact_desc, NULL, NULL, NULL},
{"Storage", smgr_redo, smgr_desc, NULL, NULL, NULL},
--- 25,31 ----
#include "storage/freespace.h"
! const RmgrData FixedRmgrTable[RM_MAX_FIXED_ID + 1] = {
{"XLOG", xlog_redo, xlog_desc, NULL, NULL, NULL},
{"Transaction", xact_redo, xact_desc, NULL, NULL, NULL},
{"Storage", smgr_redo, smgr_desc, NULL, NULL, NULL},
***************
*** 41,43 ****
--- 43,366 ----
{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, gist_safe_restartpoint},
{"Sequence", seq_redo, seq_desc, NULL, NULL, NULL}
};
+
+ /* Main table of Resource Managers */
+ RmgrData *RmgrTable;
+
+ /* Hook for plugins to get control in RmgrInitialize() */
+ rmgr_hook_type rmgr_hook = NULL;
+
+ #define MAX_NUM_RMGRS 255
+ #define RMGR_BITS_PER_WORD 32
+ #define RMGR_BITMAP_WORDS ((MAX_NUM_RMGRS + 1) / RMGR_BITS_PER_WORD)
+ typedef struct RmgrCtlData
+ {
+ uint32 RmgrBitmap[RMGR_BITMAP_WORDS]; /* fixed size bitmapset */
+ } RmgrCtlData;
+
+ static RmgrCtlData *RmgrCtl = NULL;
+
+ /*
+ * Initialization of shared memory for Rmgrs
+ */
+ Size
+ RmgrShmemSize(void)
+ {
+ return sizeof(RmgrCtlData);
+ }
+
+ void
+ RmgrShmemInit(void)
+ {
+ bool foundRmgr;
+
+ RmgrCtl = (RmgrCtlData *)
+ ShmemInitStruct("Rmgr Ctl", RmgrShmemSize(), &foundRmgr);
+
+ if (foundRmgr)
+ return;
+
+ memset(RmgrCtl, 0, sizeof(RmgrCtlData));
+ }
+
+ /*
+ * RmgrInitialize()
+ *
+ * Create RmgrTable, populate RmgrTable with fixed Rmgrs, call plugin
+ * and then initialize shared list of current resource managers.
+ *
+ * RmgrInitialize must run before any other Rmgr function. Normally,
+ * it runs only in the Startup process (once), ensuring that the RmgrTable
+ * and its functions cannot be accessed by normal backends.
+ *
+ * If we have WAL_DEBUG set then the plugin will be called multiple
+ * times and so must be designed to return same answer every time.
+ */
+ void
+ RmgrInitialize(void)
+ {
+ int rmid;
+ int num_rmgrs = 0;
+
+ if (RmgrTable)
+ return;
+
+ /*
+ * Create local copy of RmgrTable. Memory is never freed.
+ */
+ RmgrTable = malloc(sizeof(RmgrData) * (MAX_NUM_RMGRS + 1));
+
+ for (rmid = 0; rmid <= MAX_NUM_RMGRS; rmid++)
+ {
+ if (rmid <= RM_MAX_FIXED_ID)
+ {
+ RmgrTable[rmid].rm_name = FixedRmgrTable[rmid].rm_name;
+ RmgrTable[rmid].rm_redo = FixedRmgrTable[rmid].rm_redo;
+ RmgrTable[rmid].rm_desc = FixedRmgrTable[rmid].rm_desc;
+ RmgrTable[rmid].rm_startup = FixedRmgrTable[rmid].rm_startup;
+ RmgrTable[rmid].rm_cleanup = FixedRmgrTable[rmid].rm_cleanup;
+ RmgrTable[rmid].rm_safe_restartpoint =
+ FixedRmgrTable[rmid].rm_safe_restartpoint;
+ }
+ else
+ {
+ RmgrTable[rmid].rm_name = NULL;
+ RmgrTable[rmid].rm_redo = NULL;
+ RmgrTable[rmid].rm_desc = NULL;
+ RmgrTable[rmid].rm_startup = NULL;
+ RmgrTable[rmid].rm_cleanup = NULL;
+ RmgrTable[rmid].rm_safe_restartpoint = NULL;
+ }
+ }
+
+ /*
+ * Call plugin, if present and output log message to assist with
+ * diagnosis of recovery issues.
+ *
+ * What is the plugin allowed to do?
+ * All RMs are assigned to a single RmgrId. All RM functions are
+ * optional, though we will only allow WAL inserts for RmgrIds that
+ * have a defined rm_redo function. The fixed RMs can be overridden
+ * by providing new routines that do similar, yet slightly different
+ * actions such as filters. There is no direct connection between
+ * an RM and and entry in pg_am, so it is possible to create a new
+ * index type but then not be able to insert into that index, iff
+ * the new index code issues any WAL inserts. That error is quickly
+ * discovered in practice, even if the docs are ignored. That sounds
+ * a little strange, but then pg_am is not readable at the time we
+ * need to know which RM functions to use for recovery.
+ *
+ * XXX we cannot yet issue an index rebuild from rm_cleanup()
+ * though that feature is expected to be achieved one day
+ */
+ if (rmgr_hook)
+ {
+ elog(LOG, "Executing Rmgr hook to extend and/or modify resource managers");
+ (*rmgr_hook) (RmgrTable);
+ }
+
+ /*
+ * Now create data structure to allow checking of RmgrId in each backend.
+ */
+ for (rmid = 0; rmid <= MAX_NUM_RMGRS; rmid++)
+ {
+ if (RmgrTable[rmid].rm_redo != NULL)
+ {
+ int wordnum = rmid / RMGR_BITS_PER_WORD;
+ int bitnum = rmid % RMGR_BITS_PER_WORD;
+ RmgrCtl->RmgrBitmap[wordnum] |= ((uint32) 1 << bitnum);
+
+ num_rmgrs++;
+
+ /*
+ * Keep track of new or modified RMs for diagnostic purposes
+ */
+ if (rmid <= RM_MAX_FIXED_ID)
+ {
+ if (RmgrTable[rmid].rm_name != FixedRmgrTable[rmid].rm_name ||
+ RmgrTable[rmid].rm_redo != FixedRmgrTable[rmid].rm_redo ||
+ RmgrTable[rmid].rm_startup != FixedRmgrTable[rmid].rm_startup ||
+ RmgrTable[rmid].rm_cleanup != FixedRmgrTable[rmid].rm_cleanup ||
+ RmgrTable[rmid].rm_safe_restartpoint !=
+ FixedRmgrTable[rmid].rm_safe_restartpoint)
+ elog(LOG, "Rmgr (%d) %s has been modified", rmid,
+ FixedRmgrTable[rmid].rm_name);
+ }
+ else
+ elog(LOG, "Rmgr (%d) %s defined", rmid,
+ (RmgrTable[rmid].rm_name != NULL ?
+ RmgrTable[rmid].rm_name : "<Unnamed>"));
+ }
+ }
+
+ if (num_rmgrs == 0)
+ elog(FATAL, "No valid resource managers defined");
+ }
+
+
+ /*
+ * RmgrStartup()
+ *
+ * Allow resource managers to do any required startup.
+ * Run RM startup functions once, only ever runs in Startup process.
+ */
+ void
+ RmgrStartup(void)
+ {
+ int rmid;
+
+ Assert(RmgrTable);
+ for (rmid = 0; rmid <= MAX_NUM_RMGRS; rmid++)
+ {
+ if (RmgrTable[rmid].rm_startup != NULL)
+ {
+ elog(DEBUG2, "RmgrStartup (%d) %s", rmid, RmgrTable[rmid].rm_name);
+ RmgrTable[rmid].rm_startup();
+ }
+ }
+ }
+
+ /*
+ * RmgrCleanup()
+ *
+ * Allow resource managers to do any required cleanup.
+ * Run RM cleanup functions once, only ever runs in Startup process.
+ *
+ * Called once prior to start of normal running
+ */
+ void
+ RmgrCleanup(void)
+ {
+ int rmid;
+
+ Assert(RmgrTable);
+ for (rmid = 0; rmid <= MAX_NUM_RMGRS; rmid++)
+ {
+ if (RmgrTable[rmid].rm_cleanup != NULL)
+ {
+ elog(DEBUG2, "RmgrCleanup (%d) %s", rmid, RmgrTable[rmid].rm_name);
+ RmgrTable[rmid].rm_cleanup();
+ }
+ }
+ }
+
+ /*
+ * RmgrSafeRestartpoint()
+ *
+ * Is it safe to mark a restartpoint? We must ask each of the resource
+ * managers whether they have any partial state information that might
+ * prevent a correct restart from the supplied LSN.
+ *
+ * Called once every 5 minutes with default settings, sometimes more
+ * frequently if last call returned false.
+ */
+ bool
+ RmgrSafeRestartpoint(XLogRecPtr checkPointRedo)
+ {
+ int rmid;
+
+ Assert(RmgrTable);
+ for (rmid = 0; rmid <= MAX_NUM_RMGRS; rmid++)
+ {
+ if (RmgrTable[rmid].rm_safe_restartpoint != NULL)
+ if (!(RmgrTable[rmid].rm_safe_restartpoint()))
+ {
+ elog(DEBUG2, "RM (%d) %s not safe to record restart point at %X/%X",
+ rmid,
+ RmgrTable[rmid].rm_name,
+ checkPointRedo.xlogid,
+ checkPointRedo.xrecoff);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /*
+ * RmgrRedo
+ *
+ * Call the RM's redo function to apply WAL records. The RmgrId should
+ * previously have been checked using RmgrIdIsValid().
+ */
+ void
+ RmgrRedo(XLogRecPtr lsn, XLogRecord *rptr)
+ {
+ RmgrTable[rptr->xl_rmid].rm_redo(lsn, rptr);
+ }
+
+ /*
+ * Error context callback for errors occurring during rm_redo().
+ */
+ void
+ rm_redo_error_callback(void *arg)
+ {
+ XLogRecord *record = (XLogRecord *) arg;
+ StringInfoData buf;
+
+ initStringInfo(&buf) ;
+ Assert(RmgrTable);
+ if (RmgrTable[record->xl_rmid].rm_desc != NULL)
+ RmgrTable[record->xl_rmid].rm_desc(&buf,
+ record->xl_info,
+ XLogRecGetData(record));
+
+ /* don't bother emitting empty description */
+ if (buf.len > 0)
+ errcontext("xlog redo %s", buf.data);
+
+ pfree(buf.data);
+ }
+
+ /*
+ * RmgrDesc
+ *
+ * Call the RM's desc function to print details of WAL records
+ * during error handling or when WAL_DEBUG is set
+ */
+ void
+ RmgrDesc(StringInfo buf, XLogRecord *rptr, char *rec)
+ {
+ if (RmgrTable[rptr->xl_rmid].rm_desc != NULL)
+ RmgrTable[rptr->xl_rmid].rm_desc(buf, rptr->xl_info, rec);
+ }
+
+ /*
+ * RmgrName
+ *
+ * Get the RM's namefor use when WAL_DEBUG is set
+ */
+ const char *
+ RmgrName(RmgrId rmid)
+ {
+ Assert(RmgrTable);
+ return RmgrTable[rmid].rm_name;
+ }
+
+ /*
+ * RmgrIsIsValid
+ *
+ * Check whether the RmgrId has a valid rm_redo function currently
+ * Caller throws any error, not here.
+ */
+ bool
+ RmgrIdIsValid(RmgrId rmid)
+ {
+ int wordnum = rmid / RMGR_BITS_PER_WORD;
+ int bitnum = rmid % RMGR_BITS_PER_WORD;
+
+ /*
+ * If not already done so, get set of valid RmgrIds as a bms
+ */
+ if (!RmgrCtl)
+ RmgrShmemInit();
+
+ /*
+ * Check validity
+ */
+ if ((RmgrCtl->RmgrBitmap[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+
+ return false;
+ }
+
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.323
diff -c -r1.323 xlog.c
*** src/backend/access/transam/xlog.c 3 Dec 2008 08:20:11 -0000 1.323
--- src/backend/access/transam/xlog.c 17 Dec 2008 19:04:34 -0000
***************
*** 437,443 ****
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
XLogRecPtr *minRecoveryLoc);
- static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
--- 437,442 ----
***************
*** 498,503 ****
--- 497,513 ----
}
/*
+ * Check that we are writing a currently valid RmgrId. If not, then
+ * we cannot allow this to be written to WAL otherwise we would be
+ * unable to recover correctly from crash and could lose data written
+ * after an invalid WAL record.
+ */
+ if (!RmgrIdIsValid(rmid))
+ ereport(ERROR,
+ (errmsg("invalid resource manager ID %u", rmid),
+ errhint("You must configure resource manager at startup.")));
+
+ /*
* Here we scan the rdata chain, determine which buffers must be backed
* up, and compute the CRC values for the data. Note that the record
* header isn't added into the CRC initially since we don't know the final
***************
*** 829,834 ****
--- 839,846 ----
{
StringInfoData buf;
+ RmgrInitialize();
+
initStringInfo(&buf);
appendStringInfo(&buf, "INSERT @ %X/%X: ",
RecPtr.xlogid, RecPtr.xrecoff);
***************
*** 836,842 ****
if (rdata->data != NULL)
{
appendStringInfo(&buf, " - ");
! RmgrTable[record->xl_rmid].rm_desc(&buf, record->xl_info, rdata->data);
}
elog(LOG, "%s", buf.data);
pfree(buf.data);
--- 848,854 ----
if (rdata->data != NULL)
{
appendStringInfo(&buf, " - ");
! RmgrDesc(&buf, record, rdata->data);
}
elog(LOG, "%s", buf.data);
pfree(buf.data);
***************
*** 3235,3244 ****
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
! if (record->xl_rmid > RM_MAX_ID)
{
ereport(emode,
! (errmsg("invalid resource manager ID %u at %X/%X",
record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
--- 3247,3256 ----
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
! if (!RmgrIdIsValid(record->xl_rmid))
{
ereport(emode,
! (errmsg("invalid resource manager ID %u" " at %X/%X",
record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
***************
*** 4945,4950 ****
--- 4957,4971 ----
*/
readRecoveryCommandFile();
+ /*
+ * Initialize RMs. Startup functions are *not* called unless we
+ * are InRecovery. We do this in two steps so that we know what
+ * RMs are currently valid for when we call XLogInsert(). We
+ * specifically do not write the list of RMs to the ControlFile,
+ * to allow more flexibility in adding/removing RMs.
+ */
+ RmgrInitialize();
+
/* Now we can determine the list of expected TLIs */
expectedTLIs = readTimeLineHistory(recoveryTargetTLI);
***************
*** 5074,5081 ****
/* REDO */
if (InRecovery)
{
- int rmid;
-
/*
* Update pg_control to show that we are recovering and to show the
* selected checkpoint as the place we are starting from. We also mark
--- 5095,5100 ----
***************
*** 5121,5132 ****
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
! /* Initialize resource managers */
! for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
! {
! if (RmgrTable[rmid].rm_startup != NULL)
! RmgrTable[rmid].rm_startup();
! }
/*
* Find the first record that logically follows the checkpoint --- it
--- 5140,5149 ----
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
! /*
! * Startup Resource Managers
! */
! RmgrStartup();
/*
* Find the first record that logically follows the checkpoint --- it
***************
*** 5170,5178 ****
EndRecPtr.xlogid, EndRecPtr.xrecoff);
xlog_outrec(&buf, record);
appendStringInfo(&buf, " - ");
! RmgrTable[record->xl_rmid].rm_desc(&buf,
! record->xl_info,
! XLogRecGetData(record));
elog(LOG, "%s", buf.data);
pfree(buf.data);
}
--- 5187,5193 ----
EndRecPtr.xlogid, EndRecPtr.xrecoff);
xlog_outrec(&buf, record);
appendStringInfo(&buf, " - ");
! RmgrDesc(&buf, record, XLogRecGetData(record));
elog(LOG, "%s", buf.data);
pfree(buf.data);
}
***************
*** 5190,5196 ****
}
/* Setup error traceback support for ereport() */
! errcontext.callback = rm_redo_error_callback;
errcontext.arg = (void *) record;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
--- 5205,5211 ----
}
/* Setup error traceback support for ereport() */
! errcontext.callback = rm_redo_error_callback; /* see rmgr.c */
errcontext.arg = (void *) record;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
***************
*** 5206,5212 ****
if (record->xl_info & XLR_BKP_BLOCK_MASK)
RestoreBkpBlocks(record, EndRecPtr);
! RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
/* Pop the error context stack */
error_context_stack = errcontext.previous;
--- 5221,5227 ----
if (record->xl_info & XLR_BKP_BLOCK_MASK)
RestoreBkpBlocks(record, EndRecPtr);
! RmgrRedo(EndRecPtr, record);
/* Pop the error context stack */
error_context_stack = errcontext.previous;
***************
*** 5353,5368 ****
if (InRecovery)
{
! int rmid;
!
! /*
! * Allow resource managers to do any required cleanup.
! */
! for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
! {
! if (RmgrTable[rmid].rm_cleanup != NULL)
! RmgrTable[rmid].rm_cleanup();
! }
/*
* Check to see if the XLOG sequence contained any unresolved
--- 5368,5374 ----
if (InRecovery)
{
! RmgrCleanup();
/*
* Check to see if the XLOG sequence contained any unresolved
***************
*** 6102,6108 ****
RecoveryRestartPoint(const CheckPoint *checkPoint)
{
int elapsed_secs;
- int rmid;
/*
* Do nothing if the elapsed time since the last restartpoint is less than
--- 6108,6113 ----
***************
*** 6118,6140 ****
return;
/*
! * Is it safe to checkpoint? We must ask each of the resource managers
! * whether they have any partial state information that might prevent a
! * correct restart from this point. If so, we skip this opportunity, but
* return at the next checkpoint record for another try.
*/
! for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
! {
! if (RmgrTable[rmid].rm_safe_restartpoint != NULL)
! if (!(RmgrTable[rmid].rm_safe_restartpoint()))
! {
! elog(DEBUG2, "RM %d not safe to record restart point at %X/%X",
! rmid,
! checkPoint->redo.xlogid,
! checkPoint->redo.xrecoff);
! return;
! }
! }
/*
* OK, force data out to disk
--- 6123,6133 ----
return;
/*
! * Is it safe to restart from here? If not, we skip this opportunity, but
* return at the next checkpoint record for another try.
*/
! if (!RmgrSafeRestartpoint(checkPoint->redo))
! return;
/*
* OK, force data out to disk
***************
*** 6369,6375 ****
appendStringInfo(buf, "; bkpb%d", i + 1);
}
! appendStringInfo(buf, ": %s", RmgrTable[record->xl_rmid].rm_name);
}
#endif /* WAL_DEBUG */
--- 6362,6368 ----
appendStringInfo(buf, "; bkpb%d", i + 1);
}
! appendStringInfo(buf, ": %s", RmgrName(record->xl_rmid));
}
#endif /* WAL_DEBUG */
***************
*** 7141,7167 ****
}
/*
- * Error context callback for errors occurring during rm_redo().
- */
- static void
- rm_redo_error_callback(void *arg)
- {
- XLogRecord *record = (XLogRecord *) arg;
- StringInfoData buf;
-
- initStringInfo(&buf);
- RmgrTable[record->xl_rmid].rm_desc(&buf,
- record->xl_info,
- XLogRecGetData(record));
-
- /* don't bother emitting empty description */
- if (buf.len > 0)
- errcontext("xlog redo %s", buf.data);
-
- pfree(buf.data);
- }
-
- /*
* BackupInProgress: check if online backup mode is active
*
* This is done by checking for existence of the "backup_label" file.
--- 7134,7139 ----
Index: src/backend/storage/ipc/ipci.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/ipc/ipci.c,v
retrieving revision 1.97
diff -c -r1.97 ipci.c
*** src/backend/storage/ipc/ipci.c 30 Sep 2008 10:52:13 -0000 1.97
--- src/backend/storage/ipc/ipci.c 17 Dec 2008 19:04:34 -0000
***************
*** 18,23 ****
--- 18,24 ----
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/nbtree.h"
+ #include "access/rmgr.h"
#include "access/subtrans.h"
#include "access/twophase.h"
#include "miscadmin.h"
***************
*** 101,106 ****
--- 102,108 ----
size = add_size(size, LockShmemSize());
size = add_size(size, ProcGlobalShmemSize());
size = add_size(size, XLOGShmemSize());
+ size = add_size(size, RmgrShmemSize());
size = add_size(size, CLOGShmemSize());
size = add_size(size, SUBTRANSShmemSize());
size = add_size(size, TwoPhaseShmemSize());
***************
*** 174,182 ****
InitShmemIndex();
/*
! * Set up xlog, clog, and buffers
*/
XLOGShmemInit();
CLOGShmemInit();
SUBTRANSShmemInit();
TwoPhaseShmemInit();
--- 176,185 ----
InitShmemIndex();
/*
! * Set up xlog, rmgr, clog, and buffers
*/
XLOGShmemInit();
+ RmgrShmemInit();
CLOGShmemInit();
SUBTRANSShmemInit();
TwoPhaseShmemInit();
Index: src/include/access/rmgr.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/rmgr.h,v
retrieving revision 1.19
diff -c -r1.19 rmgr.h
*** src/include/access/rmgr.h 19 Nov 2008 10:34:52 -0000 1.19
--- src/include/access/rmgr.h 17 Dec 2008 19:04:34 -0000
***************
*** 10,15 ****
--- 10,18 ----
typedef uint8 RmgrId;
+ extern Size RmgrShmemSize(void);
+ extern void RmgrShmemInit(void);
+
/*
* Built-in resource managers
*
***************
*** 23,28 ****
--- 26,33 ----
#define RM_DBASE_ID 4
#define RM_TBLSPC_ID 5
#define RM_MULTIXACT_ID 6
+ /* RmgrId 7 is currently unused */
+ /* RmgrId 8 is currently unused */
#define RM_HEAP2_ID 9
#define RM_HEAP_ID 10
#define RM_BTREE_ID 11
***************
*** 30,35 ****
#define RM_GIN_ID 13
#define RM_GIST_ID 14
#define RM_SEQ_ID 15
! #define RM_MAX_ID RM_SEQ_ID
#endif /* RMGR_H */
--- 35,49 ----
#define RM_GIN_ID 13
#define RM_GIST_ID 14
#define RM_SEQ_ID 15
! #define RM_MAX_FIXED_ID RM_SEQ_ID
!
! /*
! * RmgrId Reservation
! *
! * RmgrIds up to 31 are reserved for PostgreSQL Core use only
! * RmgrIds between 32 and 127 are available for registration by
! * PostgreSQL-related projects
! * RmgrIds of 128 and above are always designed for each site
! */
#endif /* RMGR_H */
Index: src/include/access/xlog_internal.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/xlog_internal.h,v
retrieving revision 1.24
diff -c -r1.24 xlog_internal.h
*** src/include/access/xlog_internal.h 11 Aug 2008 11:05:11 -0000 1.24
--- src/include/access/xlog_internal.h 17 Dec 2008 19:04:34 -0000
***************
*** 229,235 ****
*/
typedef struct RmgrData
{
! const char *rm_name;
void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
void (*rm_desc) (StringInfo buf, uint8 xl_info, char *rec);
void (*rm_startup) (void);
--- 229,235 ----
*/
typedef struct RmgrData
{
! char *rm_name;
void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
void (*rm_desc) (StringInfo buf, uint8 xl_info, char *rec);
void (*rm_startup) (void);
***************
*** 237,243 ****
bool (*rm_safe_restartpoint) (void);
} RmgrData;
! extern const RmgrData RmgrTable[];
/*
* Exported to support xlog switching from bgwriter
--- 237,254 ----
bool (*rm_safe_restartpoint) (void);
} RmgrData;
! typedef void (*rmgr_hook_type) (RmgrData *);
! extern PGDLLIMPORT rmgr_hook_type rmgr_hook;
!
! extern void RmgrInitialize(void);
! extern void RmgrStartup(void);
! extern void RmgrCleanup(void);
! extern bool RmgrSafeRestartpoint(XLogRecPtr checkPointRedo);
! extern void RmgrRedo(XLogRecPtr lsn, XLogRecord *rptr);
! extern void RmgrDesc(StringInfo buf, XLogRecord *rptr, char *rec);
! extern void rm_redo_error_callback(void *arg);
! extern const char *RmgrName(RmgrId rmid);
! extern bool RmgrIdIsValid(RmgrId rmid);
/*
* Exported to support xlog switching from bgwriter
Simon Riggs <simon@2ndQuadrant.com> wrote:
Latest version of rmgr hooks patch for later review in current
commitfest.
(Minor update to CVS HEAD).
It doesn't work on Window (EXEC_BACKEND platform) because
shared_preload_libraries are not loaded in startup process.
So, there are no timing to initialize rmgr_hook.
I have the same problem in contrib/pg_stat_statements.
The both patches requires the following modifications.
diff -cprN HEAD/src/backend/storage/lmgr/proc.c pg_stat_statements/src/backend/storage/lmgr/proc.c
*** HEAD/src/backend/storage/lmgr/proc.c Mon Nov 3 06:24:52 2008
--- pg_stat_statements/src/backend/storage/lmgr/proc.c Tue Dec 2 16:57:33 2008
*************** InitAuxiliaryProcess(void)
*** 381,386 ****
--- 381,390 ----
if (MyProc != NULL)
elog(ERROR, "you already exist");
+ #ifdef EXEC_BACKEND
+ process_shared_preload_libraries();
+ #endif
+
/*
* We use the ProcStructLock to protect assignment and releasing of
* AuxiliaryProcs entries.
Regards,
---
ITAGAKI Takahiro
NTT Open Source Software Center
On Thu, 2008-12-18 at 14:30 +0900, ITAGAKI Takahiro wrote:
Simon Riggs <simon@2ndQuadrant.com> wrote:
Latest version of rmgr hooks patch for later review in current
commitfest.
(Minor update to CVS HEAD).It doesn't work on Window (EXEC_BACKEND platform) because
shared_preload_libraries are not loaded in startup process.
So, there are no timing to initialize rmgr_hook.
Ah, thank you. How amusing that I should notice that in review but not
in my own patch.
I won't add this just yet, since it sounds like it will be added by your
patch. But I'll keep track of this just in case.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Wow, you are really shooting out a lot of good stuff today!
---------------------------------------------------------------------------
Simon Riggs wrote:
Latest version of rmgr hooks patch for later review in current
commitfest.(Minor update to CVS HEAD).
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
[ Attachment, skipping... ]
[ Attachment, skipping... ]
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Simon Riggs wrote:
Latest version of rmgr hooks patch for later review in current
commitfest.
I'd like to reject this patch.
I've read through all the related threads again, and I just still don't
see a convincing use case for it. I think that tools that let you
introspect and modify WAL files should be written as an external
toolkit, like pglesslog. The external indexam use case doesn't impress
me either, and Tom seems to agree
(http://archives.postgresql.org/message-id/24006.1221483138@sss.pgh.pa.us).
Plus there's the version incompatibility dangers. Although I think we
could put in some safeguards and live with it, it does open new
opportunities for confusion, so I'd rather not go there without a very
convincing use case.
Regarding the example plugin included, for debugging purposes you could
just compile with WAL_DEBUG, and the plugin to suppress actions for all
but one database is clearly not ready for any real work. It only
suppresses heapam records, replaying index updates and full-page-images
as usual, and it requires that you know the Oid of the database,
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Wed, 2009-01-21 at 14:05 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
Latest version of rmgr hooks patch for later review in current
commitfest.I'd like to reject this patch.
...
The external indexam use case doesn't impress me either, and Tom seems to agree
(http://archives.postgresql.org/message-id/24006.1221483138@sss.pgh.pa.us).Plus there's the version incompatibility dangers. Although I think we
could put in some safeguards and live with it, it does open new
opportunities for confusion, so I'd rather not go there without a very
convincing use case.
The original design of Postgres allowed pluggable index access methods,
but that capability has not been brought forward to allow for WAL. This
patch would bridge that gap.
Right now we've got a variety of index types that are *not* flourishing
(hash, bitmap, grouped). If we allow them to develop as separate
projects, then whenever they are ready they can be used with particular
releases. You may doubt the worth of those index types but preventing
other people from building them seems strange.
Why do we have 12+ pluggable languages, but we're not allowed to write
pluggable indexes? Whatever argument you put against it being "too hard"
or dangerous or whatever *also* applies to languages. Yet experience
shows pluggability has resulted in a variety of robust and useful
language types, some that might not have been predicted (PL/Proxy, PL/R
etc). They cover a variety of users and situations. Personally, I'd like
to enable people to come up with audio, video, bioinformatics datatypes
and indexes and I definitely don't want to limit the possibilities
there.
There is no danger here for Core, only opportunity. There *is* danger in
forcing new index designers to fit them into Core - look how unusable
hash indexes are. How can we allow that functionality to continue to
exist in Core and yet block the path by which we might reasonably
correct that?
You don't want pluggable indexes, don't use 'em. But that isn't an
argument against allowing the capability for others. That line of
thought would have led us to banning pluggable languages. We should
respect the roots of this project and look for ways to enable the
advancement of database technology, not limit it to only how far we can
currently see ahead through the fog.
Plus there's the version incompatibility dangers. Although I think we
could put in some safeguards and live with it, it does open new
opportunities for confusion, so I'd rather not go there without a
very
convincing use case.
There is danger in every plugin, so not a reasonable objection. Any
badly written external module can kill the database or smash data.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
The external indexam use case doesn't impress me either, and Tom seems to agree
(http://archives.postgresql.org/message-id/24006.1221483138@sss.pgh.pa.us).
Just for correctness - there is one external index http://www.cs.purdue.edu/spgist/
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
On Wed, 2009-01-21 at 14:05 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
Latest version of rmgr hooks patch for later review in current
commitfest.I'd like to reject this patch.
...
I've read through all the related threads again, and I just still don't
see a convincing use case for it. I think that tools that let you
introspect and modify WAL files should be written as an external
toolkit, like pglesslog.
The only reasonable way to examine the contents of WAL files is with
reference to a copy of the catalog that wrote them, timed *exactly* in
synchronisation with the WAL stream.
If somebody issued
CREATE TABLE x
INSERT INTO x
DROP TABLE
then the only time you can reasonably look at the data from the insert
is while replaying that record. At no other time does the data have
certain meaning.
So you *must* replay catalog entries and recreate the original catalog
in exact synchronisation with reading WAL files. Recreating the catalog
can only be done by Postgres itself. It simply isn't practical to do
this all with an external tool, or even link in to replay somehow to
keep replay and the reading of the external file synchronised. If it
*was*, somebody would have done it already - some have already tried and
failed.
(I haven't suggested modifying WAL files, BTW, not sure where that came
from).
Regarding the example plugin included, for debugging purposes you could
just compile with WAL_DEBUG, and the plugin to suppress actions for all
but one database is clearly not ready for any real work. It only
suppresses heapam records, replaying index updates and full-page-images
as usual, and it requires that you know the Oid of the database,
They're minor examples, so don't reject the plugin patch because the
example of usage isn't as useful as it could be. I'm hardly likely to
invest lots of time in a plugin while the approach has not been agreed,
am I?
It is viable for us to filter WAL records in this way, and not very
viable any other way. It doesn't require you to know the Oid of the
database, cos you can look that up in the catalog (with hot standby).
The example plugin doesn't do that, but it could.
So two use cases: inspecting WAL and filtering records before applying
them are covered here. Pluggable indexes is another, and there are
others also, as discussed on the original patch.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 16:25 +0300, Teodor Sigaev wrote:
The external indexam use case doesn't impress me either, and Tom seems to agree
(http://archives.postgresql.org/message-id/24006.1221483138@sss.pgh.pa.us).
Just for correctness - there is one external index http://www.cs.purdue.edu/spgist/
If there is one even when we don't allow them (!), just think how many
there will be if we did allow them...
The docs for the SP-GiST describe PostgreSQL as "highly extensible". I'd
like that to extend to allowing recoverable extensions also.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, Jan 21, 2009 at 1:25 PM, Simon Riggs <simon@2ndquadrant.com> wrote:
The only reasonable way to examine the contents of WAL files is with
reference to a copy of the catalog that wrote them, timed *exactly* in
synchronisation with the WAL stream.
This is a good point.
Regarding the example plugin included, for debugging purposes you could
just compile with WAL_DEBUG, and the plugin to suppress actions for all
but one database is clearly not ready for any real work. It only
suppresses heapam records, replaying index updates and full-page-images
as usual, and it requires that you know the Oid of the database,They're minor examples, so don't reject the plugin patch because the
example of usage isn't as useful as it could be. I'm hardly likely to
invest lots of time in a plugin while the approach has not been agreed,
am I?
Well for these two cases I think the question is would the be better
done from within the core instead of a plugin? And if they are better
done as a plugin are the advantages strong enough to outweigh the
downsides of a plugin. (This actually reinforces the point that doing
these things externally is not very realistic.)
I don't see much of an advantage for plugins instead of core features
for either of these two cases. And given how tightly bound to a
specific version and the WAL record formats of that version a plugin
will have are there any advantages? If a plugin will only work with a
particular version of Postgres and it needs access to internal include
files then what separation does it give? From a code structure point
of view it may as well be integrated, in which case anyone who
modifies the wal structures is more likely to keep the other features
up to date.
Moreover, for things like restoring a single database I think there
are further disadvantages. You would have to ensure that the records
you're skipping don't result in an incoherent database. That means
either doing a cold restore of just a single database. That could be
really cool, you could, for instance allow rolling back a single
database to a hot backup + PITR without even shutting down the rest of
the cluster. However for anything like this to work properly you have
to know what version of the data files were restored and what version
the rest of the database is at, etc. If it's a plugin I think you
don't have enough information or control of the overall state to
handle it.
The only advantage that remains, I think, is the real-world concern
that you can have proprietary plugins that add features to the
database for dealing with emergency situations. It also means people
can experiment with features without maintaining a fork. That's not a
trivial advantage at all. I could see that being quite useful. But on
balance, considering how critical backups and restores are I would
personally avoid experimenting in this area anyways.
--
greg
Simon Riggs <simon@2ndQuadrant.com> writes:
The original design of Postgres allowed pluggable index access methods,
but that capability has not been brought forward to allow for WAL. This
patch would bridge that gap.
Well I think what people do is what GIST did early on -- they just don't
support recoverability until they get merged into core.
Nonetheless this *would* be a worthwhile problem to put effort into solving. I
agree that there are lots of exotic index methods out there that it would be
good to be able to develop externally.
But to do that we need an abstract interface that doesn't depend on internal
data structures, not a generic plugin facility that allows the plugin to
hijack the whole system.
We need something more like indexams which provides a set of call points which
do specific functions, only get called when they're needed, and are expected
to only do the one thing they've been asked to do.
This could be a bit tricky since the catalog isn't available to the wal replay
system. We can't just store the info needed in the pg_indexam table. And it
has to span all the databases in the cluster in any case.
Perhaps this should be solved along with the "plugins" thread. Binary modules
could have some way to register their rmgr id so you could guarantee that
there aren't two plugins with conflicting rmgr ids or version mismatches.
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's Slony Replication support!
On Wed, 2009-01-21 at 14:28 +0000, Greg Stark wrote:
The only advantage that remains, I think, is the real-world concern
that you can have proprietary plugins
How exactly is this plugin more likely to result in a proprietary plugin
than all of the other plugin types we have? Because I suggest it??
I find it quite amazing that anybody would think I proposed a patch
whose "only advantage" lay in commercial exploitation, implying that I
intend that. But at least you had the courage to write it, allowing me
to answer, so actually I'll say thank you for raising that point:
** I have no plans for selling software that has been enabled by this
patch. **
The plugin approach was suggested because it brings together so many use
cases in one and adds missing robustness to a case where we already have
extensibility. Extensibility is about doing things for specific
implementations *without* needing to patch Postgres, not just allowing
external projects to exist alongside.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 14:57 +0000, Gregory Stark wrote:
But to do that we need an abstract interface that doesn't depend on
internal data structures, not a generic plugin facility that allows
the plugin to hijack the whole system.We need something more like indexams which provides a set of call
points which do specific functions, only get called when they're
needed, and are expected to only do the one thing they've been asked
to do.
Really this is just ridiculous scare-mongering. Hijack the whole system?
The patch takes special care to allow calls to the rmgr functions only
from the startup process. The APIs are exactly like the indexams and
*are* called only in specific ways, at specific times. At your earlier
request I put in filters to prevent WAL inserts for plugins that didn't
exist, ensuring that all WAL writes were crash recoverable.
You can already do all the weird stuff you like with index AMs, like
send emails to the Pope on every row insert. I can already create an
in-memory index for example. How exactly do the rmgr interface give more
power? The structure of the function pointers is identical to the
indexAM code...
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon Riggs <simon@2ndQuadrant.com> writes:
On Wed, 2009-01-21 at 14:28 +0000, Greg Stark wrote:
The only advantage that remains, I think, is the real-world concern
that you can have proprietary plugins** I have no plans for selling software that has been enabled by this
patch. **
Hm, I didn't specifically mean this. However I'm not sure why this would be
considered so prejudicial. The Postgres project isn't generally hostile to
commercial use and extensions. If there was something you *did* want to sell
based on this and you needed a clean, generally useful interface to do it then
I think it would be an argument in *favour* of providing it, not against.
But I meant more generally, that the real-world use case for a generic rmgr
plugin function is for providing interfaces for things which cannot -- for
whatever non-code-related reason -- be integrated in core. That is, from a
code point of view they would be best integrated in core. So either they're
not generally useful, not production quality, not license compatible, or
whatever.
The plugin approach was suggested because it brings together so many use
cases in one and adds missing robustness to a case where we already have
extensibility. Extensibility is about doing things for specific
implementations *without* needing to patch Postgres, not just allowing
external projects to exist alongside.
I think a generic plugin architecture is *too* many use cases. That is it's
too flexible and doesn't make any promises at all of what its intended to do.
As a result the system can't be sure it's calling the right method, can't
detect conflicts or catch errors. There's a sweet spot of abstraction where
the interface has to be specific enough to be useful but general enough to
cover all the use cases.
I'm not sure though, your comments in the other email make me think there
might be more to the patch that I had the impression was there. Will now go
read the patch and see if I was mistaken.
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's RemoteDBA services!
Gregory Stark wrote:
But to do that we need an abstract interface that doesn't depend on internal
data structures, not a generic plugin facility that allows the plugin to
hijack the whole system.We need something more like indexams which provides a set of call points which
do specific functions, only get called when they're needed, and are expected
to only do the one thing they've been asked to do.
That's called GiST. ;-)
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Simon Riggs wrote:
Right now we've got a variety of index types that are *not* flourishing
(hash, bitmap, grouped).
Hash indexam has been in core for ages, and yet no-one has bothered to
implement WAL logging. If I've understood correctly, it has been now
been revamped in 8.4 so that there's a performance use case to use it. I
wouldn't be surprised if someone (GSoC?) implements WAL logging for it
for 8.5.
Bitmap indexes required significant changes to the rest of the system,
the indexam API in particular.
By "grouped", I presume you mean my grouped index tuples patch, aka
clustered indexes. That too required changes to the indexam API, and
even if it didn't, I can guarantee that I wouldn't spend any more time
on it than I do now (= 0) if it was on pgfoundry.
If we allow them to develop as separate
projects, then whenever they are ready they can be used with particular
releases.
Developing a new indexam is not something you do over the weekend. It's
a long way from design to an implementation robust enough that anyone
cares about crash recovery. Short-circuiting the release cycle with a
plugin won't get you a production-ready indexam much sooner.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Wed, 2009-01-21 at 16:07 +0000, Gregory Stark wrote:
The plugin approach was suggested because it brings together so many
use cases in one and adds missing robustness to a case where we
already have extensibility. Extensibility is about doing things for
specific implementations *without* needing to patch Postgres, not just
allowing external projects to exist alongside.I think a generic plugin architecture is *too* many use cases. That is
it's too flexible and doesn't make any promises at all of what its
intended to do.
I agree. I don't see providing the plugin capability should prevent
provision of further features in this area. Indeed, I see it as a way of
encouraging people to write stuff for Postgres, which we then reel
slowly back into core, if it is robust enough and general purpose
enough. My model is PL/Proxy: the capability we will eventually gain in
Core will be because we gave solution designers a free hand to invent
and a free hand to overcome obstacles in months, not years. Solutions
now, better solutions later.
I'm not sure though, your comments in the other email make me think
there might be more to the patch that I had the impression was there.
Will now go read the patch and see if I was mistaken.
Thank you.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon Riggs wrote:
So you *must* replay catalog entries and recreate the original catalog
in exact synchronisation with reading WAL files. Recreating the catalog
can only be done by Postgres itself.
The startup process doesn't have a relcache, so this rmgr patch is
nowhere near enough to enable that. If I understood correctly, the hot
standby patch doesn't change that either.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Heikki Linnakangas wrote:
Simon Riggs wrote:
Right now we've got a variety of index types that are *not* flourishing
(hash, bitmap, grouped).Hash indexam has been in core for ages, and yet no-one has bothered to
implement WAL logging. If I've understood correctly, it has been now
been revamped in 8.4 so that there's a performance use case to use it. I
wouldn't be surprised if someone (GSoC?) implements WAL logging for it
for 8.5.Bitmap indexes required significant changes to the rest of the system,
the indexam API in particular.By "grouped", I presume you mean my grouped index tuples patch, aka
clustered indexes. That too required changes to the indexam API, and
even if it didn't, I can guarantee that I wouldn't spend any more time
on it than I do now (= 0) if it was on pgfoundry.If we allow them to develop as separate
projects, then whenever they are ready they can be used with particular
releases.Developing a new indexam is not something you do over the weekend. It's
a long way from design to an implementation robust enough that anyone
cares about crash recovery. Short-circuiting the release cycle with a
plugin won't get you a production-ready indexam much sooner.
Agreed.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Simon Riggs wrote:
Why do we have 12+ pluggable languages, but we're not allowed to write
pluggable indexes? Whatever argument you put against it being "too hard"
or dangerous or whatever *also* applies to languages. Yet experience
shows pluggability has resulted in a variety of robust and useful
language types, some that might not have been predicted (PL/Proxy, PL/R
etc). They cover a variety of users and situations.
Languages are quite different. People already know language X, so they
want to use it for stored procedures too. Or they want to interface
other libraries or functionality available in language X. There's no
such argument with indexams. Also, PL handlers are not as tightly
integrated into the rest of the system, no need for low-level page
access, for example, which is why it's easier to have a generic
interface for them. There's also less issues with concurrency and
version-compatibility.
Personally, I'd like
to enable people to come up with audio, video, bioinformatics datatypes
and indexes and I definitely don't want to limit the possibilities
there.
Yeah, I'd like to see all those datatypes too. But I'd presume that
audio, video and bioinformatics indexing could all be implemented using
GiST. You don't want to write an indexam from scratch for every data type.
... - look how unusable
hash indexes are. How can we allow that functionality to continue to
exist in Core and yet block the path by which we might reasonably
correct that?
I don't see how ripping out hash indexes from core and pushing it into
an external module where it could use the rmgr plugin mechanism would
help to add WAL-logging to it. If someone wants to implement WAL-logging
for hash indexes, just do it, and send a patch.
You don't want pluggable indexes, don't use 'em. But that isn't an
argument against allowing the capability for others. That line of
thought would have led us to banning pluggable languages. We should
respect the roots of this project and look for ways to enable the
advancement of database technology, not limit it to only how far we can
currently see ahead through the fog.
This is an open source project. There's already a lot of people writing
their thesis and whatnot using PostgreSQL, having no problem modifying
the code as they see fit to try completely novel things. We're not
banning or blocking that. On the contrary, that's great! Anyone can
download the source code, modify it, and publish a patch. Others will
find the patch interesting and embrace it, or not. *That's* how this
project moves forward.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
None of this is Any of My Business any more, but
On Wed, Jan 21, 2009 at 03:44:15PM +0000, Simon Riggs wrote:
The patch takes special care to allow calls to the rmgr functions only
from the startup process. The APIs are exactly like the indexams and
*are* called only in specific ways, at specific times. At your earlier
request I put in filters to prevent WAL inserts for plugins that didn't
exist, ensuring that all WAL writes were crash recoverable.
I haven't even started to think about looking at the code, but I buy
Simon's argument here. The Pg project is at big pains to point out
how the extensible PL support and custom datatypes are such big
deals. So why is pluggable index support not also a good thing?
I take no position on the merits of the proposed patch, which I do not
pretend to understand. But it'd be nice to see opponents distinguish
beteween " bad idea in principle" and "bad idea in this case". If
you're arguing the former, clarifying why the analogies aren't
relevant would be helpful.
A
--
Andrew Sullivan
ajs@crankycanuck.ca
On Wed, 2009-01-21 at 18:24 +0200, Heikki Linnakangas wrote:
If we allow them to develop as separate projects, then whenever they
are ready they can be used with particular releases.
Developing a new indexam is not something you do over the weekend.
It's a long way from design to an implementation robust enough that
anyone cares about crash recovery. Short-circuiting the release cycle
with a plugin won't get you a production-ready indexam much sooner.
You're assuming that somebody is starting from scratch and that they
don't have access to index and/or Postgres experts.
There are already research projects in various forms of new index. This
would further encourage that. There are also companies such as CopperEye
that sell indexes for use in other RDBMS, that would be easily able to
adapt their technology to Postgres.
They could also be adapting one of the existing index types for use in a
particular application. Various ideas present themselves.
I'm not trying to persuade you to personally work on indexes. I'm trying
to persuade you to let others work on indexes without your approval.
They already can, though they cannot make them production ready without
this and I see no reason to prevent them. We're not talking about
including their code in Postgres, we're talking about allowing them not
to.
Bruce Lindsay, IBM Fellow and long term DB guru was interviewed in 2005:
Q: If you magically had enough extra time to do one additional thing at
work that you're not doing now, what would it be?
"I think I would work on indexing a little harder".
(He mentions XML indexing, multi-dimensional indexing etc)
[Taken from SIGMOD Record, June 2005]
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 18:38 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
So you *must* replay catalog entries and recreate the original catalog
in exact synchronisation with reading WAL files. Recreating the catalog
can only be done by Postgres itself.The startup process doesn't have a relcache,
Yes
so this rmgr patch is
nowhere near enough to enable
You are way too smart not to overcome such a minor hurdle...
that. If I understood correctly, the hot
standby patch doesn't change that either.
No it doesn't.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 19:13 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
Why do we have 12+ pluggable languages, but we're not allowed to write
pluggable indexes? Whatever argument you put against it being "too hard"
or dangerous or whatever *also* applies to languages. Yet experience
shows pluggability has resulted in a variety of robust and useful
language types, some that might not have been predicted (PL/Proxy, PL/R
etc). They cover a variety of users and situations.Languages are quite different. People already know language X, so they
want to use it for stored procedures too. Or they want to interface
other libraries or functionality available in language X. There's no
such argument with indexams. Also, PL handlers are not as tightly
integrated into the rest of the system, no need for low-level page
access, for example, which is why it's easier to have a generic
interface for them. There's also less issues with concurrency and
version-compatibility.
Yes, they allow people's external experience to be brought to Postgres.
Which includes index experience.
You're assuming that indexes must have concurrency and are therefore
difficult to design. Concurrency isn't a requirement in many cases. You
just need to store tids and feed them back. Indexes don't have to use
database pages even. Robustness is a much more certain requirement,
since rebuilding indexes from scratch may not even be practical in some
cases.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Gregory Stark wrote:
Simon Riggs <simon@2ndQuadrant.com> writes:
The original design of Postgres allowed pluggable index access methods,
but that capability has not been brought forward to allow for WAL. This
patch would bridge that gap.Well I think what people do is what GIST did early on -- they just don't
support recoverability until they get merged into core.
What other constraints are there on such non-in-core indexex? Early (2005)
GIST indexes were very painful in production environments because vacuuming
them held locks for a *long* time (IIRC, an hour or so on my database) on
the indexes locking out queries. Was that just a shortcoming of the
implementation, or was it a side-effect of them not supporting recoverability.
If the latter, I think that's a good reason to try to avoid developing new
index types the same way the GIST guys did.
Ron Mayer wrote:
Early (2005)
GIST indexes were very painful in production environments because vacuuming
them held locks for a *long* time (IIRC, an hour or so on my database) on
the indexes locking out queries. Was that just a shortcoming of the
implementation, or was it a side-effect of them not supporting recoverability.
The former.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Wed, 2009-01-21 at 19:13 +0200, Heikki Linnakangas wrote:
You don't want pluggable indexes, don't use 'em. But that isn't an
argument against allowing the capability for others. That line of
thought would have led us to banning pluggable languages. We should
respect the roots of this project and look for ways to enable the
advancement of database technology, not limit it to only how far wecan > currently see ahead through the fog.
This is an open source project.
That's a whole different discussion.
Extensibility is what gives options in production. Yes, the academics
can do whatever they like. We know the reality is people don't fiddle
with core code for a range of reasons but are happy to use extensions.
I'm in favour of allowing people that use Postgres to get access to
advanced technology without asking my permission or paying me a licence
fee for a modified version.
We support extensible everything, but not indexes. Why?
PostgreSQL is supposed to be The World's Most Advanced Open Source
Database. There is no good technical reason to hold back this patch.
The arguments against this patch seem to revolve around fears of
commercial exploitation or subverting the release process. Or telling
people that we know better than them and they can't possibly write an
index worthy of actual use. They might not be able to, its true, but I
see no reason to prevent them either.
*That's* how this project moves forward.
We've got one committer working almost exclusively on new indexes.
Preventing work on new indexes by non-committers has meant that Bitmap
indexes, which first came out in 2005 have not been usable with
Postgres. That forced people *away* from Postgres towards Bizgres. Lack
of Bitmap indexes is a huge issue for many people. It's 2009 now and it
seems probable that without this patch it will be 2010 at least before
they see BMIs, and later still before they see other index types.
Many people can see the blockage there. I agree it is right to have
prevented BMIs from being committed to core, but they have been usable
and beneficial for many years now for read only workloads. In the
current way of thinking early GIST would never have been allowed in and
there would be no PostGIS.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 21:45 +0200, Heikki Linnakangas wrote:
Ron Mayer wrote:
Early (2005)
GIST indexes were very painful in production environments because vacuuming
them held locks for a *long* time (IIRC, an hour or so on my database) on
the indexes locking out queries. Was that just a shortcoming of the
implementation, or was it a side-effect of them not supporting recoverability.The former.
In the current way of thinking early-GIST would never have been
committed and as a result we would not have PostGIS. Yes, early index
implementations can be bad and they scare the hell out of me. That's
exactly why I want to keep them out of core, so they don't need to be
perfect, they can come with all sorts of health warnings.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 21 Jan 2009, Simon Riggs wrote:
On Wed, 2009-01-21 at 21:45 +0200, Heikki Linnakangas wrote:
Ron Mayer wrote:
Early (2005)
GIST indexes were very painful in production environments because vacuuming
them held locks for a *long* time (IIRC, an hour or so on my database) on
the indexes locking out queries. Was that just a shortcoming of the
implementation, or was it a side-effect of them not supporting recoverability.The former.
In the current way of thinking early-GIST would never have been
committed and as a result we would not have PostGIS. Yes, early index
implementations can be bad and they scare the hell out of me. That's
exactly why I want to keep them out of core, so they don't need to be
perfect, they can come with all sorts of health warnings.
I'm rather keen on Pg extendability, which allowed me and Teodor to
work on many extensions. Yes, first GiST we inherited from early
academic research and was more like a toy. We still have several TODO
items about GiST interface (incorporate SP-GiST).
I'm not sure about specific patch Simon advocate, but as soon as it
doesnot introduces any threat to the whole database cluster health
(for example, WAL spamming) I think we can apply it.
Other question, why don't improve GiST to allow support of more indexes ?
bitmap indexes could be implemented usin g GiST.
Regards,
Oleg
_____________________________________________________________
Oleg Bartunov, Research Scientist, Head of AstroNet (www.astronet.ru),
Sternberg Astronomical Institute, Moscow University, Russia
Internet: oleg@sai.msu.su, http://www.sai.msu.su/~megera/
phone: +007(495)939-16-83, +007(495)939-23-83
On Thu, 2009-01-22 at 00:29 +0300, Oleg Bartunov wrote:
I'm rather keen on Pg extendability, which allowed me and Teodor to
work on many extensions. Yes, first GiST we inherited from early
academic research and was more like a toy. We still have several TODO
items about GiST interface (incorporate SP-GiST).
Sounds good.
I'm not sure about specific patch Simon advocate, but as soon as it
doesnot introduces any threat to the whole database cluster health
(for example, WAL spamming) I think we can apply it.
Currently you can write any crap you want to WAL from any plugin, as
long as it looks a lot like an existing WAL message type. If you crash
then we'll read that crap and (probably) crash again. That is already a
risk.
The rmgr plugin provides a way to handle user-defined WAL messages. The
patch is recovery-side only and is designed to complement the indexAM
APIs, which are normal-running-side only. Best way to think of it is as
another 5 functions on index access method interface that allow you to
implement recoverable index plugins. (Remembering that dynamic index
plugins are already allowed by Postgres).
So the patch does not provide any additional way of *writing* WAL, it
just provides a way of reading it and then taking action.
Rmgr plugins would allow you to simply ignore certain kinds of WAL,
apply data in a user defined manner or filter it etc.. So if you come
across a buggy index, you can turn off the WAL for that index type and
then recover the database without those indexes. Or dynamically patch
the code for that index type and recover. You'll get Postgres back up
faster with this patch than without it, in many cases.
Other question, why don't improve GiST to allow support of more indexes ?
bitmap indexes could be implemented usin g GiST.
I'm not advocating any particular type of index here, just the ability
to make index plugins robust. There is no other way of doing this, i.e.
it can't be done by an external module etc..
I'll avoid discussing index design with you :-)
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
All,
I really don't see why we would object to making *anything* pluggable if
someone was willing to write the code to do so. For example, making
storage pluggable would allow PostgreSQL to achieve great new things on
new types of hardware. (yes, I have some idea how difficult this would be)
For that matter, our pluggable languages, operators, aggregates, and
UDFs are the mainsteam of PostgreSQL adoption -- and as hardware and
technology changes in the future, I believe that our database's
programmability will become the *entire* use case for PostgreSQL.
So I really can't see any plausible reason to be opposed to pluggable
indexes *in principle*. We should be promoting pluggability whereever
we can reasonably add it.
Now, like always, that says nothing about the quality of this particular
patch or whether it *really* moves us closer to pluggable indexes.
--Josh Berkus
Josh Berkus wrote:
All,
I really don't see why we would object to making *anything* pluggable if
someone was willing to write the code to do so. For example, making
storage pluggable would allow PostgreSQL to achieve great new things on
new types of hardware. (yes, I have some idea how difficult this would be)For that matter, our pluggable languages, operators, aggregates, and
UDFs are the mainsteam of PostgreSQL adoption -- and as hardware and
technology changes in the future, I believe that our database's
programmability will become the *entire* use case for PostgreSQL.So I really can't see any plausible reason to be opposed to pluggable
indexes *in principle*. We should be promoting pluggability whereever
we can reasonably add it.Now, like always, that says nothing about the quality of this particular
patch or whether it *really* moves us closer to pluggable indexes.
Plugability adds complexity. Heikki's comment is that adding this patch
make the job of creating pluggable indexes 5% easier, while no one is
actually working on plugable indexes, and it hard to say that making it
5% easier really advances anything, especially since many of our
existing index types aren't WAL-logged. Plugability is not a zero-cost
feature.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Bruce,
Plugability adds complexity. Heikki's comment is that adding this patch
make the job of creating pluggable indexes 5% easier, while no one is
actually working on plugable indexes, and it hard to say that making it
5% easier really advances anything, especially since many of our
existing index types aren't WAL-logged. Plugability is not a zero-cost
feature.
Right. And I'm saying that pluggability is PostgreSQL's main reason for
existence, if you look at our place in the future of databases. So it's
worth paying *some* cost, provided that the cost/benefit ratio works for
the particular patch.
To rephrase: I can't judge the rmgr patch one way or the other. I'm
only objecting to the idea expressed by Heikki and others that pluggable
indexes are stupid and unnecessary.
--Josh
Josh Berkus wrote:
Bruce,
Plugability adds complexity. Heikki's comment is that adding this patch
make the job of creating pluggable indexes 5% easier, while no one is
actually working on plugable indexes, and it hard to say that making it
5% easier really advances anything, especially since many of our
existing index types aren't WAL-logged. Plugability is not a zero-cost
feature.Right. And I'm saying that pluggability is PostgreSQL's main reason for
existence, if you look at our place in the future of databases. So it's
worth paying *some* cost, provided that the cost/benefit ratio works for
the particular patch.To rephrase: I can't judge the rmgr patch one way or the other. I'm
only objecting to the idea expressed by Heikki and others that pluggable
indexes are stupid and unnecessary.
It is cost vs. benefit. No one is saying plugabiity is bad, only that
in this case it is more costly than beneficial; of course, that might
change some day.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Bruce Momjian <bruce@momjian.us> wrote:
It is cost vs. benefit. No one is saying plugabiity is bad, only
that
in this case it is more costly than beneficial
Just curious -- are we talking execution time costs or programming
costs because of increased code complexity?
-Kevin
Kevin Grittner wrote:
Bruce Momjian <bruce@momjian.us> wrote:
It is cost vs. benefit. No one is saying plugabiity is bad, only
that
in this case it is more costly than beneficial
Just curious -- are we talking execution time costs or programming
costs because of increased code complexity?
Programming, I assume, and the chance of bugs.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Josh Berkus <josh@agliodbs.com> writes:
Right. And I'm saying that pluggability is PostgreSQL's main reason for
existence, if you look at our place in the future of databases. So it's worth
paying *some* cost, provided that the cost/benefit ratio works for the
particular patch.
I agree that pluggability is a huge deal for Postgres. But note that the
interface is critical. If we provided a plugin architecture for functions and
operators which was simply a hook where you replaced part of the
infrastructure of the parser and executor it would be pointless.
Instead we provide an interface where your function has to know as little as
possible about the rest of the system. And the parser and executor get enough
information about your function that they can do most of the work. That you
can create a new operator in Postgres *without* knowing how operators actually
are implemented and without worrying about what other operators exist is what
makes the feature so useful.
This is made a lot harder with WAL because a) it spans the entire cluster, not
just a database so any meta-information has to be stored somewhere global and
b) the consequences for getting something wrong are so much more dire. The
entire cluster is dead and can't even be restored from backup.
To rephrase: I can't judge the rmgr patch one way or the other. I'm only
objecting to the idea expressed by Heikki and others that pluggable indexes are
stupid and unnecessary.
Well we support pluggable indexes -- they just can't be recoverable right now.
Presumably if they're merged into the core database they would have
recoverability added like how GIST progressed.
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's On-Demand Production Tuning
On Wed, 2009-01-21 at 18:06 -0500, Bruce Momjian wrote:
Plugability adds complexity. Heikki's comment is that adding this
patch make the job of creating pluggable indexes 5% easier, while no
one is actually working on plugable indexes, and it hard to say that
making it 5% easier really advances anything, especially since many of
our existing index types aren't WAL-logged. Plugability is not a
zero-cost feature.
Sorry Bruce, but that misses the key point.
Without the patch it is completely *impossible* to write an index plugin
that is *recoverable*. Yes, we have pluggable indexes now, but unless
they are recoverable we certainly can't ever use them in production.
With the patch, you still have to write the index code. I agree it is
hard code to write, but not impossible. I would go so far as to say that
the patch helps you 0% with the task of actually writing the plugin. But
the patch enables you to start and that is all its intended as: an
enabler.
So its not a "slightly easier" thing, its a can/cannot thing.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon Riggs wrote:
On Wed, 2009-01-21 at 18:06 -0500, Bruce Momjian wrote:
Plugability adds complexity. Heikki's comment is that adding this
patch make the job of creating pluggable indexes 5% easier, while no
one is actually working on plugable indexes, and it hard to say that
making it 5% easier really advances anything, especially since many of
our existing index types aren't WAL-logged. Plugability is not a
zero-cost feature.Sorry Bruce, but that misses the key point.
I understood the key point.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
On Thu, 2009-01-22 at 00:00 +0000, Gregory Stark wrote:
But note that the interface is critical.
Yes, it is.
The existing rmgr code provides for 5 separate calls that a module needs
to implement to make an access method recoverable. btree, hash, gist and
gin already implement that API.
I haven't invented a new interface at all. All the patch does is expose
the existing API for plugins, allowing them to act in exactly the same
ways that the existing index types do.
If you have patch review comments about additional requirements for that
API, that is fine. But saying the API is wrong is not a reason to reject
the patch. Its a reason to change the patch.
the consequences for getting something wrong are so much more dire.
The entire cluster is dead and can't even be restored from backup.
Not true. If you decide to use a pluggable index and the plugin breaks,
you can turn off that index type and continue recovering the database.
If GIN breaks for example, you can simply bypass it and continue. So the
rmgr patch provides you a mechanism for recovering an existing system in
a way that is not currently possible - no data loss, just loss of
damaged indexes. And it provides an escape hatch if you use a pluggable
index and it breaks.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 17:46 -0600, Kevin Grittner wrote:
Bruce Momjian <bruce@momjian.us> wrote:
It is cost vs. benefit. No one is saying plugabiity is bad, only
that
in this case it is more costly than beneficial
Just curious -- are we talking execution time costs or programming
costs because of increased code complexity?
The execution time of a pluggable index would be identical to a
non-pluggable index. There is zero overhead in having the capability,
since we already use a function pointer mechanism in the existing code.
There is not really any overhead in having 10 or 50 plugins; the
recovery processing time is determined by the efficiency of the plugin
and how many WAL message need to be processed. For example, if you have
more TypeX indexes then recovery will spend more time recovering TypeX
indexes. If you have no TypeX indexes, that module would only be asked
to startup() and cleanup(), but nothing else.
The code complexity is exactly the same whether you write it as a plugin
or a patch against core. The API is identical. The key difference is
that users get to choose whether they use a plugin, or not, whereas
without the plugin you are limited to index types that have been
included with core Postgres.
Just as with some PL languages, some pluggable indexes may gain a
reputation as buggy and fall into disuse. Others may become popular and
be invited to join core, where they will gain further trust.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 17:49 +0000, Simon Riggs wrote:
On Wed, 2009-01-21 at 18:24 +0200, Heikki Linnakangas wrote:
Bruce Lindsay, IBM Fellow and long term DB guru was interviewed in 2005:
Q: If you magically had enough extra time to do one additional thing at
work that you're not doing now, what would it be?"I think I would work on indexing a little harder".
(He mentions XML indexing, multi-dimensional indexing etc)
[Taken from SIGMOD Record, June 2005]
I am curious. I read this whole current thread. What is "wrong" with the
patch? As I understand it it does not increase complexity. It appears to
only expose (or perhaps abstract?) existing functionality into a usable
API that is not dependent on something being in core.
Imagine if at some point to develop new index types or perhaps single
purpose modified index types all you needed was knowhow, pgxs and too
much time.
Unless there is something "wrong" with this patch I say we need to stop
arguing semantics and apply it.
Sincerely,
Joshua D. Drake
--
PostgreSQL - XMPP: jdrake@jabber.postgresql.org
Consulting, Development, Support, Training
503-667-4564 - http://www.commandprompt.com/
The PostgreSQL Company, serving since 1997
I am curious. I read this whole current thread. What is "wrong" with the
patch? As I understand it it does not increase complexity. It appears to
only expose (or perhaps abstract?) existing functionality into a usable
API that is not dependent on something being in core.Imagine if at some point to develop new index types or perhaps single
purpose modified index types all you needed was knowhow, pgxs and too
much time.Unless there is something "wrong" with this patch I say we need to stop
arguing semantics and apply it.
+1. One of the points Heikki made in his original post about
rejecting the patch was that the two examples Simon provided were not
very useful: one was a one-database-only rmgr that didn't actually
work and the other was an unimpressive logging plugin. So maybe there
is an argument that Simon should, say, throw away the logging plugin
and finish the other one so that it actually works. But other than
that... if no one uses it, who cares? It's not a lot of code. If
someone does use it, and they mix two incompatible versions and trash
their database... well, there are lots of ways to trash your database
by loading arbitrary C code into your database engine. So, uh, don't
do that unless you know what you're doing.
We allow extensibility and hooks in other parts of the database where
the use case is pretty thin and tenuous. I betcha there aren't many
people who try writing their own eqjoinsel() either.
...Robert
Simon Riggs wrote:
Without the patch it is completely *impossible* to write an index plugin
that is *recoverable*.
It's also impossible to do many other things without modifying the
source code. Bitmap indexam had to do it, my clustered indexes had to do
it, GIN had to do it.
Yes, we have pluggable indexes now, but unless
they are recoverable we certainly can't ever use them in production.
Sure you can. Just Do It, if that's what you want. If you're willing to
write a custom indexam, and run it in production, compiling PostgreSQL
from source and patching it isn't such a stretch.
Don't get me wrong, I'm certainly not against pluggable indexes in
principle. I just don't believe this patch brings us closer to that goal
in any significant way.
With the patch, you still have to write the index code. I agree it is
hard code to write, but not impossible. I would go so far as to say that
the patch helps you 0% with the task of actually writing the plugin. But
the patch enables you to start and that is all its intended as: an
enabler.
Nothing stops you from starting right now, without this plugin. This is
open source.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Simon Riggs wrote:
Preventing work on new indexes by non-committers has meant that Bitmap
indexes, which first came out in 2005 have not been usable with
Postgres. That forced people *away* from Postgres towards Bizgres. Lack
of Bitmap indexes is a huge issue for many people. It's 2009 now and it
seems probable that without this patch it will be 2010 at least before
they see BMIs, and later still before they see other index types.
No-one is preventing anyone from working on bitmap indexes.
Bitmap indexes required other backend changes, in addition to the rmgr
changes. This rmgr plugin patch is *not* sufficient to enable bitmap
indexes to live as a plugin.
This patch does *not* bring us any closer to having bitmap indexes.
Don't raise false hopes.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Oleg Bartunov wrote:
bitmap indexes could be implemented usin g GiST.
Huh, how would that work? Bitmap indexes have a very different
structure, AFAICS.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Wed, 21 Jan 2009, Bruce Momjian wrote:
Josh Berkus wrote:
Bruce,
Plugability adds complexity. Heikki's comment is that adding this patch
make the job of creating pluggable indexes 5% easier, while no one is
actually working on plugable indexes, and it hard to say that making it
5% easier really advances anything, especially since many of our
existing index types aren't WAL-logged. Plugability is not a zero-cost
feature.Right. And I'm saying that pluggability is PostgreSQL's main reason for
existence, if you look at our place in the future of databases. So it's
worth paying *some* cost, provided that the cost/benefit ratio works for
the particular patch.To rephrase: I can't judge the rmgr patch one way or the other. I'm
only objecting to the idea expressed by Heikki and others that pluggable
indexes are stupid and unnecessary.It is cost vs. benefit. No one is saying plugabiity is bad, only that
in this case it is more costly than beneficial; of course, that might
change some day.
as I understand, there are already plans to utilize this feature. If so,
we need to be more attentive by now.
Regards,
Oleg
_____________________________________________________________
Oleg Bartunov, Research Scientist, Head of AstroNet (www.astronet.ru),
Sternberg Astronomical Institute, Moscow University, Russia
Internet: oleg@sai.msu.su, http://www.sai.msu.su/~megera/
phone: +007(495)939-16-83, +007(495)939-23-83
Hi all,
I hope to raise some valid concerns with the following two stories, a "true
story" first then a little fiction that I hope has a lot to do with current
reality.
Le jeudi 22 janvier 2009, Heikki Linnakangas a écrit :
It's also impossible to do many other things without modifying the
source code. Bitmap indexam had to do it, my clustered indexes had to do
it, GIN had to do it.
So we're "only" talking about new index kinds which fit current indexam API,
right?
Sure you can. Just Do It, if that's what you want. If you're willing to
write a custom indexam, and run it in production, compiling PostgreSQL
from source and patching it isn't such a stretch.
It's all about comfort and product maturity, isn't it?
I had performance concerns for prefix matching, ala telecom matches, i.e. the
prefix is in the table, not in the literal. And our IRC PostgreSQL guru told
me the best way to solve it would be implementing a specific datatype with
specific indexing facility. Easy enough? Sure, he said, just write an
external module and provide a GiST OPERATOR CLASS.
I did just this, wrote a single C file (less than 2000 lines) and I now run my
datatype and its GiST index in production. It has already served something
like 15 millions lookups and counting. Just works.
http://www.postgresql.org/docs/8.3/static/xindex.html
http://wiki.postgresql.org/images/3/30/Prato_2008_prefix.pdf
http://cvs.pgfoundry.org/cgi-bin/cvsweb.cgi/prefix/prefix/
I assure you that should I have needed to patch PostgreSQL, I'd be running
plpgsql procedural code instead and would be fighting against this real time
costing evaluation trigger with such bad perfs.
True story. :)
Don't get me wrong, I'm certainly not against pluggable indexes in
principle. I just don't believe this patch brings us closer to that goal
in any significant way.
If I understand the matter at all, it brings us closer only when the new index
type can be done without changing current indexam API. Which covers BTree,
Hash, GiST and GIN, so could probably cover some more.
If I were to start developping a new external module index kind, I'd really
like to avoid this situation:
- so for my new index idea, I'm only to write some custom C code?
- yes, an simple external module, following indexam API
- great, will I be able to WAL log it from this external module?
- of course, it's PostgreSQL we're talking about.
- what about recovering my custom index?
- oh. now you have to patch core code and run custom PG version
- huh?
- yes, core team finds the situation comfortable enough as is.
- ...
Nothing stops you from starting right now, without this plugin. This is
open source.
We're not talking about how great it is to be able to experiment new ideas by
forking core code, we're talking about making it easy and comfy to run user
code in production environments and being able to still apply minor upgrades
strait from the distribution.
Or maybe I'm misunderstanding it all.
Regards,
--
dim
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On Wed, Jan 21, 2009 at 10:48:21PM +0000, Simon Riggs wrote:
On Thu, 2009-01-22 at 00:29 +0300, Oleg Bartunov wrote:
[...]
Other question, why don't improve GiST to allow support of more indexes ?
bitmap indexes could be implemented usin g GiST.
[...]
I'll avoid discussing index design with you :-)
Oooh. What a pity -- this would allow us lurkers to learn a lot!
(Oh, wait, Heikki has taken up that :-)
Just wanted to say -- thanks folks
- -- tomás
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFJeDvvBcgs9XrR2kYRAviLAJ4jW1rSygrgeA4M73PerFqWXmO4NACeNvV8
GSSnxUyCroSrvpF2PBevBV4=
=jhqe
-----END PGP SIGNATURE-----
What other constraints are there on such non-in-core indexex? Early (2005)
GIST indexes were very painful in production environments because vacuuming
them held locks for a *long* time (IIRC, an hour or so on my database) on
the indexes locking out queries. Was that just a shortcoming of the
implementation, or was it a side-effect of them not supporting recoverability.
GiST concurrent algorithm is based on Log Sequence Number of WAL and that was
the reason to implement WAL (and recoverability) first in GiST.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
On Thu, 2009-01-22 at 10:09 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
Preventing work on new indexes by non-committers has meant that Bitmap
indexes, which first came out in 2005 have not been usable with
Postgres. That forced people *away* from Postgres towards Bizgres. Lack
of Bitmap indexes is a huge issue for many people. It's 2009 now and it
seems probable that without this patch it will be 2010 at least before
they see BMIs, and later still before they see other index types.No-one is preventing anyone from working on bitmap indexes.
Bitmap indexes required other backend changes, in addition to the rmgr
changes. This rmgr plugin patch is *not* sufficient to enable bitmap
indexes to live as a plugin.This patch does *not* bring us any closer to having bitmap indexes.
Don't raise false hopes.
I agree those changes would be better but those changes are *not*
essential (as has been agreed onlist). They are just a possible tuning
feature, amongst many that must prove themselves before they happen.
Manipulating multiple large bitmaps on a 1 TB table will still be much
more efficient than reading multiple btrees and manipulating those. BMIs
are typically much smaller than btrees, so even if they use some memory
we will avoid significant amounts of real I/O. BMIs also have a
significantly lower time to build, making them much more practical.
It is not a false hope since the case is not black/white, just a matter
of opinion.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Oleg Bartunov wrote:
as I understand, there are already plans to utilize this feature. If so,
we need to be more attentive by now.
Is there? If I understood Simon correctly in the last paragraphs of
these emails:
http://archives.postgresql.org/message-id/1221470800.3913.1229.camel@ebony.2ndQuadrant
http://archives.postgresql.org/message-id/1221555881.3913.1761.camel@ebony.2ndQuadrant
he has no immediate use for this.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Thu, 2009-01-22 at 14:52 +0200, Heikki Linnakangas wrote:
Oleg Bartunov wrote:
as I understand, there are already plans to utilize this feature. If so,
we need to be more attentive by now.Is there? If I understood Simon correctly in the last paragraphs of
these emails:http://archives.postgresql.org/message-id/1221470800.3913.1229.camel@ebony.2ndQuadrant
http://archives.postgresql.org/message-id/1221555881.3913.1761.camel@ebony.2ndQuadranthe has no immediate use for this.
Immediate use cases for me would be
* ability to filter WAL records based on database or relation
* ability to recover quickly from various types of bug, for example if
new freespace code caused a corruption we would be able to sidestep it
and get the database up again quickly without doing resetxlog and losing
data.
Medium term
* bit map indexes
But this isn't just for me...
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon Riggs wrote:
Immediate use cases for me would be
* ability to filter WAL records based on database or relation
This patch isn't enough to allow the catalog lookups. Without the
catalog lookups, you might as well implement that as an external tool,
like pglesslog.
* ability to recover quickly from various types of bug, for example if
new freespace code caused a corruption we would be able to sidestep it
and get the database up again quickly without doing resetxlog and losing
data.
That might be useful. But again, could just as well be implemented as an
external tool like pglesslog.
(the new FSM implementation isn't WAL-logged, so that particular
scenario isn't very plausible)
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Thu, 2009-01-22 at 16:15 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
Immediate use cases for me would be
* ability to filter WAL records based on database or relation
This patch isn't enough to allow the catalog lookups. Without the
catalog lookups, you might as well implement that as an external tool,
like pglesslog.
It makes it harder, but you can specify oids easily enough.
Flat file handling reads pg_database during recovery, so can we. Or you
can look in global/pg_database flat file.
* ability to recover quickly from various types of bug, for example if
new freespace code caused a corruption we would be able to sidestep it
and get the database up again quickly without doing resetxlog and losing
data.That might be useful. But again, could just as well be implemented as an
external tool like pglesslog.
There is no WAL record for "no-op", at least not one of variable length.
The WAL files can't just have chunks of zeroes in the middle of them,
they must be CRC valid and chained together in the exact byte position.
There isn't any way to do this, even if there were, that's a seriously
complex way of doing that.
pg_lesslog takes great care to reconstruct the files into the right
shape because recovery is such an unforgiving mistress.
(the new FSM implementation isn't WAL-logged, so that particular
scenario isn't very plausible)
Yeh, that was just a joke. But the principle applies to any index, as
I'm sure you realise.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Thu, Jan 22, 2009 at 9:15 AM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
Immediate use cases for me would be
* ability to filter WAL records based on database or relation
This patch isn't enough to allow the catalog lookups. Without the catalog
lookups, you might as well implement that as an external tool, like
pglesslog.
The fact the patch does not do anything that anyone might ever want is
not a sufficient grounds for rejecting it. If it were, zero patches
would ever get accepted. If additional changes are needed, Simon or
someone else can send a patch later with those changes.
Much ink has been spilled in this space over the size and difficulty
of reviewing Simon's hot standby patch, on the grounds that it is big
and changed many things. Of course, Simon did submit an earlier
version of this patch that was less big and changed fewer things, and
it was never committed even though Simon responded to all of the
review comments. In fact, even after you took the time to split it
back out again, and even after acknowledging that the split-out part
was good code and independently useful, you never committed THAT
either. And so here we sit in limbo.
If you now reject this patch because it is small and changes too few
things, then will you reject his next patch that is more comprehensive
on the grounds that the patch is now too big to review?
I wonder what Simon has to do to get a patch committed. It's been
four months since he started submitting patches, and so far the only
thing that's been committed is the pg_stop_backup() wait bug fix. If
the code were bad or required a lot of fixing to get it in committable
form, that would be completely understandable but no one is alleging
that.
...Robert
Robert Haas wrote:
The fact the patch does not do anything that anyone might ever want is
not a sufficient grounds for rejecting it.
Huh? That sounds like enough of a reason to me.
Much ink has been spilled in this space over the size and difficulty
of reviewing Simon's hot standby patch, on the grounds that it is big
and changed many things. Of course, Simon did submit an earlier
version of this patch that was less big and changed fewer things, and
it was never committed even though Simon responded to all of the
review comments.
What patch was that?
In fact, even after you took the time to split it
back out again, and even after acknowledging that the split-out part
was good code and independently useful, you never committed THAT
either. And so here we sit in limbo.
I did split the "recovery infrastructure" patch from the hot standby
patch. I still intend to review and hopefully commit that (I'll need to
split the latest version from the hot standby patch again). When I
reviewed it for the first time, I just didn't feel that I understood it
well enough to commit it. But that's a completely different patch than
what we're talking about now.
If you now reject this patch because it is small and changes too few
things, then will you reject his next patch that is more comprehensive
on the grounds that the patch is now too big to review?
I won't and haven't rejected a patch because it's too big to review. I
admit that a big patch is a lot harder and more time consuming to
review, so I might not have the time or desire to review it. But that's
a different story.
I wonder what Simon has to do to get a patch committed. It's been
four months since he started submitting patches, and so far the only
thing that's been committed is the pg_stop_backup() wait bug fix. If
the code were bad or required a lot of fixing to get it in committable
form, that would be completely understandable but no one is alleging
that.
You're confusing things. I'm objecting this rmgr patch, but I'm spending
all the spare time I have to review the hot standby patch. It *does* and
*has* required a lot of fixing to get it into committable form. I feel
that it's pretty close now, but I'm waiting for his latest version and I
still need to go through it more closely before I feel comfortable
enough to commit.
(I should also say that if any of the other committers feels differently
and wants to pick up this rmgr patch and commit it, that's fine with me
(assuming the code is fine))
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Teodor Sigaev wrote:
What other constraints are there on such non-in-core indexex? Early (2005)
GIST indexes were very painful in production environments because vacuuming
them held locks for a *long* time (IIRC, an hour or so on my database) on
the indexes locking out queries. Was that just a shortcoming of the
implementation, or was it a side-effect of them not supporting recoverability.GiST concurrent algorithm is based on Log Sequence Number of WAL and that
was the reason to implement WAL (and recoverability) first in GiST.
Hmm, IIRC it is based on a monotonically increasing number. It could
have been anything. LSN was just a monotonically increasing number that
would be available if WAL was implemented first (or in parallel).
Of course, there's no much point in an index that's easily corrupted, so
I understand the desire to implement WAL too -- I'm just pointing out
that concurrency could have been developed independently.
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
Robert Haas escribi�:
We allow extensibility and hooks in other parts of the database where
the use case is pretty thin and tenuous. I betcha there aren't many
people who try writing their own eqjoinsel() either.
The PostGIS guys do implement their own selectivity estimators. In fact
it was them that first implemented pluggability in that area, AFAIR.
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
Simon Riggs wrote:
On Thu, 2009-01-22 at 16:15 +0200, Heikki Linnakangas wrote:
That might be useful. But again, could just as well be implemented as an
external tool like pglesslog.There is no WAL record for "no-op", at least not one of variable length.
Hmm, maybe there should be? That seems like a useful thing to have for
external tools.
The WAL files can't just have chunks of zeroes in the middle of them,
they must be CRC valid and chained together in the exact byte position.
There isn't any way to do this, even if there were, that's a seriously
complex way of doing that.
Hmm, I think you could remove the records in the middle, rechain the
remaining ones, recalculate the crc, and put an xlog switch record at
the end. I agree that's seriously complicated, a no-op record would be
much simpler.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Thu, Jan 22, 2009 at 10:31 AM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
The fact the patch does not do anything that anyone might ever want is
not a sufficient grounds for rejecting it.Huh? That sounds like enough of a reason to me.
s/anything that anyone might ever want/everything that anyone might ever want/
Much ink has been spilled in this space over the size and difficulty
of reviewing Simon's hot standby patch, on the grounds that it is big
and changed many things. Of course, Simon did submit an earlier
version of this patch that was less big and changed fewer things, and
it was never committed even though Simon responded to all of the
review comments.What patch was that?
Infrastructure changes for recovery was an earlier version of hot
standby. That's all I was referring to here.
You're confusing things. I'm objecting this rmgr patch, but I'm spending all
the spare time I have to review the hot standby patch. It *does* and *has*
required a lot of fixing to get it into committable form. I feel that it's
pretty close now, but I'm waiting for his latest version and I still need to
go through it more closely before I feel comfortable enough to commit.(I should also say that if any of the other committers feels differently and
wants to pick up this rmgr patch and commit it, that's fine with me
(assuming the code is fine))
Hmm, well, not feeling that the patch is a priority for you seems
somewhat different than saying that it should be rejected outright.
I am glad to hear that Hot Standby is still on the road to being
committed, but even as a regular reader of -hackers I have to say the
process has been somewhat murky to me. Either there is a lot of
discussion that has been happening off-list, or there are long pauses
when either you or Simon aren't really corresponding and it isn't
obvious in whose court the ball lies. Based on what I've seen
on-list, I sort of thought that Simon was waiting for you to take the
next step by committing at least some portion of the patch. Needless
to say if you're both waiting for each other nothing will get done.
...Robert
Of course, there's no much point in an index that's easily corrupted, so
I understand the desire to implement WAL too -- I'm just pointing out
that concurrency could have been developed independently.
Anything's possible with enough work, but having good support in -core
makes it easier and -core has usually been receptive to requests for
such things - for example, I think Tom put in quite a bit of work to
getting the right hooks in to enable libpqtypes.
...Robert
Robert Haas wrote:
On Thu, Jan 22, 2009 at 10:31 AM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:The fact the patch does not do anything that anyone might ever want is
not a sufficient grounds for rejecting it.Huh? That sounds like enough of a reason to me.
s/anything that anyone might ever want/everything that anyone might ever want/
Well, if it did at least something that someone might want, the case
would be much stronger ;-).
Infrastructure changes for recovery was an earlier version of hot
standby. That's all I was referring to here.
The "infrastrucutre changes for recovery" patch is a prerequisite patch
for hot standby. It's included now in the hot standby patch, but it does
provide some functionality of its own, so it could be split out and
committed separately. And it should, IMO.
I am glad to hear that Hot Standby is still on the road to being
committed, but even as a regular reader of -hackers I have to say the
process has been somewhat murky to me. Either there is a lot of
discussion that has been happening off-list, or there are long pauses
when either you or Simon aren't really corresponding and it isn't
obvious in whose court the ball lies.
There hasn't been any substantial discussion off-list. The latter
might've true at times. Also, I've been busy with other stuff, and Simon
was ill at one point.
Based on what I've seen
on-list, I sort of thought that Simon was waiting for you to take the
next step by committing at least some portion of the patch. Needless
to say if you're both waiting for each other nothing will get done.
Well, right now I'm waiting for a new version from Simon. But the
infrastructure patch is really the first part that should be reviewed in
detail (again) and committed.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Thu, Jan 22, 2009 at 11:13 AM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
Simon Riggs wrote:
On Thu, 2009-01-22 at 16:15 +0200, Heikki Linnakangas wrote:
That might be useful. But again, could just as well be implemented as an
external tool like pglesslog.There is no WAL record for "no-op", at least not one of variable length.
Hmm, maybe there should be? That seems like a useful thing to have for
external tools.The WAL files can't just have chunks of zeroes in the middle of them,
they must be CRC valid and chained together in the exact byte position.
There isn't any way to do this, even if there were, that's a seriously
complex way of doing that.Hmm, I think you could remove the records in the middle, rechain the
remaining ones, recalculate the crc, and put an xlog switch record at the
end. I agree that's seriously complicated, a no-op record would be much
simpler.
Would I be pushing my luck if I suggested that maybe a pluggable rmgr
would also be much simpler, and we already have a patch for that? :-)
...Robert
On Thu, 2009-01-22 at 18:13 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
On Thu, 2009-01-22 at 16:15 +0200, Heikki Linnakangas wrote:
That might be useful. But again, could just as well be implemented as an
external tool like pglesslog.There is no WAL record for "no-op", at least not one of variable length.
Hmm, maybe there should be? That seems like a useful thing to have for
external tools.The WAL files can't just have chunks of zeroes in the middle of them,
they must be CRC valid and chained together in the exact byte position.
There isn't any way to do this, even if there were, that's a seriously
complex way of doing that.Hmm, I think you could remove the records in the middle, rechain the
remaining ones, recalculate the crc, and put an xlog switch record at
the end. I agree that's seriously complicated, a no-op record would be
much simpler.
If someone else suggested that mechanism you'd laugh and rip it to
shreds in an instant.
You are brilliant at seeing simple, practical ways of doing things and
that just ain't one of them. That's why for me this looks less and less
like a debate to determine the best way forwards.
I'm happy that you've chosen to spend your time on HS and I think we
should both return to that, for a rest. I'll be posting a new version
shortly.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
All,
I have a suggestion for the rmgr patch.
Currently, there are *no* plans to get WAL working for the new hash
indexes, nor is there time.
I suggest that we take the rmgr patch and combine it with getting WAL
working properly for Bitmap-on-disk and Hash indexes in 8.5. Having
this patch attached to an actual implementation will show if it's the
correct code to make building new types of indexes easier, or not,
rather than arguing about it in the abstract.
In other words, I'm suggesting that we move it to commitfest-first for
8.5. It's not like we don't have plenty of features and uncommitted
patches for 8.4, and it's not like Simon is going away.
--Josh
On Thu, 2009-01-22 at 13:45 +0000, Simon Riggs wrote:
But this isn't just for me...
I have an old proposal here:
http://archives.postgresql.org/pgsql-hackers/2008-06/msg00404.php
Of course, the number one problem I ran into was that I never actually
wrote the code, not that I needed it to be a plugin ;)
But seriously, it might help. I may be able to write it for 8.5+, and
then turn it into a plugin and people using 8.4 could benefit.
Or maybe it gets rejected from the core and I have to write it as a
plugin by copying GiST and modifying it. I think this might be an answer
to Heikki's observation that writing a stable index AM takes a long
time: it doesn't if you just copy an existing one and modify it
slightly. Because I don't need to make any changes to the way WAL is
used, ideally I could have a high degree of confidence that it's correct
with little effort. Right?
I haven't given a lot of thought to whether my improvement could be made
a plugin or not, nor have I read your patch, but it seems possible to
me.
Regards,
Jeff Davis
On Thu, 2009-01-22 at 11:23 -0800, Josh Berkus wrote:
I suggest that we take the rmgr patch and combine it with getting WAL
working properly for Bitmap-on-disk and Hash indexes in 8.5. Having
this patch attached to an actual implementation will show if it's the
correct code to make building new types of indexes easier, or not,
rather than arguing about it in the abstract.
Your suggestion sounds reasonable and I thank you, but doesn't actually
address the plugin discussion at all. It had absolutely zip to do with
making building indexes easier; it was about enabling robust index
plugins, period. (As well as other worthwhile use cases). It's not a
cost benefit decision, its just "can we have it, or not?". The API *is*
the right one because we already use it with at least 3 actual
implementations. Will it change over time? Of course.
We just "mulled it over" in great detail and it appears this was a
popular feature with no technical problems mentioned about the patch. We
almost never get 8 people speaking out clearly in favour of something.
I'm too busy with Hot Standby to carry on this debate any longer, as
everyone knows - though I think the various forms of filibustering need
to stop.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Wed, 2009-01-21 at 18:38 +0200, Heikki Linnakangas wrote:
Simon Riggs wrote:
So you *must* replay catalog entries and recreate the original catalog
in exact synchronisation with reading WAL files. Recreating the catalog
can only be done by Postgres itself.The startup process doesn't have a relcache, so this rmgr patch is
nowhere near enough to enable that. If I understood correctly, the hot
standby patch doesn't change that either.
The answer to this question was that it doesn't need a relcache, though
perhaps it might be desirable.
Catalog tables are scanned with SnapshotNow and so will work correctly
without that machinery. We already rely on this within the existing code
to update flat files towards the end of recovery.
It is true that you can't look at user data, but then I can already do
that with Hot Standby, so the plugin isn't needed for that.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon,
Your suggestion sounds reasonable and I thank you, but doesn't actually
address the plugin discussion at all. It had absolutely zip to do with
making building indexes easier; it was about enabling robust index
plugins, period. (As well as other worthwhile use cases). It's not a
cost benefit decision, its just "can we have it, or not?". The API *is*
the right one because we already use it with at least 3 actual
implementations. Will it change over time? Of course.
OK. Mostly I'm looking at the calendar, and didn't want to see this
rejected permanently just because people don't want to hash it out in
time for 8.4.
--Josh
Robert Haas <robertmhaas@gmail.com> writes:
Of course, there's no much point in an index that's easily corrupted, so
I understand the desire to implement WAL too -- I'm just pointing out
that concurrency could have been developed independently.
Anything's possible with enough work, but having good support in -core
makes it easier and -core has usually been receptive to requests for
such things - for example, I think Tom put in quite a bit of work to
getting the right hooks in to enable libpqtypes.
Well, in fact, that's an exceedingly apt and instructive comparison.
The hooks that went into libpq resulted from several iterations of
design against a real, live, working application for those hooks.
The proposed rmgr patch is apparently suffering from no such handicap
as having been proven to satisfy the needs of real code :-(
There are other recent examples of proposed hooks that in fact
failed to be useful because of some oversight or other, and it was
not until we insisted on seeing a live use of the hooks that this
became apparent. (IIRC, one or both of the planner-related hooks
that are new in 8.4 had such issues.)
I generally agree that pluggable rmgr support would be a good idea,
but I would much rather put off making the hooks until we have a live
application for them to prove that they are useful and usable. If
we make a hook now sans test case, then what happens if we discover
later that it's not quite right? We'd have to figure out whether there's
a need for backwards-compatible behavior, and we will have a hard time
knowing whether there are any live uses of the hook in the field.
So my take on this is to wait. If it were actually needed by the hot
standby code then of course the above argument would be wrong, but
what I gather from the discussion is that it's not.
regards, tom lane
Needless
to say if you're both waiting for each other nothing will get done.
SET deadlock_timeout = '3d';
;-)
On Thu, 2009-01-22 at 18:45 -0500, Tom Lane wrote:
There are other recent examples of proposed hooks that in fact
failed to be useful because of some oversight or other, and it was
not until we insisted on seeing a live use of the hooks that this
became apparent. (IIRC, one or both of the planner-related hooks
that are new in 8.4 had such issues.)
Thank you for your support of the plugin concept.
You make good points and are completely correct about the earlier
plugin. The additional plugin capability was filling a gap that had been
left when the planner plugin was added in 8.3. A similar thing happened
with executor plugins IIRC. So I agree, new and complex plugin APIs need
a working example otherwise they'll be wrong.
In the current case, index APIs are already well known, so that API is
unlikely to be a problem. The actual "rmgr plugin" API is very simple,
since its intention is only to add or edit entries onto the internal
RmgrTable (in memory) after which everything is well defined already.
This is probably the simplest API that has been added in recent times.
I'm happy to make the WAL filter plugin work correctly in all cases. It
was intended as a demonstration only, but if that is a problem it is
easily fixed. One of my clients has requested filtering capability
alongside hot standby, so I will deliver it, even if that is rejected
for reasons outside of my hands (such as timing).
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Hmm, IIRC it is based on a monotonically increasing number. It could
have been anything. LSN was just a monotonically increasing number that
would be available if WAL was implemented first (or in parallel).
You are right, but without WAL-logging we would need to implement some kind of
sequence :)
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
Simon Riggs <simon@2ndQuadrant.com> writes:
On Thu, 2009-01-22 at 18:45 -0500, Tom Lane wrote:
There are other recent examples of proposed hooks that in fact
failed to be useful because of some oversight or other, and it was
not until we insisted on seeing a live use of the hooks that this
became apparent.
In the current case, index APIs are already well known, so that API is
unlikely to be a problem. The actual "rmgr plugin" API is very simple,
since its intention is only to add or edit entries onto the internal
RmgrTable (in memory) after which everything is well defined already.
Right, the WAL-record-processing API is not really at issue, since it's
been proven internally to the core code. My concern is with the other
part, namely exactly how are we going to identify and install additional
rmgrs. There was substantial debate about that when it first came up,
so you're not likely to convince me that it's such an open-and-shut case
as to not need supporting evidence.
regards, tom lane
On Fri, 2009-01-23 at 10:33 -0500, Tom Lane wrote:
Simon Riggs <simon@2ndQuadrant.com> writes:
On Thu, 2009-01-22 at 18:45 -0500, Tom Lane wrote:
There are other recent examples of proposed hooks that in fact
failed to be useful because of some oversight or other, and it was
not until we insisted on seeing a live use of the hooks that this
became apparent.In the current case, index APIs are already well known, so that API is
unlikely to be a problem. The actual "rmgr plugin" API is very simple,
since its intention is only to add or edit entries onto the internal
RmgrTable (in memory) after which everything is well defined already.Right, the WAL-record-processing API is not really at issue, since it's
been proven internally to the core code. My concern is with the other
part, namely exactly how are we going to identify and install additional
rmgrs. There was substantial debate about that when it first came up,
so you're not likely to convince me that it's such an open-and-shut case
as to not need supporting evidence.
I hear your objection and will answer it, for the record at least.
We can load arbitrary code into any normal backend. I just want to be
able to do the same with the startup process. It can't be much of a
discussion since the API is essentially just the same as _PG_init(), or
shmem_startup_hook.
We took the risk with planner hook, and missed something. We took the
risk with RequestAddinShmemSpace() and missed something. There wasn't
any backlash or problem as a result though and we haven't even
backpatched the additional hooks. They were inspired additions. Why is
such a simple hook in Startup such a big deal? What would be wrong in
fixing any problem in the next release, just as we've done in the other
examples?
If we didn't already have chapters in the manual on index extensibility
I would have to agree. We could regard this patch as fixing an oversight
in index extensibility, presumably when WAL was created.
The patch is just
* a hook in StartupXLOG to allow loading arbitrary code into Startup
* some slight redefinition of RmgrTable to allow arbitrary code to add
or modify the contents of that table of functions. (Being able to modify
the table is an not necessary for index extensions, but is for other
uses).
* some safeguards people requested
Buggy code in shmem_startup_hook could do just as much damage at startup
or in a crash situation, but we have no safeguards there and nobody has
said a single word against that.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon Riggs <simon@2ndQuadrant.com> writes:
On Fri, 2009-01-23 at 10:33 -0500, Tom Lane wrote:
Right, the WAL-record-processing API is not really at issue, since it's
been proven internally to the core code. My concern is with the other
part, namely exactly how are we going to identify and install additional
rmgrs.
The patch is just
* a hook in StartupXLOG to allow loading arbitrary code into Startup
* some slight redefinition of RmgrTable to allow arbitrary code to add
or modify the contents of that table of functions. (Being able to modify
the table is an not necessary for index extensions, but is for other
uses).
* some safeguards people requested
Well, that really seems to just prove my point. You've defined a hook
and not thought carefully about how people will use it. The main thing
that I can see right now that we'd need is some way to determine who
gets which rmgr index. (Maybe community assignment of numbers ---
similar to what we've defined for pg_statistic kind codes --- is fine,
or maybe it isn't; in any case we need an answer for that before this
hook can be considered usable.) Furthermore, maybe that's not the only
problem. I'd feel a lot better about this if the hook patch were done
in parallel with development of actual WAL support in an actual external
indexam. As was suggested earlier, we could do something like building
hash as an external module for the sake of this development, so it's not
like I'm demanding someone write a whole AM from scratch for this. But
putting in the hook and leaving people to invent their own ways of using
it is a recipe for conflicts.
regards, tom lane
On Fri, 2009-01-23 at 16:49 -0500, Tom Lane wrote:
Simon Riggs <simon@2ndQuadrant.com> writes:
On Fri, 2009-01-23 at 10:33 -0500, Tom Lane wrote:
Right, the WAL-record-processing API is not really at issue, since it's
been proven internally to the core code. My concern is with the other
part, namely exactly how are we going to identify and install additional
rmgrs.The patch is just
* a hook in StartupXLOG to allow loading arbitrary code into Startup
* some slight redefinition of RmgrTable to allow arbitrary code to add
or modify the contents of that table of functions. (Being able to modify
the table is an not necessary for index extensions, but is for other
uses).
* some safeguards people requestedWell, that really seems to just prove my point. You've defined a hook
and not thought carefully about how people will use it.
This was originally proposed on 19 August and a patch submitted to the
September commit fest.
http://archives.postgresql.org/pgsql-hackers/2008-08/msg00794.php
After about 30 emails of technical rebuttal we have a list of possible
uses that can't be done sensibly any other way.
* WAL filtering
* Recovery when we have buggy index AMs, yet without losing data
* Pluggable indexes
* Extracting user data from WAL records (very challenging though)
Those uses require the ability to both add to *and* modify all of the
RmgrTable entries. If this was just for pluggable indexes then the API
probably would look a little different, but it's not. The simplicity of
the hook proposal says nothing about the careful thought behind it, it
just relates to the wide variety of beneficial uses.
At any point there we might have hit serious problems with the patch,
but we didn't. I've done my best to cover the objections raised with
code or suggested control mechanisms, so I'm not expecting anyone to
agree with my first musings.
The main thing
that I can see right now that we'd need is some way to determine who
gets which rmgr index. (Maybe community assignment of numbers ---
similar to what we've defined for pg_statistic kind codes --- is fine,
http://archives.postgresql.org/pgsql-hackers/2008-08/msg00916.php
or maybe it isn't; in any case we need an answer for that before this
hook can be considered usable.) Furthermore, maybe that's not the only
problem. I'd feel a lot better about this if the hook patch were done
in parallel with development of actual WAL support in an actual external
...
I agree we need an external module and I learned that lesson from the
earier API proposal you mentioned. The supplied WAL filter plugin was/is
a valid use for this and, as discussed, is the only practical way of
doing WAL filtering. As I said, am happy to make a few mods to make that
more acceptable.
I've deferred on this patch sometimes because of my other work, but also
because I sensed there might be some feeling that people thought this
was a threat to the project from some commercial usurpation (e.g. like
InnoDB). I asked to be contacted off-list if that was the case but
nobody has, so I have assumed this to be a decision based on technical
merit alone. After considering all that has been said I feel this idea
has merit. Yes, we need more and better plugins and this patch is the
seed for those.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Sat, 2009-01-24 at 09:57 +0000, Simon Riggs wrote:
I agree we need an external module and I learned that lesson from the
earier API proposal you mentioned. The supplied WAL filter plugin was/is
a valid use for this and, as discussed, is the only practical way of
doing WAL filtering. As I said, am happy to make a few mods to make that
more acceptable.
I can change the contrib plugin to show how to exclude DROP DATABASE and
DROP TABLESPACE records, which is a common recovery scenario.
I'll produce the table filter plugin and release it to pgfoundry. We
currently have everything we need to make that work, AFAICS.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
On Sat, 2009-01-24 at 13:51 +0000, Simon Riggs wrote:
On Sat, 2009-01-24 at 09:57 +0000, Simon Riggs wrote:
I agree we need an external module and I learned that lesson from the
earier API proposal you mentioned. The supplied WAL filter plugin was/is
a valid use for this and, as discussed, is the only practical way of
doing WAL filtering. As I said, am happy to make a few mods to make that
more acceptable.I can change the contrib plugin to show how to exclude DROP DATABASE and
DROP TABLESPACE records, which is a common recovery scenario.I'll produce the table filter plugin and release it to pgfoundry. We
currently have everything we need to make that work, AFAICS.
On reflection, I'm not going to do those things.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support