diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 77375d9..ecd68bc 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -135,6 +135,9 @@ typedef struct ReorderBufferDiskChange /* data follows */ } ReorderBufferDiskChange; +/* 10k tuples seems like a reasonable value (~80MB with MaxHeapTupleSize) */ +#define TUPLES_PER_GENERATION 10000 + /* * Maximum number of changes kept in memory, per transaction. After that, * changes are spooled to disk. @@ -156,9 +159,6 @@ static const Size max_changes_in_memory = 4096; * major bottleneck, especially when spilling to disk while decoding batch * workloads. */ -static const Size max_cached_changes = 4096 * 2; -static const Size max_cached_tuplebufs = 4096 * 2; /* ~8MB */ -static const Size max_cached_transactions = 512; /* --------------------------------------- @@ -243,6 +243,28 @@ ReorderBufferAllocate(void) buffer->context = new_ctx; + buffer->change_context = SlabContextCreate(new_ctx, + "ReorderBufferChange", + SLAB_DEFAULT_BLOCK_SIZE, + sizeof(ReorderBufferChange)); + + buffer->txn_context = SlabContextCreate(new_ctx, + "ReorderBufferChangeTXN", + SLAB_DEFAULT_BLOCK_SIZE, + sizeof(ReorderBufferTXN)); + + buffer->tup_context = SlabContextCreate(new_ctx, + "ReorderBufferChangeTup", + SLAB_LARGE_BLOCK_SIZE, + sizeof(ReorderBufferTupleBuf) + + MAXIMUM_ALIGNOF + MaxHeapTupleSize); + + buffer->tup_context2 = AllocSetContextCreate(new_ctx, + "ReorderBufferChangeTup2", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + hash_ctl.keysize = sizeof(TransactionId); hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt); hash_ctl.hcxt = buffer->context; @@ -262,11 +284,17 @@ ReorderBufferAllocate(void) buffer->current_restart_decoding_lsn = InvalidXLogRecPtr; + buffer->tuples_count = 0; + buffer->tuples_size = 0; + dlist_init(&buffer->toplevel_by_lsn); dlist_init(&buffer->cached_transactions); dlist_init(&buffer->cached_changes); slist_init(&buffer->cached_tuplebufs); + buffer->tup_context_history = NIL; + buffer->current_size = MaxHeapTupleSize; + return buffer; } @@ -293,19 +321,8 @@ ReorderBufferGetTXN(ReorderBuffer *rb) { ReorderBufferTXN *txn; - /* check the slab cache */ - if (rb->nr_cached_transactions > 0) - { - rb->nr_cached_transactions--; - txn = (ReorderBufferTXN *) - dlist_container(ReorderBufferTXN, node, - dlist_pop_head_node(&rb->cached_transactions)); - } - else - { - txn = (ReorderBufferTXN *) - MemoryContextAlloc(rb->context, sizeof(ReorderBufferTXN)); - } + txn = (ReorderBufferTXN *) + MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN)); memset(txn, 0, sizeof(ReorderBufferTXN)); @@ -346,18 +363,7 @@ ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) txn->invalidations = NULL; } - /* check whether to put into the slab cache */ - if (rb->nr_cached_transactions < max_cached_transactions) - { - rb->nr_cached_transactions++; - dlist_push_head(&rb->cached_transactions, &txn->node); - VALGRIND_MAKE_MEM_UNDEFINED(txn, sizeof(ReorderBufferTXN)); - VALGRIND_MAKE_MEM_DEFINED(&txn->node, sizeof(txn->node)); - } - else - { - pfree(txn); - } + pfree(txn); } /* @@ -368,19 +374,8 @@ ReorderBufferGetChange(ReorderBuffer *rb) { ReorderBufferChange *change; - /* check the slab cache */ - if (rb->nr_cached_changes) - { - rb->nr_cached_changes--; - change = (ReorderBufferChange *) - dlist_container(ReorderBufferChange, node, - dlist_pop_head_node(&rb->cached_changes)); - } - else - { - change = (ReorderBufferChange *) - MemoryContextAlloc(rb->context, sizeof(ReorderBufferChange)); - } + change = (ReorderBufferChange *) + MemoryContextAlloc(rb->change_context, sizeof(ReorderBufferChange)); memset(change, 0, sizeof(ReorderBufferChange)); return change; @@ -436,18 +431,7 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change) break; } - /* check whether to put into the slab cache */ - if (rb->nr_cached_changes < max_cached_changes) - { - rb->nr_cached_changes++; - dlist_push_head(&rb->cached_changes, &change->node); - VALGRIND_MAKE_MEM_UNDEFINED(change, sizeof(ReorderBufferChange)); - VALGRIND_MAKE_MEM_DEFINED(&change->node, sizeof(change->node)); - } - else - { - pfree(change); - } + pfree(change); } @@ -463,37 +447,52 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len) alloc_len = tuple_len + SizeofHeapTupleHeader; - /* - * Most tuples are below MaxHeapTupleSize, so we use a slab allocator for - * those. Thus always allocate at least MaxHeapTupleSize. Note that tuples - * generated for oldtuples can be bigger, as they don't have out-of-line - * toast columns. - */ - if (alloc_len < MaxHeapTupleSize) - alloc_len = MaxHeapTupleSize; + /* see if we need to allocate a new context generation */ + if (rb->tuples_count == TUPLES_PER_GENERATION) + { + Size new_size; + Size avg_length = (rb->tuples_size / rb->tuples_count); + rb->tup_context_history + = lappend(rb->tup_context_history, rb->tup_context); + + /* assume +50% is enough slack to fit most tuples into the slab context */ + new_size = MAXALIGN(avg_length * 1.5); + + elog(WARNING, "slab size update old=%ld new=%ld", + rb->current_size, new_size); + + rb->current_size = new_size; + rb->tup_context = SlabContextCreate(rb->context, + "ReorderBufferChangeTup", + SLAB_LARGE_BLOCK_SIZE, + sizeof(ReorderBufferTupleBuf) + + MAXIMUM_ALIGNOF + rb->current_size); + + /* we could also recreate the aset context, with block sizes set so + * that the palloc always does malloc(), but not sure about that */ + + rb->tuples_count = 0; + rb->tuples_size = 0; + } + + rb->tuples_count += 1; + rb->tuples_size += alloc_len; /* if small enough, check the slab cache */ - if (alloc_len <= MaxHeapTupleSize && rb->nr_cached_tuplebufs) + if (alloc_len <= rb->current_size) { - rb->nr_cached_tuplebufs--; - tuple = slist_container(ReorderBufferTupleBuf, node, - slist_pop_head_node(&rb->cached_tuplebufs)); - Assert(tuple->alloc_tuple_size == MaxHeapTupleSize); -#ifdef USE_ASSERT_CHECKING - memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData)); - VALGRIND_MAKE_MEM_UNDEFINED(&tuple->tuple, sizeof(HeapTupleData)); -#endif + tuple = (ReorderBufferTupleBuf *) + MemoryContextAlloc(rb->tup_context, + sizeof(ReorderBufferTupleBuf) + + MAXIMUM_ALIGNOF + rb->current_size); + tuple->alloc_tuple_size = rb->current_size; tuple->tuple.t_data = ReorderBufferTupleBufData(tuple); -#ifdef USE_ASSERT_CHECKING - memset(tuple->tuple.t_data, 0xa8, tuple->alloc_tuple_size); - VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size); -#endif } else { tuple = (ReorderBufferTupleBuf *) - MemoryContextAlloc(rb->context, + MemoryContextAlloc(rb->tup_context2, sizeof(ReorderBufferTupleBuf) + MAXIMUM_ALIGNOF + alloc_len); tuple->alloc_tuple_size = alloc_len; @@ -512,21 +511,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len) void ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple) { - /* check whether to put into the slab cache, oversized tuples never are */ - if (tuple->alloc_tuple_size == MaxHeapTupleSize && - rb->nr_cached_tuplebufs < max_cached_tuplebufs) - { - rb->nr_cached_tuplebufs++; - slist_push_head(&rb->cached_tuplebufs, &tuple->node); - VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size); - VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf)); - VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node)); - VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size)); - } - else - { - pfree(tuple); - } + pfree(tuple); } /* diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile index b2403e1..321289f 100644 --- a/src/backend/utils/mmgr/Makefile +++ b/src/backend/utils/mmgr/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = aset.o mcxt.o portalmem.o +OBJS = aset.o mcxt.o portalmem.o slab.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c new file mode 100644 index 0000000..6fe8afb --- /dev/null +++ b/src/backend/utils/mmgr/slab.c @@ -0,0 +1,712 @@ +/*------------------------------------------------------------------------- + * + * slab.c + * SLAB allocator definitions. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/memdebug.h" +#include "utils/memutils.h" + +/* + * slab is intended for allocating small equally-sized chunks, so 1kB seems + * like a good maximum (but mostly arbitrary otherwise) + */ +#define SLAB_CHUNK_MIN 16 +#define SLAB_CHUNK_MAX 1024 + + +#define SLAB_BLOCKHDRSZ MAXALIGN(sizeof(SlabBlockData)) +#define SLAB_CHUNKHDRSZ MAXALIGN(sizeof(SlabChunkData)) + +/* Portion of SLAB_CHUNKHDRSZ examined outside slab.c. */ +#define SLAB_CHUNK_PUBLIC \ + (offsetof(SlabChunkData, size) + sizeof(Size)) + +/* Portion of SLAB_CHUNKHDRSZ excluding trailing padding. */ +#ifdef MEMORY_CONTEXT_CHECKING +#define SLAB_CHUNK_USED \ + (offsetof(SlabChunkData, requested_size) + sizeof(Size)) +#else +#define SLAB_CHUNK_USED \ + (offsetof(SlabChunkData, size) + sizeof(Size)) +#endif + +typedef struct SlabBlockData *SlabBlock; /* forward reference */ +typedef struct SlabChunkData *SlabChunk; + +/* + * SlabPointer + * Aligned pointer which may be a member of an allocation set. + */ +typedef void *SlabPointer; + +/* + * SlabContext is our standard implementation of MemoryContext. + */ +typedef struct SlabContext +{ + MemoryContextData header; /* Standard memory-context fields */ + /* Allocation parameters for this context: */ + Size chunkSize; /* chunk size */ + Size fullChunkSize; /* chunk size including header and alignment */ + Size blockSize; /* block size */ + int chunksPerBlock; /* number of chunks per block */ + int minFreeCount; /* min number of free chunks in any block */ + /* Info about storage allocated in this context: */ + SlabBlock freelist[1]; /* free lists (block-level) */ +} SlabContext; + +typedef SlabContext *Slab; + +typedef struct SlabBlockData +{ + Slab slab; /* slab that owns this block */ + SlabBlock prev; /* previous block in slab's block list */ + SlabBlock next; /* next block in slab's blocks list */ + int nfree; /* number of free chunks */ + char *bitmapptr; /* pointer to free bitmap */ +} SlabBlockData; + +/* + * SlabChunk + * The prefix of each piece of memory in an SlabBlock + * + * NB: this MUST match StandardChunkHeader as defined by utils/memutils.h. + */ +typedef struct SlabChunkData +{ + void *block; + /* aset is the owning aset if allocated, or the freelist link if free */ + void *aset; + /* size is always the size of the usable space in the chunk */ + Size size; +#ifdef MEMORY_CONTEXT_CHECKING + /* when debugging memory usage, also store actual requested size */ + /* this is zero in a free chunk */ + Size requested_size; +#endif +} SlabChunkData; + +/* + * SlabPointerIsValid + * True iff pointer is valid allocation pointer. + */ +#define SlabPointerIsValid(pointer) PointerIsValid(pointer) + +/* + * SlabIsValid + * True iff set is valid allocation set. + */ +#define SlabIsValid(set) PointerIsValid(set) + +#define SlabPointerGetChunk(ptr) \ + ((SlabChunk)(((char *)(ptr)) - SLAB_CHUNKHDRSZ)) +#define SlabChunkGetPointer(chk) \ + ((SlabPointer)(((char *)(chk)) + SLAB_CHUNKHDRSZ)) + +/* + * These functions implement the MemoryContext API for Slab contexts. + */ +static void *SlabAlloc(MemoryContext context, Size size); +static void SlabFree(MemoryContext context, void *pointer); +static void *SlabRealloc(MemoryContext context, void *pointer, Size size); +static void SlabInit(MemoryContext context); +static void SlabReset(MemoryContext context); +static void SlabDelete(MemoryContext context); +static Size SlabGetChunkSpace(MemoryContext context, void *pointer); +static bool SlabIsEmpty(MemoryContext context); +static void SlabStats(MemoryContext context, int level, bool print, + MemoryContextCounters *totals); + +#ifdef MEMORY_CONTEXT_CHECKING +static void SlabCheck(MemoryContext context); +#endif + +/* + * This is the virtual function table for Slab contexts. + */ +static MemoryContextMethods SlabMethods = { + SlabAlloc, + SlabFree, + SlabRealloc, + SlabInit, + SlabReset, + SlabDelete, + SlabGetChunkSpace, + SlabIsEmpty, + SlabStats +#ifdef MEMORY_CONTEXT_CHECKING + ,SlabCheck +#endif +}; + +/* ---------- + * Debug macros + * ---------- + */ +#ifdef HAVE_ALLOCINFO +#define SlabFreeInfo(_cxt, _chunk) \ + fprintf(stderr, "AllocFree: %s: %p, %d\n", \ + (_cxt)->header.name, (_chunk), (_chunk)->size) +#define SlabAllocInfo(_cxt, _chunk) \ + fprintf(stderr, "AllocAlloc: %s: %p, %d\n", \ + (_cxt)->header.name, (_chunk), (_chunk)->size) +#else +#define SlabFreeInfo(_cxt, _chunk) +#define SlabAllocInfo(_cxt, _chunk) +#endif + + +#ifdef CLOBBER_FREED_MEMORY + +/* Wipe freed memory for debugging purposes */ +static void +wipe_mem(void *ptr, size_t size) +{ + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); + memset(ptr, 0x7F, size); + VALGRIND_MAKE_MEM_NOACCESS(ptr, size); +} +#endif + +#ifdef MEMORY_CONTEXT_CHECKING +static void +set_sentinel(void *base, Size offset) +{ + char *ptr = (char *) base + offset; + + VALGRIND_MAKE_MEM_UNDEFINED(ptr, 1); + *ptr = 0x7E; + VALGRIND_MAKE_MEM_NOACCESS(ptr, 1); +} + +static bool +sentinel_ok(const void *base, Size offset) +{ + const char *ptr = (const char *) base + offset; + bool ret; + + VALGRIND_MAKE_MEM_DEFINED(ptr, 1); + ret = *ptr == 0x7E; + VALGRIND_MAKE_MEM_NOACCESS(ptr, 1); + + return ret; +} +#endif + +#ifdef RANDOMIZE_ALLOCATED_MEMORY + +/* + * Fill a just-allocated piece of memory with "random" data. It's not really + * very random, just a repeating sequence with a length that's prime. What + * we mainly want out of it is to have a good probability that two palloc's + * of the same number of bytes start out containing different data. + * + * The region may be NOACCESS, so make it UNDEFINED first to avoid errors as + * we fill it. Filling the region makes it DEFINED, so make it UNDEFINED + * again afterward. Whether to finally make it UNDEFINED or NOACCESS is + * fairly arbitrary. UNDEFINED is more convenient for SlabRealloc(), and + * other callers have no preference. + */ +static void +randomize_mem(char *ptr, size_t size) +{ + static int save_ctr = 1; + size_t remaining = size; + int ctr; + + ctr = save_ctr; + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); + while (remaining-- > 0) + { + *ptr++ = ctr; + if (++ctr > 251) + ctr = 1; + } + VALGRIND_MAKE_MEM_UNDEFINED(ptr - size, size); + save_ctr = ctr; +} +#endif /* RANDOMIZE_ALLOCATED_MEMORY */ + + +/* + * Public routines + */ + + +/* + * SlabContextCreate + * Create a new Slab context. + * + * parent: parent context, or NULL if top-level context + * name: name of context (for debugging --- string will be copied) + * minContextSize: minimum context size + * initBlockSize: initial allocation block size + * maxBlockSize: maximum allocation block size + */ +MemoryContext +SlabContextCreate(MemoryContext parent, + const char *name, + Size blockSize, + Size chunkSize) +{ + int i, chunksPerBlock; + Size fullChunkSize; + Slab set; + + /* we want both addresses nicely aligned */ + fullChunkSize = MAXALIGN(sizeof(SlabChunkData) + MAXALIGN(chunkSize)); + + /* so how many chunks can we fit into a block, including header and bitmap */ + chunksPerBlock = 0; + for (i = 0; i <= blockSize / chunkSize; i++) + { + Size bitmapsize = MAXALIGN((i + 7) / 8); + + /* repeat until we exceed the block */ + if (((i * fullChunkSize) + sizeof(SlabBlockData) + bitmapsize) > blockSize) + break; + + chunksPerBlock = i; + } + + /* Do the type-independent part of context creation */ + set = (Slab) MemoryContextCreate(T_SlabContext, + (sizeof(SlabContext) + sizeof(SlabChunk) * chunksPerBlock), + &SlabMethods, + parent, + name); + + set->blockSize = blockSize; + set->chunkSize = chunkSize; + set->fullChunkSize = fullChunkSize; + set->chunksPerBlock = chunksPerBlock; + + return (MemoryContext) set; +} + +/* + * SlabInit + * Context-type-specific initialization routine. + * + * This is called by MemoryContextCreate() after setting up the + * generic MemoryContext fields and before linking the new context + * into the context tree. We must do whatever is needed to make the + * new context minimally valid for deletion. We must *not* risk + * failure --- thus, for example, allocating more memory is not cool. + * (SlabContextCreate can allocate memory when it gets control + * back, however.) + */ +static void +SlabInit(MemoryContext context) +{ + /* + * Since MemoryContextCreate already zeroed the context node, we don't + * have to do anything here: it's already OK. + */ +} + +/* + * SlabReset + * Frees all memory which is allocated in the given set. + * + * Actually, this routine has some discretion about what to do. + * It should mark all allocated chunks freed, but it need not necessarily + * give back all the resources the set owns. Our actual implementation is + * that we hang onto any "keeper" block specified for the set. In this way, + * we don't thrash malloc() when a context is repeatedly reset after small + * allocations, which is typical behavior for per-tuple contexts. + */ +static void +SlabReset(MemoryContext context) +{ + int i; + Slab set = (Slab) context; + + AssertArg(SlabIsValid(set)); + +#ifdef MEMORY_CONTEXT_CHECKING + /* Check for corruption and leaks before freeing */ + SlabCheck(context); +#endif + + /* walk over freelists and free the blocks */ + for (i = 0; i <= set->chunksPerBlock; i++) + { + SlabBlock block = set->freelist[i]; + set->freelist[i] = NULL; + + while (block != NULL) + { + SlabBlock next = block->next; + + /* Normal case, release the block */ +#ifdef CLOBBER_FREED_MEMORY + wipe_mem(block, set->blockSize); +#endif + free(block); + + block = next; + } + } +} + +/* + * SlabDelete + * Frees all memory which is allocated in the given set, + * in preparation for deletion of the set. + * + * Unlike SlabReset, this *must* free all resources of the set. + * But note we are not responsible for deleting the context node itself. + */ +static void +SlabDelete(MemoryContext context) +{ + /* just reset the context */ + SlabReset(context); +} + +static void +remove_from_freelist(Slab set, SlabBlock block, int nfree_old) +{ + /* either it has a previous block, or it's the first block in list */ + if (block->prev) + block->prev->next = block->next; + else + set->freelist[nfree_old] = block->next; + + /* if it has a next block, update it too */ + if (block->next) + block->next->prev = block->prev; + + block->prev = NULL; + block->next = NULL; +} + +static void +add_to_freelist(Slab set, SlabBlock block) +{ + /* otherwise add it to the proper freelist bin */ + if (set->freelist[block->nfree]) + set->freelist[block->nfree]->prev = block; + + block->next = set->freelist[block->nfree]; + set->freelist[block->nfree] = block; +} + + +static void +move_in_freelist(Slab set, SlabBlock block, int nfree_old) +{ + remove_from_freelist(set, block, nfree_old); + add_to_freelist(set, block); +} + +/* + * SlabAlloc + * Returns pointer to allocated memory of given size or NULL if + * request could not be completed; memory is added to the set. + * + * No request may exceed: + * MAXALIGN_DOWN(SIZE_MAX) - SLAB_BLOCKHDRSZ - SLAB_CHUNKHDRSZ + * All callers use a much-lower limit. + */ +static void * +SlabAlloc(MemoryContext context, Size size) +{ + Slab set = (Slab) context; + SlabBlock block; + SlabChunk chunk; + int idx; + + AssertArg(SlabIsValid(set)); + + /* TODO check that the chunk size is the correct one */ + Assert(size == set->chunkSize); + + /* if there are no free chunks in any block, create a new block and put if + * to the last bucket */ + if (set->minFreeCount == 0) + { + block = (SlabBlock)malloc(set->blockSize); + + if (block == NULL) + return NULL; + + memset(block, 0, set->blockSize); + + block->slab = set; + block->nfree = set->chunksPerBlock; + block->prev = NULL; + block->next = NULL; + + block->bitmapptr + = ((char *) block) + set->blockSize - (set->chunksPerBlock + 7) / 8; + + /* + * And add it to the last freelist with all chunks empty (we know + * there are no blocks in the freelist, otherwise we wouldn't need + * a new block. + */ + set->freelist[set->chunksPerBlock] = block; + set->minFreeCount = set->chunksPerBlock; + } + + /* now grab the block */ + block = set->freelist[set->minFreeCount]; + + /* make sure the values match */ + Assert(set->minFreeCount == block->nfree); + + /* find an empty chunk in the block */ + for (idx = 0; idx < set->chunksPerBlock; idx++) + /* stop when you find 0 (unused) */ + if (! (block->bitmapptr[idx/8] & (0x01 << (idx % 8)))) + break; + + /* mark the chunk as used (set 1 to the bit) */ + block->bitmapptr[idx/8] |= (0x01 << (idx % 8)); + + /* compute the location wrt block start (after the block header) */ + chunk = (SlabChunk) ((char*)block + sizeof(SlabBlockData) + + (idx * set->fullChunkSize)); + + /* + * update the block nfree count, and also the minFreeCount as we've + * decreased nfree for a block with the minimum count + */ + block->nfree--; + set->minFreeCount = block->nfree; + + /* move the block to the right place in the freelist */ + move_in_freelist(set, block, block->nfree + 1); + + /* but if the minimum is 0, we need to look for a new one */ + if (set->minFreeCount == 0) + for (idx = 1; idx <= set->chunksPerBlock; idx++) + if (set->freelist[idx]) + { + set->minFreeCount = idx; + break; + } + + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, SLAB_CHUNK_USED); + + chunk->aset = (void *) set; + chunk->block = (void *) block; + chunk->size = MAXALIGN(size); + +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = size; + VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size, + sizeof(chunk->requested_size)); + /* set mark to catch clobber of "unused" space */ + if (size < chunk->size) + set_sentinel(SlabChunkGetPointer(chunk), size); +#endif +#ifdef RANDOMIZE_ALLOCATED_MEMORY + /* fill the allocated space with junk */ + randomize_mem((char *) SlabChunkGetPointer(chunk), size); +#endif + + SlabAllocInfo(set, chunk); + return SlabChunkGetPointer(chunk); +} + +/* + * SlabFree + * Frees allocated memory; memory is removed from the set. + */ +static void +SlabFree(MemoryContext context, void *pointer) +{ + int idx; + Slab set = (Slab) context; + SlabChunk chunk = SlabPointerGetChunk(pointer); + SlabBlock block = chunk->block; + + SlabFreeInfo(set, chunk); + +#ifdef MEMORY_CONTEXT_CHECKING + VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size, + sizeof(chunk->requested_size)); + /* Test for someone scribbling on unused space in chunk */ + if (chunk->requested_size < chunk->size) + if (!sentinel_ok(pointer, chunk->requested_size)) + elog(WARNING, "detected write past chunk end in %s %p", + set->header.name, chunk); +#endif + + /* compute index wrt to block start */ + idx = ((char*)chunk - ((char*)block + sizeof(SlabBlockData))) / set->fullChunkSize; + + /* mark the chunk as unused (set 0 to the bit), and update block nfree count */ + block->bitmapptr[idx/8] ^= (0x01 << (idx % 8)); + block->nfree++; + +#ifdef CLOBBER_FREED_MEMORY + wipe_mem(pointer, chunk->size); +#endif + +#ifdef MEMORY_CONTEXT_CHECKING + /* Reset requested_size to 0 in chunks that are on freelist */ + chunk->requested_size = 0; +#endif + + /* now decide what to do with the block */ + + /* + * See if we need to update the minFreeCount field - we only need to do + * that if the block had that number of free chunks before we freed one. + * In that case, check if there still are blocks with that number of free + * chunks - we can simply check if there's prev or next pointer. + */ + if (set->minFreeCount == (block->nfree-1)) + if ((block->prev == NULL) && (block->next == NULL)) /* no other blocks */ + set->minFreeCount++; + + /* remove the block from a freelist */ + remove_from_freelist(set, block, block->nfree-1); + + /* + * If the block is now completely empty, free it (but only if there's + * another entirely empty block, and there are no other blocks with + * free chunks). + */ + if ((block->nfree == set->chunksPerBlock) && + (set->minFreeCount == set->chunksPerBlock) && + (set->freelist[set->chunksPerBlock] != NULL)) + { + free(block); + return; + } + + add_to_freelist(set, block); +} + +/* + * SlabRealloc + * Returns new pointer to allocated memory of given size or NULL if + * request could not be completed; this memory is added to the set. + * Memory associated with given pointer is copied into the new memory, + * and the old memory is freed. + * + * Without MEMORY_CONTEXT_CHECKING, we don't know the old request size. This + * makes our Valgrind client requests less-precise, hazarding false negatives. + * (In principle, we could use VALGRIND_GET_VBITS() to rediscover the old + * request size.) + */ +static void * +SlabRealloc(MemoryContext context, void *pointer, Size size) +{ + /* can't do realloc with slab */ + return NULL; +} + +/* + * SlabGetChunkSpace + * Given a currently-allocated chunk, determine the total space + * it occupies (including all memory-allocation overhead). + */ +static Size +SlabGetChunkSpace(MemoryContext context, void *pointer) +{ + SlabChunk chunk = SlabPointerGetChunk(pointer); + + return chunk->size + SLAB_CHUNKHDRSZ; +} + +/* + * SlabIsEmpty + * Is an Slab empty of any allocated space? + */ +static bool +SlabIsEmpty(MemoryContext context) +{ + /* + * For now, we say "empty" only if the context is new or just reset. We + * could examine the freelists to determine if all space has been freed, + * but it's not really worth the trouble for present uses of this + * functionality. + */ + if (context->isReset) + return true; + return false; +} + +/* + * SlabStats + * Compute stats about memory consumption of an Slab. + * + * level: recursion level (0 at top level); used for print indentation. + * print: true to print stats to stderr. + * totals: if not NULL, add stats about this Slab into *totals. + */ +static void +SlabStats(MemoryContext context, int level, bool print, + MemoryContextCounters *totals) +{ + Slab set = (Slab) context; + Size nblocks = 0; + Size freechunks = 0; + Size totalspace = 0; + Size freespace = 0; + int i; + + for (i = 0; i <= set->chunksPerBlock; i++) + { + SlabBlock block = set->freelist[i]; + while (block != NULL) + { + nblocks++; + totalspace += set->blockSize; + freespace += set->fullChunkSize * block->nfree; + freechunks += block->nfree; + block = block->next; + } + } + + if (print) + { + int i; + + for (i = 0; i < level; i++) + fprintf(stderr, " "); + fprintf(stderr, + "%s: %zu total in %zd blocks; %zu free (%zd chunks); %zu used\n", + set->header.name, totalspace, nblocks, freespace, freechunks, + totalspace - freespace); + } + + if (totals) + { + totals->nblocks += nblocks; + totals->freechunks += freechunks; + totals->totalspace += totalspace; + totals->freespace += freespace; + } +} + + +#ifdef MEMORY_CONTEXT_CHECKING + +/* + * SlabCheck + * Walk through chunks and check consistency of memory. + * + * NOTE: report errors as WARNING, *not* ERROR or FATAL. Otherwise you'll + * find yourself in an infinite loop when trouble occurs, because this + * routine will be entered again when elog cleanup tries to release memory! + */ +static void +SlabCheck(MemoryContext context) +{ + /* FIXME */ +} + +#endif /* MEMORY_CONTEXT_CHECKING */ diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h index ba069cc..92a7478 100644 --- a/src/include/nodes/memnodes.h +++ b/src/include/nodes/memnodes.h @@ -96,6 +96,6 @@ typedef struct MemoryContextData */ #define MemoryContextIsValid(context) \ ((context) != NULL && \ - (IsA((context), AllocSetContext))) + (IsA((context), AllocSetContext) || IsA((context), SlabContext))) #endif /* MEMNODES_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 6b850e4..62005bb 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -274,6 +274,7 @@ typedef enum NodeTag */ T_MemoryContext = 600, T_AllocSetContext, + T_SlabContext, /* * TAGS FOR VALUE NODES (value.h) diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h index 9e209ae..4959c74 100644 --- a/src/include/replication/reorderbuffer.h +++ b/src/include/replication/reorderbuffer.h @@ -331,6 +331,22 @@ struct ReorderBuffer MemoryContext context; /* + * slab contexts for change and TXN objects. + */ + MemoryContext change_context; + MemoryContext txn_context; + MemoryContext tup_context; + MemoryContext tup_context2; + + /* counters for current generation of tuples */ + int tuples_count; + Size tuples_size; + Size current_size; + + /* history of tuple slab contexts (generational design) */ + List *tup_context_history; + + /* * Data structure slab cache. * * We allocate/deallocate some structures very frequently, to avoid bigger diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index ae07705..3209f71 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -135,6 +135,12 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent, Size initBlockSize, Size maxBlockSize); +/* slab.c */ +extern MemoryContext SlabContextCreate(MemoryContext parent, + const char *name, + Size blockSize, + Size chunkSize); + /* * Recommended default alloc parameters, suitable for "ordinary" contexts * that might hold quite a lot of data. @@ -159,4 +165,7 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent, */ #define ALLOCSET_SEPARATE_THRESHOLD 8192 +#define SLAB_DEFAULT_BLOCK_SIZE 8192 +#define SLAB_LARGE_BLOCK_SIZE (8 * 1024 * 1024) + #endif /* MEMUTILS_H */