Make MemoryContextMemAllocated() more precise

Started by Jeff Davisalmost 6 years ago22 messages
#1Jeff Davis
pgsql@j-davis.com
1 attachment(s)

AllocSet allocates memory for itself in blocks, which double in size up
to maxBlockSize. So, the current block (the last one malloc'd) may
represent half of the total memory allocated for the context itself.

The free space at the end of that block hasn't been touched at all, and
doesn't represent fragmentation or overhead. That means that the
"allocated" memory can be 2X the memory ever touched in the worst case.

Although that's technically correct, the purpose of
MemoryContextMemAllocated() is to give a "real" usage number so we know
when we're out of work_mem and need to spill (in particular, the disk-
based HashAgg work, but ideally other operators as well). This "real"
number should include fragmentation, freed-and-not-reused chunks, and
other overhead. But it should not include significant amounts of
allocated-but-never-touched memory, which says more about economizing
calls to malloc than it does about the algorithm's memory usage.

Attached is a patch that makes mem_allocated a method (rather than a
field) of MemoryContext, and allows each memory context type to track
the memory its own way. They all do the same thing as before
(increment/decrement a field), but AllocSet also subtracts out the free
space in the current block. For Slab and Generation, we could do
something similar, but it's not as much of a problem because there's no
doubling of the allocation size.

Although I think this still matches the word "allocation" in spirit,
it's not technically correct, so feel free to suggest a new name for
MemoryContextMemAllocated().

Regards,
Jeff Davis

Attachments:

allocated.patchtext/x-patch; charset=UTF-8; name=allocated.patchDownload
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index c0623f106d2..ccf78ffe0cb 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -132,6 +132,7 @@ typedef struct AllocSetContext
 	Size		maxBlockSize;	/* maximum block size */
 	Size		nextBlockSize;	/* next block size to allocate */
 	Size		allocChunkLimit;	/* effective chunk size limit */
+	Size		memAllocated;	/* track memory allocated for this context */
 	AllocBlock	keeper;			/* keep this block over resets */
 	/* freelist this context could be put in, or -1 if not a candidate: */
 	int			freeListIndex;	/* index in context_freelists[], or -1 */
@@ -272,6 +273,7 @@ static void *AllocSetRealloc(MemoryContext context, void *pointer, Size size);
 static void AllocSetReset(MemoryContext context);
 static void AllocSetDelete(MemoryContext context);
 static Size AllocSetGetChunkSpace(MemoryContext context, void *pointer);
+static Size AllocSetMemAllocated(MemoryContext context);
 static bool AllocSetIsEmpty(MemoryContext context);
 static void AllocSetStats(MemoryContext context,
 						  MemoryStatsPrintFunc printfunc, void *passthru,
@@ -291,6 +293,7 @@ static const MemoryContextMethods AllocSetMethods = {
 	AllocSetReset,
 	AllocSetDelete,
 	AllocSetGetChunkSpace,
+	AllocSetMemAllocated,
 	AllocSetIsEmpty,
 	AllocSetStats
 #ifdef MEMORY_CONTEXT_CHECKING
@@ -464,8 +467,7 @@ AllocSetContextCreateInternal(MemoryContext parent,
 								parent,
 								name);
 
-			((MemoryContext) set)->mem_allocated =
-				set->keeper->endptr - ((char *) set);
+			set->memAllocated = set->keeper->endptr - ((char *) set);
 
 			return (MemoryContext) set;
 		}
@@ -555,7 +557,7 @@ AllocSetContextCreateInternal(MemoryContext parent,
 						parent,
 						name);
 
-	((MemoryContext) set)->mem_allocated = firstBlockSize;
+	set->memAllocated = firstBlockSize;
 
 	return (MemoryContext) set;
 }
@@ -617,7 +619,7 @@ AllocSetReset(MemoryContext context)
 		else
 		{
 			/* Normal case, release the block */
-			context->mem_allocated -= block->endptr - ((char*) block);
+			set->memAllocated -= block->endptr - ((char*) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 			wipe_mem(block, block->freeptr - ((char *) block));
@@ -627,7 +629,7 @@ AllocSetReset(MemoryContext context)
 		block = next;
 	}
 
-	Assert(context->mem_allocated == keepersize);
+	Assert(set->memAllocated == keepersize);
 
 	/* Reset block size allocation sequence, too */
 	set->nextBlockSize = set->initBlockSize;
@@ -703,7 +705,7 @@ AllocSetDelete(MemoryContext context)
 		AllocBlock	next = block->next;
 
 		if (block != set->keeper)
-			context->mem_allocated -= block->endptr - ((char *) block);
+			set->memAllocated -= block->endptr - ((char *) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
@@ -715,7 +717,7 @@ AllocSetDelete(MemoryContext context)
 		block = next;
 	}
 
-	Assert(context->mem_allocated == keepersize);
+	Assert(set->memAllocated == keepersize);
 
 	/* Finally, free the context header, including the keeper block */
 	free(set);
@@ -758,7 +760,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->aset = set;
 		block->freeptr = block->endptr = ((char *) block) + blksize;
@@ -955,7 +957,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->aset = set;
 		block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
@@ -1058,7 +1060,7 @@ AllocSetFree(MemoryContext context, void *pointer)
 		if (block->next)
 			block->next->prev = block->prev;
 
-		context->mem_allocated -= block->endptr - ((char*) block);
+		set->memAllocated -= block->endptr - ((char*) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
@@ -1161,8 +1163,8 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size)
 		}
 
 		/* updated separately, not to underflow when (oldblksize > blksize) */
-		context->mem_allocated -= oldblksize;
-		context->mem_allocated += blksize;
+		set->memAllocated -= oldblksize;
+		set->memAllocated += blksize;
 
 		block->freeptr = block->endptr = ((char *) block) + blksize;
 
@@ -1337,6 +1339,24 @@ AllocSetGetChunkSpace(MemoryContext context, void *pointer)
 	return result;
 }
 
+/*
+ * All memory currently allocated for this context (including fragmentation
+ * and freed chunks).
+ *
+ * Allocation sizes double (up to maxBlockSize), so the current block may
+ * represent half of the total space allocated to the context. Subtract away
+ * the free space at the tail of the current block, which may never have been
+ * touched.
+ */
+static Size
+AllocSetMemAllocated(MemoryContext context)
+{
+	AllocSet set = (AllocSet) context;
+	AllocBlock currentBlock = set->blocks;
+	Size tailSpace = currentBlock->endptr - currentBlock->freeptr;
+	return set->memAllocated - tailSpace;
+}
+
 /*
  * AllocSetIsEmpty
  *		Is an allocset empty of any allocated space?
@@ -1538,7 +1558,7 @@ AllocSetCheck(MemoryContext context)
 				 name, block);
 	}
 
-	Assert(total_allocated == context->mem_allocated);
+	Assert(total_allocated == set->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
index 56651d06931..f0ef540a7c5 100644
--- a/src/backend/utils/mmgr/generation.c
+++ b/src/backend/utils/mmgr/generation.c
@@ -61,6 +61,7 @@ typedef struct GenerationContext
 
 	/* Generational context parameters */
 	Size		blockSize;		/* standard block size */
+	Size		memAllocated;	/* track memory allocated for this context */
 
 	GenerationBlock *block;		/* current (most recently allocated) block */
 	dlist_head	blocks;			/* list of blocks */
@@ -152,6 +153,7 @@ static void *GenerationRealloc(MemoryContext context, void *pointer, Size size);
 static void GenerationReset(MemoryContext context);
 static void GenerationDelete(MemoryContext context);
 static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
+static Size GenerationMemAllocated(MemoryContext context);
 static bool GenerationIsEmpty(MemoryContext context);
 static void GenerationStats(MemoryContext context,
 							MemoryStatsPrintFunc printfunc, void *passthru,
@@ -171,6 +173,7 @@ static const MemoryContextMethods GenerationMethods = {
 	GenerationReset,
 	GenerationDelete,
 	GenerationGetChunkSpace,
+	GenerationMemAllocated,
 	GenerationIsEmpty,
 	GenerationStats
 #ifdef MEMORY_CONTEXT_CHECKING
@@ -258,6 +261,7 @@ GenerationContextCreate(MemoryContext parent,
 
 	/* Fill in GenerationContext-specific header fields */
 	set->blockSize = blockSize;
+	set->memAllocated = 0;
 	set->block = NULL;
 	dlist_init(&set->blocks);
 
@@ -297,7 +301,7 @@ GenerationReset(MemoryContext context)
 
 		dlist_delete(miter.cur);
 
-		context->mem_allocated -= block->blksize;
+		set->memAllocated -= block->blksize;
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->blksize);
@@ -354,7 +358,7 @@ GenerationAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		/* block with a single (used) chunk */
 		block->blksize = blksize;
@@ -411,7 +415,7 @@ GenerationAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->blksize = blksize;
 		block->nchunks = 0;
@@ -528,7 +532,7 @@ GenerationFree(MemoryContext context, void *pointer)
 	if (set->block == block)
 		set->block = NULL;
 
-	context->mem_allocated -= block->blksize;
+	set->memAllocated -= block->blksize;
 	free(block);
 }
 
@@ -666,6 +670,17 @@ GenerationGetChunkSpace(MemoryContext context, void *pointer)
 	return result;
 }
 
+/*
+ * All memory currently allocated for this context (including fragmentation
+ * and freed chunks).
+ */
+static Size
+GenerationMemAllocated(MemoryContext context)
+{
+	GenerationContext *set = (GenerationContext *) context;
+	return set->memAllocated;
+}
+
 /*
  * GenerationIsEmpty
  *		Is a GenerationContext empty of any allocated space?
@@ -844,7 +859,7 @@ GenerationCheck(MemoryContext context)
 				 name, nfree, block, block->nfree);
 	}
 
-	Assert(total_allocated == context->mem_allocated);
+	Assert(total_allocated == gen->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 9e24fec72d6..e32e279c340 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -469,7 +469,7 @@ MemoryContextIsEmpty(MemoryContext context)
 Size
 MemoryContextMemAllocated(MemoryContext context, bool recurse)
 {
-	Size	total = context->mem_allocated;
+	Size	total = context->methods->mem_allocated(context);
 
 	AssertArg(MemoryContextIsValid(context));
 
@@ -760,7 +760,6 @@ MemoryContextCreate(MemoryContext node,
 	node->methods = methods;
 	node->parent = parent;
 	node->firstchild = NULL;
-	node->mem_allocated = 0;
 	node->prevchild = NULL;
 	node->name = name;
 	node->ident = NULL;
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index c928476c479..63750fbc81f 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -67,6 +67,7 @@ typedef struct SlabContext
 	Size		fullChunkSize;	/* chunk size including header and alignment */
 	Size		blockSize;		/* block size */
 	Size		headerSize;		/* allocated size of context header */
+	Size		memAllocated;	/* track memory allocated for this context */
 	int			chunksPerBlock; /* number of chunks per block */
 	int			minFreeChunks;	/* min number of free chunks in any block */
 	int			nblocks;		/* number of blocks allocated */
@@ -132,6 +133,7 @@ static void *SlabRealloc(MemoryContext context, void *pointer, Size size);
 static void SlabReset(MemoryContext context);
 static void SlabDelete(MemoryContext context);
 static Size SlabGetChunkSpace(MemoryContext context, void *pointer);
+static Size SlabMemAllocated(MemoryContext context);
 static bool SlabIsEmpty(MemoryContext context);
 static void SlabStats(MemoryContext context,
 					  MemoryStatsPrintFunc printfunc, void *passthru,
@@ -150,6 +152,7 @@ static const MemoryContextMethods SlabMethods = {
 	SlabReset,
 	SlabDelete,
 	SlabGetChunkSpace,
+	SlabMemAllocated,
 	SlabIsEmpty,
 	SlabStats
 #ifdef MEMORY_CONTEXT_CHECKING
@@ -262,6 +265,7 @@ SlabContextCreate(MemoryContext parent,
 	slab->fullChunkSize = fullChunkSize;
 	slab->blockSize = blockSize;
 	slab->headerSize = headerSize;
+	slab->memAllocated = 0;
 	slab->chunksPerBlock = chunksPerBlock;
 	slab->minFreeChunks = 0;
 	slab->nblocks = 0;
@@ -286,6 +290,17 @@ SlabContextCreate(MemoryContext parent,
 	return (MemoryContext) slab;
 }
 
+/*
+ * All memory currently allocated for this context (including fragmentation
+ * and freed chunks).
+ */
+static Size
+SlabMemAllocated(MemoryContext context)
+{
+	SlabContext *slab = (SlabContext *) context;
+	return slab->memAllocated;
+}
+
 /*
  * SlabReset
  *		Frees all memory which is allocated in the given set.
@@ -322,14 +337,14 @@ SlabReset(MemoryContext context)
 #endif
 			free(block);
 			slab->nblocks--;
-			context->mem_allocated -= slab->blockSize;
+			slab->memAllocated -= slab->blockSize;
 		}
 	}
 
 	slab->minFreeChunks = 0;
 
 	Assert(slab->nblocks == 0);
-	Assert(context->mem_allocated == 0);
+	Assert(slab->memAllocated == 0);
 }
 
 /*
@@ -407,7 +422,7 @@ SlabAlloc(MemoryContext context, Size size)
 
 		slab->minFreeChunks = slab->chunksPerBlock;
 		slab->nblocks += 1;
-		context->mem_allocated += slab->blockSize;
+		slab->memAllocated += slab->blockSize;
 	}
 
 	/* grab the block from the freelist (even the new block is there) */
@@ -501,7 +516,7 @@ SlabAlloc(MemoryContext context, Size size)
 
 	SlabAllocInfo(slab, chunk);
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 
 	return SlabChunkGetPointer(chunk);
 }
@@ -578,13 +593,13 @@ SlabFree(MemoryContext context, void *pointer)
 	{
 		free(block);
 		slab->nblocks--;
-		context->mem_allocated -= slab->blockSize;
+		slab->memAllocated -= slab->blockSize;
 	}
 	else
 		dlist_push_head(&slab->freelist[block->nfree], &block->node);
 
 	Assert(slab->nblocks >= 0);
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 }
 
 /*
@@ -804,7 +819,7 @@ SlabCheck(MemoryContext context)
 		}
 	}
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index c9f2bbcb367..b6296af7d5f 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -63,6 +63,7 @@ typedef struct MemoryContextMethods
 	void		(*reset) (MemoryContext context);
 	void		(*delete_context) (MemoryContext context);
 	Size		(*get_chunk_space) (MemoryContext context, void *pointer);
+	Size		(*mem_allocated) (MemoryContext context);
 	bool		(*is_empty) (MemoryContext context);
 	void		(*stats) (MemoryContext context,
 						  MemoryStatsPrintFunc printfunc, void *passthru,
@@ -79,7 +80,6 @@ typedef struct MemoryContextData
 	/* these two fields are placed here to minimize alignment wastage: */
 	bool		isReset;		/* T = no space alloced since last reset */
 	bool		allowInCritSection; /* allow palloc in critical section */
-	Size		mem_allocated;	/* track memory allocated for this context */
 	const MemoryContextMethods *methods;	/* virtual function table */
 	MemoryContext parent;		/* NULL if no parent (toplevel context) */
 	MemoryContext firstchild;	/* head of linked list of children */
#2Jeff Davis
pgsql@j-davis.com
In reply to: Jeff Davis (#1)
Re: Make MemoryContextMemAllocated() more precise

On Mon, 2020-03-16 at 11:45 -0700, Jeff Davis wrote:

Attached is a patch that makes mem_allocated a method (rather than a
field) of MemoryContext, and allows each memory context type to track
the memory its own way. They all do the same thing as before
(increment/decrement a field), but AllocSet also subtracts out the
free
space in the current block. For Slab and Generation, we could do
something similar, but it's not as much of a problem because there's
no
doubling of the allocation size.

Committed.

In an off-list discussion, Andres suggested that MemoryContextStats
could be refactored to achieve this purpose, perhaps with flags to
avoid walking through the blocks and freelists when those are not
needed.

We discussed a few other names, such as "space", "active memory", and
"touched". We didn't settle on any great name, but "touched" seemed to
be the most descriptive.

This refactoring/renaming can be done later; right now I committed this
to unblock disk-based Hash Aggregation, which is ready.

Regards,
Jeff Davis

#3Robert Haas
robertmhaas@gmail.com
In reply to: Jeff Davis (#1)
Re: Make MemoryContextMemAllocated() more precise

On Mon, Mar 16, 2020 at 2:45 PM Jeff Davis <pgsql@j-davis.com> wrote:

Attached is a patch that makes mem_allocated a method (rather than a
field) of MemoryContext, and allows each memory context type to track
the memory its own way. They all do the same thing as before
(increment/decrement a field), but AllocSet also subtracts out the free
space in the current block. For Slab and Generation, we could do
something similar, but it's not as much of a problem because there's no
doubling of the allocation size.

Although I think this still matches the word "allocation" in spirit,
it's not technically correct, so feel free to suggest a new name for
MemoryContextMemAllocated().

Procedurally, I think that it is highly inappropriate to submit a
patch two weeks after the start of the final CommitFest and then
commit it just over 48 hours later without a single endorsement of the
change from anyone.

Substantively, I think that whether or not this is improvement depends
considerably on how your OS handles overcommit. I do not have enough
knowledge to know whether it will be better in general, but would
welcome opinions from others.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#4Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Robert Haas (#3)
Re: Make MemoryContextMemAllocated() more precise

On Thu, Mar 19, 2020 at 11:44:05AM -0400, Robert Haas wrote:

On Mon, Mar 16, 2020 at 2:45 PM Jeff Davis <pgsql@j-davis.com> wrote:

Attached is a patch that makes mem_allocated a method (rather than a
field) of MemoryContext, and allows each memory context type to track
the memory its own way. They all do the same thing as before
(increment/decrement a field), but AllocSet also subtracts out the free
space in the current block. For Slab and Generation, we could do
something similar, but it's not as much of a problem because there's no
doubling of the allocation size.

Although I think this still matches the word "allocation" in spirit,
it's not technically correct, so feel free to suggest a new name for
MemoryContextMemAllocated().

Procedurally, I think that it is highly inappropriate to submit a
patch two weeks after the start of the final CommitFest and then
commit it just over 48 hours later without a single endorsement of the
change from anyone.

True.

Substantively, I think that whether or not this is improvement depends
considerably on how your OS handles overcommit. I do not have enough
knowledge to know whether it will be better in general, but would
welcome opinions from others.

Not sure overcommit is a major factor, and if it is then maybe it's the
strategy of doubling block size that's causing problems.

AFAICS the 2x allocation is the worst case, because it only happens
right after allocating a new block (of twice the size), when the
"utilization" drops from 100% to 50%. But in practice the utilization
will be somewhere in between, with an average of 75%. And we're not
doubling the block size indefinitely - there's an upper limit, so over
time the utilization drops less and less. So as the contexts grow, the
discrepancy disappears. And I'd argue the smaller the context, the less
of an issue the overcommit behavior is.

My understanding is that this is really just an accounting issue, where
allocating a block would get us over the limit, which I suppose might be
an issue with low work_mem values.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#5Jeff Davis
pgsql@j-davis.com
In reply to: Tomas Vondra (#4)
Re: Make MemoryContextMemAllocated() more precise

On Thu, 2020-03-19 at 19:11 +0100, Tomas Vondra wrote:

AFAICS the 2x allocation is the worst case, because it only happens
right after allocating a new block (of twice the size), when the
"utilization" drops from 100% to 50%. But in practice the utilization
will be somewhere in between, with an average of 75%.

Sort of. Hash Agg is constantly watching the memory, so it will
typically spill right at the point where the accounting for that memory
context is off by 2X.

That's mitigated because the hash table itself (the array of
TupleHashEntryData) ends up allocated as its own block, so does not
have any waste. The total (table mem + out of line) might be close to
right if the hash table array itself is a large fraction of the data,
but I don't think that's what we want.

And we're not
doubling the block size indefinitely - there's an upper limit, so
over
time the utilization drops less and less. So as the contexts grow,
the
discrepancy disappears. And I'd argue the smaller the context, the
less
of an issue the overcommit behavior is.

The problem is that the default work_mem is 4MB, and the doubling
behavior goes to 8MB, so it's a problem with default settings.

Regards,
Jeff Davis

#6Robert Haas
robertmhaas@gmail.com
In reply to: Tomas Vondra (#4)
Re: Make MemoryContextMemAllocated() more precise

On Thu, Mar 19, 2020 at 2:11 PM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

My understanding is that this is really just an accounting issue, where
allocating a block would get us over the limit, which I suppose might be
an issue with low work_mem values.

Well, the issue is, if I understand correctly, that this means that
MemoryContextStats() might now report a smaller amount of memory than
what we actually allocated from the operating system. That seems like
it might lead someone trying to figure out where a backend is leaking
memory to erroneous conclusions.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#7Jeff Davis
pgsql@j-davis.com
In reply to: Robert Haas (#3)
Re: Make MemoryContextMemAllocated() more precise

On Thu, 2020-03-19 at 11:44 -0400, Robert Haas wrote:

Procedurally, I think that it is highly inappropriate to submit a
patch two weeks after the start of the final CommitFest and then
commit it just over 48 hours later without a single endorsement of
the
change from anyone.

Reverted.

Sorry, I misjudged this as a "supporting fix for a specific problem",
but it seems others feel it requires discussion.

Substantively, I think that whether or not this is improvement
depends
considerably on how your OS handles overcommit. I do not have enough
knowledge to know whether it will be better in general, but would
welcome opinions from others.

I think omitting the tail of the current block is an unqualified
improvement for the purpose of obeying work_mem, regardless of the OS.
The block sizes keep doubling up to 8MB, and it doesn't make a lot of
sense to count that last large mostly-empty block against work_mem.

Regards,
Jeff Davis

#8Robert Haas
robertmhaas@gmail.com
In reply to: Jeff Davis (#7)
Re: Make MemoryContextMemAllocated() more precise

On Thu, Mar 19, 2020 at 3:27 PM Jeff Davis <pgsql@j-davis.com> wrote:

I think omitting the tail of the current block is an unqualified
improvement for the purpose of obeying work_mem, regardless of the OS.
The block sizes keep doubling up to 8MB, and it doesn't make a lot of
sense to count that last large mostly-empty block against work_mem.

Well, again, my point is that whether or not it counts depends on your
system's overcommit behavior. Depending on how you have the
configured, or what your OS likes to do, it may in reality count or
not count. Or so I believe. Am I wrong?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#9Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Jeff Davis (#5)
Re: Make MemoryContextMemAllocated() more precise

On Thu, Mar 19, 2020 at 12:25:16PM -0700, Jeff Davis wrote:

On Thu, 2020-03-19 at 19:11 +0100, Tomas Vondra wrote:

AFAICS the 2x allocation is the worst case, because it only happens
right after allocating a new block (of twice the size), when the
"utilization" drops from 100% to 50%. But in practice the utilization
will be somewhere in between, with an average of 75%.

Sort of. Hash Agg is constantly watching the memory, so it will
typically spill right at the point where the accounting for that memory
context is off by 2X.

That's mitigated because the hash table itself (the array of
TupleHashEntryData) ends up allocated as its own block, so does not
have any waste. The total (table mem + out of line) might be close to
right if the hash table array itself is a large fraction of the data,
but I don't think that's what we want.

And we're not
doubling the block size indefinitely - there's an upper limit, so
over
time the utilization drops less and less. So as the contexts grow,
the
discrepancy disappears. And I'd argue the smaller the context, the
less
of an issue the overcommit behavior is.

The problem is that the default work_mem is 4MB, and the doubling
behavior goes to 8MB, so it's a problem with default settings.

Yes, it's an issue for the accuracy of our accounting. What Robert was
talking about is overcommit behavior at the OS level, which I'm arguing
is unlikely to be an issue, because for low work_mem values the absolute
difference is small, and on large work_mem values it's limited by the
block size limit.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#10Jeff Davis
pgsql@j-davis.com
In reply to: Robert Haas (#6)
Re: Make MemoryContextMemAllocated() more precise

On Thu, 2020-03-19 at 15:26 -0400, Robert Haas wrote:

Well, the issue is, if I understand correctly, that this means that
MemoryContextStats() might now report a smaller amount of memory than
what we actually allocated from the operating system. That seems like
it might lead someone trying to figure out where a backend is leaking
memory to erroneous conclusions.

MemoryContextStats() was unaffected by my now-reverted change.

Regards,
Jeff Davis

#11Jeff Davis
pgsql@j-davis.com
In reply to: Jeff Davis (#1)
Re: Make MemoryContextMemAllocated() more precise

On Mon, 2020-03-16 at 11:45 -0700, Jeff Davis wrote:

Although that's technically correct, the purpose of
MemoryContextMemAllocated() is to give a "real" usage number so we
know
when we're out of work_mem and need to spill (in particular, the
disk-
based HashAgg work, but ideally other operators as well). This "real"
number should include fragmentation, freed-and-not-reused chunks, and
other overhead. But it should not include significant amounts of
allocated-but-never-touched memory, which says more about economizing
calls to malloc than it does about the algorithm's memory usage.

To expand on this point:

work_mem is to keep executor algorithms somewhat constrained in the
memory that they use. With that in mind, it should reflect things that
the algorithm has some control over, and can be measured cheaply.

Therefore, we shouldn't include huge nearly-empty blocks of memory that
the system decided to allocate in response to a request for a small
chunk (the algorithm has little control). Nor should we try to walk a
list of blocks or free chunks (expensive).

We should include used memory, reasonable overhead (chunk headers,
alignment, etc.), and probably free chunks (which represent
fragmentation).

For AllocSet, the notion of "all touched memory", which is everything
except the current block's tail, seems to fit the requirements well.

Regards,
Jeff Davis

#12Robert Haas
robertmhaas@gmail.com
In reply to: Jeff Davis (#10)
Re: Make MemoryContextMemAllocated() more precise

On Thu, Mar 19, 2020 at 3:44 PM Jeff Davis <pgsql@j-davis.com> wrote:

On Thu, 2020-03-19 at 15:26 -0400, Robert Haas wrote:

Well, the issue is, if I understand correctly, that this means that
MemoryContextStats() might now report a smaller amount of memory than
what we actually allocated from the operating system. That seems like
it might lead someone trying to figure out where a backend is leaking
memory to erroneous conclusions.

MemoryContextStats() was unaffected by my now-reverted change.

Oh. Well, that addresses my concern, then. If this only affects the
accounting for memory-bounded hash aggregation and nothing else is
going to be touched, including MemoryContextStats(), then it's not an
issue for me.

Other people may have different concerns, but that was the only thing
that was bothering me.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#13Jeff Davis
pgsql@j-davis.com
In reply to: Robert Haas (#8)
Re: Make MemoryContextMemAllocated() more precise

On Thu, 2020-03-19 at 15:33 -0400, Robert Haas wrote:

On Thu, Mar 19, 2020 at 3:27 PM Jeff Davis <pgsql@j-davis.com> wrote:

I think omitting the tail of the current block is an unqualified
improvement for the purpose of obeying work_mem, regardless of the
OS.
The block sizes keep doubling up to 8MB, and it doesn't make a lot
of
sense to count that last large mostly-empty block against work_mem.

Well, again, my point is that whether or not it counts depends on
your
system's overcommit behavior. Depending on how you have the
configured, or what your OS likes to do, it may in reality count or
not count. Or so I believe. Am I wrong?

I don't believe it should not be counted for the purposes of work_mem.

Let's say that the OS eagerly allocates it, then what is the algorithm
supposed to do in response? It can either:

1. just accept that all of the space is used, even though it's
potentially as low as 50% used, and spill earlier than may be
necessary; or

2. find a way to measure the free space, and somehow predict whether
that space will be reused the next time a group is added to the hash
table

It just doesn't seem reasonable for me for the algorithm to change its
behavior based on these large block allocations. It may be valuable
information for other purposes (like tuning your OS, or tracking down
memory issues), though.

Regards,
Jeff Davis

#14Jeff Davis
pgsql@j-davis.com
In reply to: Robert Haas (#12)
Re: Make MemoryContextMemAllocated() more precise

On Thu, 2020-03-19 at 16:04 -0400, Robert Haas wrote:

Other people may have different concerns, but that was the only thing
that was bothering me.

OK, thank you for raising it.

Perhaps we can re-fix the issue for HashAgg if necessary, or I can
tweak some accounting things within HashAgg itself, or both.

Regards,
Jeff Davis

#15Jeff Davis
pgsql@j-davis.com
In reply to: Jeff Davis (#2)
1 attachment(s)
Re: Make MemoryContextMemAllocated() more precise

On Wed, 2020-03-18 at 15:41 -0700, Jeff Davis wrote:

In an off-list discussion, Andres suggested that MemoryContextStats
could be refactored to achieve this purpose, perhaps with flags to
avoid walking through the blocks and freelists when those are not
needed.

Attached refactoring patch. There's enough in here that warrants
discussion that I don't think this makes sense for v13 and I'm adding
it to the July commitfest.

I still think we should do something for v13, such as the originally-
proposed patch[1]/messages/by-id/ec63d70b668818255486a83ffadc3aec492c1f57.camel@j-davis.com. It's not critical, but it simply reports a better
number for memory consumption. Currently, the memory usage appears to
jump, often right past work mem (by a reasonable but noticable amount),
which could be confusing.

Regarding the attached patch (target v14):

* there's a new MemoryContextCount() that simply calculates the
statistics without printing anything, and returns a struct
- it supports flags to indicate which stats should be
calculated, so that some callers can avoid walking through
blocks/freelists
* it adds a new statistic for "new space" (i.e. untouched)
* it eliminates specialization of the memory context printing
- the only specialization was for generation.c to output the
number of chunks, which can be done easily enough for the
other types, too

Regards,
Jeff Davis

[1]: /messages/by-id/ec63d70b668818255486a83ffadc3aec492c1f57.camel@j-davis.com
/messages/by-id/ec63d70b668818255486a83ffadc3aec492c1f57.camel@j-davis.com

Attachments:

mcxt.patchtext/x-patch; charset=UTF-8; name=mcxt.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2a6f44a6274..9ae62dda9cd 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1776,11 +1776,24 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
 static void
 hash_agg_check_limits(AggState *aggstate)
 {
-	uint64 ngroups = aggstate->hash_ngroups_current;
-	Size meta_mem = MemoryContextMemAllocated(
-		aggstate->hash_metacxt, true);
-	Size hash_mem = MemoryContextMemAllocated(
-		aggstate->hashcontext->ecxt_per_tuple_memory, true);
+	MemoryContextCounters	counters;
+	uint64					ngroups = aggstate->hash_ngroups_current;
+	uint32					flags	= (MCXT_STAT_TOTALSPACE |
+									   MCXT_STAT_NEWSPACE);
+	Size					meta_mem;
+	Size					hash_mem;
+
+	/*
+	 * Consider all memory except "newspace", which is part of a block
+	 * allocation by the memory context itself that aggregation has little
+	 * control over. It doesn't make sense to count that against work_mem.
+	 */
+	counters = MemoryContextCount(aggstate->hash_metacxt, flags, true);
+	meta_mem = counters.totalspace - counters.newspace;
+
+	counters = MemoryContextCount(
+		aggstate->hashcontext->ecxt_per_tuple_memory, flags, true);
+	hash_mem = counters.totalspace - counters.newspace;
 
 	/*
 	 * Don't spill unless there's at least one group in the hash table so we
@@ -1838,21 +1851,25 @@ hash_agg_enter_spill_mode(AggState *aggstate)
 static void
 hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 {
-	Size	meta_mem;
-	Size	hash_mem;
-	Size	buffer_mem;
-	Size	total_mem;
+	MemoryContextCounters	counters;
+	uint32					flags = MCXT_STAT_TOTALSPACE | MCXT_STAT_NEWSPACE;
+	Size					meta_mem;
+	Size					hash_mem;
+	Size					buffer_mem;
+	Size					total_mem;
 
 	if (aggstate->aggstrategy != AGG_MIXED &&
 		aggstate->aggstrategy != AGG_HASHED)
 		return;
 
 	/* memory for the hash table itself */
-	meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt, true);
+	counters = MemoryContextCount(aggstate->hash_metacxt, flags, true);
+	meta_mem = counters.totalspace - counters.newspace;
 
 	/* memory for the group keys and transition states */
-	hash_mem = MemoryContextMemAllocated(
-		aggstate->hashcontext->ecxt_per_tuple_memory, true);
+	counters = MemoryContextCount(
+		aggstate->hashcontext->ecxt_per_tuple_memory, flags, true);
+	hash_mem = counters.totalspace - counters.newspace;
 
 	/* memory for read/write tape buffers, if spilled */
 	buffer_mem = npartitions * HASHAGG_WRITE_BUFFER_SIZE;
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index c0623f106d2..b4f3cb33ff9 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -132,6 +132,8 @@ typedef struct AllocSetContext
 	Size		maxBlockSize;	/* maximum block size */
 	Size		nextBlockSize;	/* next block size to allocate */
 	Size		allocChunkLimit;	/* effective chunk size limit */
+	Size		memAllocated;	/* track memory allocated for this context */
+	uint64		nChunks;		/* total number of chunks */
 	AllocBlock	keeper;			/* keep this block over resets */
 	/* freelist this context could be put in, or -1 if not a candidate: */
 	int			freeListIndex;	/* index in context_freelists[], or -1 */
@@ -273,9 +275,8 @@ static void AllocSetReset(MemoryContext context);
 static void AllocSetDelete(MemoryContext context);
 static Size AllocSetGetChunkSpace(MemoryContext context, void *pointer);
 static bool AllocSetIsEmpty(MemoryContext context);
-static void AllocSetStats(MemoryContext context,
-						  MemoryStatsPrintFunc printfunc, void *passthru,
-						  MemoryContextCounters *totals);
+static MemoryContextCounters AllocSetStats(MemoryContext context,
+										   uint32 flags);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 static void AllocSetCheck(MemoryContext context);
@@ -464,8 +465,8 @@ AllocSetContextCreateInternal(MemoryContext parent,
 								parent,
 								name);
 
-			((MemoryContext) set)->mem_allocated =
-				set->keeper->endptr - ((char *) set);
+			set->memAllocated = set->keeper->endptr - ((char *) set);
+			set->nChunks = 0;
 
 			return (MemoryContext) set;
 		}
@@ -555,7 +556,8 @@ AllocSetContextCreateInternal(MemoryContext parent,
 						parent,
 						name);
 
-	((MemoryContext) set)->mem_allocated = firstBlockSize;
+	set->memAllocated = firstBlockSize;
+	set->nChunks = 0;
 
 	return (MemoryContext) set;
 }
@@ -617,7 +619,7 @@ AllocSetReset(MemoryContext context)
 		else
 		{
 			/* Normal case, release the block */
-			context->mem_allocated -= block->endptr - ((char*) block);
+			set->memAllocated -= block->endptr - ((char*) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 			wipe_mem(block, block->freeptr - ((char *) block));
@@ -627,7 +629,9 @@ AllocSetReset(MemoryContext context)
 		block = next;
 	}
 
-	Assert(context->mem_allocated == keepersize);
+	Assert(set->memAllocated == keepersize);
+
+	set->nChunks = 0;
 
 	/* Reset block size allocation sequence, too */
 	set->nextBlockSize = set->initBlockSize;
@@ -703,7 +707,7 @@ AllocSetDelete(MemoryContext context)
 		AllocBlock	next = block->next;
 
 		if (block != set->keeper)
-			context->mem_allocated -= block->endptr - ((char *) block);
+			set->memAllocated -= block->endptr - ((char *) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
@@ -715,7 +719,7 @@ AllocSetDelete(MemoryContext context)
 		block = next;
 	}
 
-	Assert(context->mem_allocated == keepersize);
+	Assert(set->memAllocated == keepersize);
 
 	/* Finally, free the context header, including the keeper block */
 	free(set);
@@ -758,7 +762,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->aset = set;
 		block->freeptr = block->endptr = ((char *) block) + blksize;
@@ -805,6 +809,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		/* Disallow external access to private part of chunk header. */
 		VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN);
 
+		set->nChunks++;
 		return AllocChunkGetPointer(chunk);
 	}
 
@@ -844,6 +849,8 @@ AllocSetAlloc(MemoryContext context, Size size)
 		/* Disallow external access to private part of chunk header. */
 		VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN);
 
+		/* chunk already existed; don't increment nChunks */
+
 		return AllocChunkGetPointer(chunk);
 	}
 
@@ -906,6 +913,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 #endif
 				chunk->aset = (void *) set->freelist[a_fidx];
 				set->freelist[a_fidx] = chunk;
+				set->nChunks++;
 			}
 
 			/* Mark that we need to create a new block */
@@ -955,7 +963,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->aset = set;
 		block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
@@ -1005,6 +1013,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 	/* Disallow external access to private part of chunk header. */
 	VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN);
 
+	set->nChunks++;
 	return AllocChunkGetPointer(chunk);
 }
 
@@ -1058,7 +1067,8 @@ AllocSetFree(MemoryContext context, void *pointer)
 		if (block->next)
 			block->next->prev = block->prev;
 
-		context->mem_allocated -= block->endptr - ((char*) block);
+		set->memAllocated -= block->endptr - ((char*) block);
+		set->nChunks--;
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
@@ -1161,8 +1171,8 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size)
 		}
 
 		/* updated separately, not to underflow when (oldblksize > blksize) */
-		context->mem_allocated -= oldblksize;
-		context->mem_allocated += blksize;
+		set->memAllocated -= oldblksize;
+		set->memAllocated += blksize;
 
 		block->freeptr = block->endptr = ((char *) block) + blksize;
 
@@ -1358,63 +1368,55 @@ AllocSetIsEmpty(MemoryContext context)
 /*
  * AllocSetStats
  *		Compute stats about memory consumption of an allocset.
- *
- * printfunc: if not NULL, pass a human-readable stats string to this.
- * passthru: pass this pointer through to printfunc.
- * totals: if not NULL, add stats about this context into *totals.
  */
-static void
-AllocSetStats(MemoryContext context,
-			  MemoryStatsPrintFunc printfunc, void *passthru,
-			  MemoryContextCounters *totals)
+static MemoryContextCounters
+AllocSetStats(MemoryContext context, uint32 flags)
 {
-	AllocSet	set = (AllocSet) context;
-	Size		nblocks = 0;
-	Size		freechunks = 0;
-	Size		totalspace;
-	Size		freespace = 0;
-	AllocBlock	block;
-	int			fidx;
-
-	/* Include context header in totalspace */
-	totalspace = MAXALIGN(sizeof(AllocSetContext));
-
-	for (block = set->blocks; block != NULL; block = block->next)
+	AllocSet				set		 = (AllocSet) context;
+	MemoryContextCounters	counters = {0};
+	uint64					nblocks = 0;
+	uint64					freechunks = 0;
+	Size					freespace = 0;
+	AllocBlock				block;
+	int						fidx;
+
+	if (flags & (MCXT_STAT_NBLOCKS | MCXT_STAT_FREESPACE))
 	{
-		nblocks++;
-		totalspace += block->endptr - ((char *) block);
-		freespace += block->endptr - block->freeptr;
-	}
-	for (fidx = 0; fidx < ALLOCSET_NUM_FREELISTS; fidx++)
-	{
-		AllocChunk	chunk;
-
-		for (chunk = set->freelist[fidx]; chunk != NULL;
-			 chunk = (AllocChunk) chunk->aset)
+		for (block = set->blocks; block != NULL; block = block->next)
 		{
-			freechunks++;
-			freespace += chunk->size + ALLOC_CHUNKHDRSZ;
+			nblocks++;
+			freespace += block->endptr - block->freeptr;
 		}
 	}
-
-	if (printfunc)
+	if (flags & (MCXT_STAT_FREECHUNKS | MCXT_STAT_FREESPACE))
 	{
-		char		stats_string[200];
+		for (fidx = 0; fidx < ALLOCSET_NUM_FREELISTS; fidx++)
+		{
+			AllocChunk	chunk;
 
-		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zd blocks; %zu free (%zd chunks); %zu used",
-				 totalspace, nblocks, freespace, freechunks,
-				 totalspace - freespace);
-		printfunc(context, passthru, stats_string);
+			for (chunk = set->freelist[fidx]; chunk != NULL;
+				 chunk = (AllocChunk) chunk->aset)
+			{
+				freechunks++;
+				freespace += chunk->size + ALLOC_CHUNKHDRSZ;
+			}
+		}
 	}
 
-	if (totals)
-	{
-		totals->nblocks += nblocks;
-		totals->freechunks += freechunks;
-		totals->totalspace += totalspace;
-		totals->freespace += freespace;
-	}
+	if (flags & MCXT_STAT_NBLOCKS)
+		counters.nblocks = nblocks;
+	if (flags & MCXT_STAT_NCHUNKS)
+		counters.nchunks = set->nChunks;
+	if (flags & MCXT_STAT_FREECHUNKS)
+		counters.freechunks = freechunks;
+	if (flags & MCXT_STAT_TOTALSPACE)
+		counters.totalspace = set->memAllocated;
+	if (flags & MCXT_STAT_FREESPACE)
+		counters.freespace = freespace;
+	if (flags & MCXT_STAT_NEWSPACE)
+		counters.newspace = set->blocks->endptr - set->blocks->freeptr;
+
+	return counters;
 }
 
 
@@ -1436,6 +1438,7 @@ AllocSetCheck(MemoryContext context)
 	AllocBlock	prevblock;
 	AllocBlock	block;
 	Size		total_allocated = 0;
+	uint64		total_nchunks = 0;
 
 	for (prevblock = NULL, block = set->blocks;
 		 block != NULL;
@@ -1529,6 +1532,7 @@ AllocSetCheck(MemoryContext context)
 
 			blk_data += chsize;
 			nchunks++;
+			total_nchunks++;
 
 			bpoz += ALLOC_CHUNKHDRSZ + chsize;
 		}
@@ -1538,7 +1542,8 @@ AllocSetCheck(MemoryContext context)
 				 name, block);
 	}
 
-	Assert(total_allocated == context->mem_allocated);
+	Assert(total_allocated == set->memAllocated);
+	Assert(total_nchunks == set->nChunks);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
index 56651d06931..1f3713cb27e 100644
--- a/src/backend/utils/mmgr/generation.c
+++ b/src/backend/utils/mmgr/generation.c
@@ -61,6 +61,7 @@ typedef struct GenerationContext
 
 	/* Generational context parameters */
 	Size		blockSize;		/* standard block size */
+	Size		memAllocated;	/* track memory allocated for this context */
 
 	GenerationBlock *block;		/* current (most recently allocated) block */
 	dlist_head	blocks;			/* list of blocks */
@@ -153,9 +154,8 @@ static void GenerationReset(MemoryContext context);
 static void GenerationDelete(MemoryContext context);
 static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
 static bool GenerationIsEmpty(MemoryContext context);
-static void GenerationStats(MemoryContext context,
-							MemoryStatsPrintFunc printfunc, void *passthru,
-							MemoryContextCounters *totals);
+static MemoryContextCounters GenerationStats(MemoryContext context,
+											 uint32 flags);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 static void GenerationCheck(MemoryContext context);
@@ -258,6 +258,7 @@ GenerationContextCreate(MemoryContext parent,
 
 	/* Fill in GenerationContext-specific header fields */
 	set->blockSize = blockSize;
+	set->memAllocated = 0;
 	set->block = NULL;
 	dlist_init(&set->blocks);
 
@@ -297,7 +298,7 @@ GenerationReset(MemoryContext context)
 
 		dlist_delete(miter.cur);
 
-		context->mem_allocated -= block->blksize;
+		set->memAllocated -= block->blksize;
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->blksize);
@@ -354,7 +355,7 @@ GenerationAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		/* block with a single (used) chunk */
 		block->blksize = blksize;
@@ -411,7 +412,7 @@ GenerationAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->blksize = blksize;
 		block->nchunks = 0;
@@ -528,7 +529,7 @@ GenerationFree(MemoryContext context, void *pointer)
 	if (set->block == block)
 		set->block = NULL;
 
-	context->mem_allocated -= block->blksize;
+	set->memAllocated -= block->blksize;
 	free(block);
 }
 
@@ -681,59 +682,47 @@ GenerationIsEmpty(MemoryContext context)
 /*
  * GenerationStats
  *		Compute stats about memory consumption of a Generation context.
- *
- * printfunc: if not NULL, pass a human-readable stats string to this.
- * passthru: pass this pointer through to printfunc.
- * totals: if not NULL, add stats about this context into *totals.
- *
- * XXX freespace only accounts for empty space at the end of the block, not
- * space of freed chunks (which is unknown).
  */
-static void
-GenerationStats(MemoryContext context,
-				MemoryStatsPrintFunc printfunc, void *passthru,
-				MemoryContextCounters *totals)
+static MemoryContextCounters
+GenerationStats(MemoryContext context, uint32 flags)
 {
-	GenerationContext *set = (GenerationContext *) context;
-	Size		nblocks = 0;
-	Size		nchunks = 0;
-	Size		nfreechunks = 0;
-	Size		totalspace;
-	Size		freespace = 0;
-	dlist_iter	iter;
-
-	/* Include context header in totalspace */
-	totalspace = MAXALIGN(sizeof(GenerationContext));
-
-	dlist_foreach(iter, &set->blocks)
-	{
-		GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur);
-
-		nblocks++;
-		nchunks += block->nchunks;
-		nfreechunks += block->nfree;
-		totalspace += block->blksize;
-		freespace += (block->endptr - block->freeptr);
-	}
-
-	if (printfunc)
+	GenerationContext		*set	  = (GenerationContext *) context;
+	MemoryContextCounters	 counters = {0};
+	uint64					 nblocks = 0;
+	uint64					 nchunks = 0;
+	uint64					 freechunks = 0;
+	Size					 freespace = 0;
+	dlist_iter				 iter;
+
+	if (flags & (MCXT_STAT_NBLOCKS | MCXT_STAT_NCHUNKS |
+				 MCXT_STAT_FREECHUNKS | MCXT_STAT_FREESPACE))
 	{
-		char		stats_string[200];
+		dlist_foreach(iter, &set->blocks)
+		{
+			GenerationBlock *block = dlist_container(
+				GenerationBlock, node, iter.cur);
 
-		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used",
-				 totalspace, nblocks, nchunks, freespace,
-				 nfreechunks, totalspace - freespace);
-		printfunc(context, passthru, stats_string);
+			nblocks++;
+			nchunks += block->nchunks;
+			freechunks += block->nfree;
+			freespace += (block->endptr - block->freeptr);
+		}
 	}
 
-	if (totals)
-	{
-		totals->nblocks += nblocks;
-		totals->freechunks += nfreechunks;
-		totals->totalspace += totalspace;
-		totals->freespace += freespace;
-	}
+	if (flags & MCXT_STAT_NBLOCKS)
+		counters.nblocks = nblocks;
+	if (flags & MCXT_STAT_NCHUNKS)
+		counters.nchunks = nchunks;
+	if (flags & MCXT_STAT_FREECHUNKS)
+		counters.freechunks = freechunks;
+	if (flags & MCXT_STAT_TOTALSPACE)
+		counters.totalspace = set->memAllocated;
+	if (flags & MCXT_STAT_FREESPACE)
+		counters.freespace = freespace;
+	if (flags & MCXT_STAT_NEWSPACE)
+		counters.newspace = set->block->endptr - set->block->freeptr;
+
+	return counters;
 }
 
 
@@ -844,7 +833,7 @@ GenerationCheck(MemoryContext context)
 				 name, nfree, block, block->nfree);
 	}
 
-	Assert(total_allocated == context->mem_allocated);
+	Assert(total_allocated == gen->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 9e24fec72d6..bd8ee42405f 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -56,7 +56,7 @@ static void MemoryContextCallResetCallbacks(MemoryContext context);
 static void MemoryContextStatsInternal(MemoryContext context, int level,
 									   bool print, int max_children,
 									   MemoryContextCounters *totals);
-static void MemoryContextStatsPrint(MemoryContext context, void *passthru,
+static void MemoryContextStatsPrint(MemoryContext context, int level,
 									const char *stats_string);
 
 /*
@@ -463,24 +463,39 @@ MemoryContextIsEmpty(MemoryContext context)
 }
 
 /*
- * Find the memory allocated to blocks for this memory context. If recurse is
- * true, also include children.
+ * MemoryContextCount
+ *		Return statistics about this memory context, optionally recursing to
+ *		children. Flags are defined in memnodes.h and specify which statistics
+ *		are required.
  */
-Size
-MemoryContextMemAllocated(MemoryContext context, bool recurse)
+MemoryContextCounters
+MemoryContextCount(MemoryContext context, uint32 flags, bool recurse)
 {
-	Size	total = context->mem_allocated;
+	MemoryContextCounters	total;
 
 	AssertArg(MemoryContextIsValid(context));
 
+	total = context->methods->count(context, flags);
+
 	if (recurse)
 	{
-		MemoryContext child = context->firstchild;
+		MemoryContext			child;
 
 		for (child = context->firstchild;
 			 child != NULL;
 			 child = child->nextchild)
-			total += MemoryContextMemAllocated(child, true);
+		{
+			MemoryContextCounters	child_counters;
+			
+			child_counters = MemoryContextCount(child, flags, true);
+
+			total.nblocks += child_counters.nblocks;
+			total.nchunks += child_counters.nchunks;
+			total.freechunks += child_counters.freechunks;
+			total.totalspace += child_counters.totalspace;
+			total.freespace += child_counters.freespace;
+			total.newspace += child_counters.newspace;
+		}
 	}
 
 	return total;
@@ -516,9 +531,10 @@ MemoryContextStatsDetail(MemoryContext context, int max_children)
 	MemoryContextStatsInternal(context, 0, true, max_children, &grand_totals);
 
 	fprintf(stderr,
-			"Grand total: %zu bytes in %zd blocks; %zu free (%zd chunks); %zu used\n",
+			"Grand total: %zu bytes in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used\n",
 			grand_totals.totalspace, grand_totals.nblocks,
-			grand_totals.freespace, grand_totals.freechunks,
+			grand_totals.nchunks, grand_totals.freespace,
+			grand_totals.freechunks,
 			grand_totals.totalspace - grand_totals.freespace);
 }
 
@@ -534,24 +550,42 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 						   bool print, int max_children,
 						   MemoryContextCounters *totals)
 {
-	MemoryContextCounters local_totals;
+	MemoryContextCounters excess_summary = {0};
+	MemoryContextCounters current;
 	MemoryContext child;
 	int			ichild;
 
+
 	AssertArg(MemoryContextIsValid(context));
 
 	/* Examine the context itself */
-	context->methods->stats(context,
-							print ? MemoryContextStatsPrint : NULL,
-							(void *) &level,
-							totals);
+	current = context->methods->count(context, MCXT_STAT_ALL);
+
+	if (print)
+	{
+		char		stats_string[200];
+		snprintf(stats_string, sizeof(stats_string),
+				 "%zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used",
+				 current.totalspace, current.nblocks, current.nchunks,
+				 current.freespace, current.freechunks,
+				 current.totalspace - current.freespace);
+		MemoryContextStatsPrint(context, level, stats_string);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += current.nblocks;
+		totals->nchunks += current.nchunks;
+		totals->freechunks += current.freechunks;
+		totals->totalspace += current.totalspace;
+		totals->freespace += current.freespace;
+		totals->newspace += current.newspace;
+	}
 
 	/*
 	 * Examine children.  If there are more than max_children of them, we do
 	 * not print the rest explicitly, but just summarize them.
 	 */
-	memset(&local_totals, 0, sizeof(local_totals));
-
 	for (child = context->firstchild, ichild = 0;
 		 child != NULL;
 		 child = child->nextchild, ichild++)
@@ -563,7 +597,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 		else
 			MemoryContextStatsInternal(child, level + 1,
 									   false, max_children,
-									   &local_totals);
+									   &excess_summary);
 	}
 
 	/* Deal with excess children */
@@ -578,19 +612,21 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 			fprintf(stderr,
 					"%d more child contexts containing %zu total in %zd blocks; %zu free (%zd chunks); %zu used\n",
 					ichild - max_children,
-					local_totals.totalspace,
-					local_totals.nblocks,
-					local_totals.freespace,
-					local_totals.freechunks,
-					local_totals.totalspace - local_totals.freespace);
+					excess_summary.totalspace,
+					excess_summary.nblocks,
+					excess_summary.freespace,
+					excess_summary.freechunks,
+					excess_summary.totalspace - excess_summary.freespace);
 		}
 
 		if (totals)
 		{
-			totals->nblocks += local_totals.nblocks;
-			totals->freechunks += local_totals.freechunks;
-			totals->totalspace += local_totals.totalspace;
-			totals->freespace += local_totals.freespace;
+			totals->nblocks += excess_summary.nblocks;
+			totals->nchunks += excess_summary.nchunks;
+			totals->freechunks += excess_summary.freechunks;
+			totals->totalspace += excess_summary.totalspace;
+			totals->freespace += excess_summary.freespace;
+			totals->newspace += excess_summary.newspace;
 		}
 	}
 }
@@ -603,10 +639,9 @@ MemoryContextStatsInternal(MemoryContext context, int level,
  * make that more complicated.
  */
 static void
-MemoryContextStatsPrint(MemoryContext context, void *passthru,
+MemoryContextStatsPrint(MemoryContext context, int level,
 						const char *stats_string)
 {
-	int			level = *(int *) passthru;
 	const char *name = context->name;
 	const char *ident = context->ident;
 	int			i;
@@ -760,7 +795,6 @@ MemoryContextCreate(MemoryContext node,
 	node->methods = methods;
 	node->parent = parent;
 	node->firstchild = NULL;
-	node->mem_allocated = 0;
 	node->prevchild = NULL;
 	node->name = name;
 	node->ident = NULL;
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index c928476c479..e3921991248 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -67,6 +67,7 @@ typedef struct SlabContext
 	Size		fullChunkSize;	/* chunk size including header and alignment */
 	Size		blockSize;		/* block size */
 	Size		headerSize;		/* allocated size of context header */
+	Size		memAllocated;	/* track memory allocated for this context */
 	int			chunksPerBlock; /* number of chunks per block */
 	int			minFreeChunks;	/* min number of free chunks in any block */
 	int			nblocks;		/* number of blocks allocated */
@@ -133,9 +134,7 @@ static void SlabReset(MemoryContext context);
 static void SlabDelete(MemoryContext context);
 static Size SlabGetChunkSpace(MemoryContext context, void *pointer);
 static bool SlabIsEmpty(MemoryContext context);
-static void SlabStats(MemoryContext context,
-					  MemoryStatsPrintFunc printfunc, void *passthru,
-					  MemoryContextCounters *totals);
+static MemoryContextCounters SlabStats(MemoryContext context, uint32 flags);
 #ifdef MEMORY_CONTEXT_CHECKING
 static void SlabCheck(MemoryContext context);
 #endif
@@ -262,6 +261,7 @@ SlabContextCreate(MemoryContext parent,
 	slab->fullChunkSize = fullChunkSize;
 	slab->blockSize = blockSize;
 	slab->headerSize = headerSize;
+	slab->memAllocated = 0;
 	slab->chunksPerBlock = chunksPerBlock;
 	slab->minFreeChunks = 0;
 	slab->nblocks = 0;
@@ -322,14 +322,14 @@ SlabReset(MemoryContext context)
 #endif
 			free(block);
 			slab->nblocks--;
-			context->mem_allocated -= slab->blockSize;
+			slab->memAllocated -= slab->blockSize;
 		}
 	}
 
 	slab->minFreeChunks = 0;
 
 	Assert(slab->nblocks == 0);
-	Assert(context->mem_allocated == 0);
+	Assert(slab->memAllocated == 0);
 }
 
 /*
@@ -407,7 +407,7 @@ SlabAlloc(MemoryContext context, Size size)
 
 		slab->minFreeChunks = slab->chunksPerBlock;
 		slab->nblocks += 1;
-		context->mem_allocated += slab->blockSize;
+		slab->memAllocated += slab->blockSize;
 	}
 
 	/* grab the block from the freelist (even the new block is there) */
@@ -501,7 +501,7 @@ SlabAlloc(MemoryContext context, Size size)
 
 	SlabAllocInfo(slab, chunk);
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 
 	return SlabChunkGetPointer(chunk);
 }
@@ -578,13 +578,13 @@ SlabFree(MemoryContext context, void *pointer)
 	{
 		free(block);
 		slab->nblocks--;
-		context->mem_allocated -= slab->blockSize;
+		slab->memAllocated -= slab->blockSize;
 	}
 	else
 		dlist_push_head(&slab->freelist[block->nfree], &block->node);
 
 	Assert(slab->nblocks >= 0);
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 }
 
 /*
@@ -647,59 +647,50 @@ SlabIsEmpty(MemoryContext context)
 /*
  * SlabStats
  *		Compute stats about memory consumption of a Slab context.
- *
- * printfunc: if not NULL, pass a human-readable stats string to this.
- * passthru: pass this pointer through to printfunc.
- * totals: if not NULL, add stats about this context into *totals.
  */
-static void
-SlabStats(MemoryContext context,
-		  MemoryStatsPrintFunc printfunc, void *passthru,
-		  MemoryContextCounters *totals)
+static MemoryContextCounters
+SlabStats(MemoryContext context, uint32 flags)
 {
-	SlabContext *slab = castNode(SlabContext, context);
-	Size		nblocks = 0;
-	Size		freechunks = 0;
-	Size		totalspace;
-	Size		freespace = 0;
-	int			i;
-
-	/* Include context header in totalspace */
-	totalspace = slab->headerSize;
-
-	for (i = 0; i <= slab->chunksPerBlock; i++)
+	SlabContext				*slab	  = castNode(SlabContext, context);
+	MemoryContextCounters	 counters = {0};
+	uint64					 nblocks = 0;
+	uint64					 nchunks = 0;
+	uint64					 freechunks = 0;
+	Size					 freespace = 0;
+	int						 i;
+
+	if (flags & (MCXT_STAT_NBLOCKS | MCXT_STAT_NCHUNKS |
+				 MCXT_STAT_FREECHUNKS | MCXT_STAT_FREESPACE))
 	{
-		dlist_iter	iter;
-
-		dlist_foreach(iter, &slab->freelist[i])
+		for (i = 0; i <= slab->chunksPerBlock; i++)
 		{
-			SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
+			dlist_iter	iter;
 
-			nblocks++;
-			totalspace += slab->blockSize;
-			freespace += slab->fullChunkSize * block->nfree;
-			freechunks += block->nfree;
-		}
-	}
-
-	if (printfunc)
-	{
-		char		stats_string[200];
+			dlist_foreach(iter, &slab->freelist[i])
+			{
+				SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
 
-		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zd blocks; %zu free (%zd chunks); %zu used",
-				 totalspace, nblocks, freespace, freechunks,
-				 totalspace - freespace);
-		printfunc(context, passthru, stats_string);
+				nblocks++;
+				nchunks += slab->chunksPerBlock;
+				freespace += slab->fullChunkSize * block->nfree;
+				freechunks += block->nfree;
+			}
+		}
 	}
 
-	if (totals)
-	{
-		totals->nblocks += nblocks;
-		totals->freechunks += freechunks;
-		totals->totalspace += totalspace;
-		totals->freespace += freespace;
-	}
+	if (flags & MCXT_STAT_NBLOCKS)
+		counters.nblocks = nblocks;
+	if (flags & MCXT_STAT_NCHUNKS)
+		counters.nchunks = nchunks;
+	if (flags & MCXT_STAT_FREECHUNKS)
+		counters.freechunks = freechunks;
+	if (flags & MCXT_STAT_TOTALSPACE)
+		counters.totalspace = slab->memAllocated;
+	if (flags & MCXT_STAT_FREESPACE)
+		counters.freespace = freespace;
+	/* new memory is already sliced into chunks, so newspace is always 0 */
+
+	return counters;
 }
 
 
@@ -804,7 +795,7 @@ SlabCheck(MemoryContext context)
 		}
 	}
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index c9f2bbcb367..cc545852968 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -29,11 +29,21 @@
 typedef struct MemoryContextCounters
 {
 	Size		nblocks;		/* Total number of malloc blocks */
+	Size		nchunks;		/* Total number of chunks (used+free) */
 	Size		freechunks;		/* Total number of free chunks */
 	Size		totalspace;		/* Total bytes requested from malloc */
 	Size		freespace;		/* The unused portion of totalspace */
+	Size		newspace;		/* Allocated but never held any chunks */
 } MemoryContextCounters;
 
+#define MCXT_STAT_NBLOCKS		(1 << 0)
+#define MCXT_STAT_NCHUNKS		(1 << 1)
+#define MCXT_STAT_FREECHUNKS	(1 << 2)
+#define MCXT_STAT_TOTALSPACE	(1 << 3)
+#define MCXT_STAT_FREESPACE		(1 << 4)
+#define MCXT_STAT_NEWSPACE		(1 << 5)
+#define MCXT_STAT_ALL			((1 << 6) - 1)
+
 /*
  * MemoryContext
  *		A logical context in which memory allocations occur.
@@ -51,9 +61,6 @@ typedef struct MemoryContextCounters
  * to the context struct rather than the struct type itself.
  */
 
-typedef void (*MemoryStatsPrintFunc) (MemoryContext context, void *passthru,
-									  const char *stats_string);
-
 typedef struct MemoryContextMethods
 {
 	void	   *(*alloc) (MemoryContext context, Size size);
@@ -64,9 +71,7 @@ typedef struct MemoryContextMethods
 	void		(*delete_context) (MemoryContext context);
 	Size		(*get_chunk_space) (MemoryContext context, void *pointer);
 	bool		(*is_empty) (MemoryContext context);
-	void		(*stats) (MemoryContext context,
-						  MemoryStatsPrintFunc printfunc, void *passthru,
-						  MemoryContextCounters *totals);
+	MemoryContextCounters (*count) (MemoryContext context, uint32 flags);
 #ifdef MEMORY_CONTEXT_CHECKING
 	void		(*check) (MemoryContext context);
 #endif
@@ -79,7 +84,6 @@ typedef struct MemoryContextData
 	/* these two fields are placed here to minimize alignment wastage: */
 	bool		isReset;		/* T = no space alloced since last reset */
 	bool		allowInCritSection; /* allow palloc in critical section */
-	Size		mem_allocated;	/* track memory allocated for this context */
 	const MemoryContextMethods *methods;	/* virtual function table */
 	MemoryContext parent;		/* NULL if no parent (toplevel context) */
 	MemoryContext firstchild;	/* head of linked list of children */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 909bc2e9888..fb2d960e333 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -82,7 +82,8 @@ extern void MemoryContextSetParent(MemoryContext context,
 extern Size GetMemoryChunkSpace(void *pointer);
 extern MemoryContext MemoryContextGetParent(MemoryContext context);
 extern bool MemoryContextIsEmpty(MemoryContext context);
-extern Size MemoryContextMemAllocated(MemoryContext context, bool recurse);
+extern MemoryContextCounters MemoryContextCount(MemoryContext context,
+												uint32 flags, bool recurse);
 extern void MemoryContextStats(MemoryContext context);
 extern void MemoryContextStatsDetail(MemoryContext context, int max_children);
 extern void MemoryContextAllowInCriticalSection(MemoryContext context,
#16Andres Freund
andres@anarazel.de
In reply to: Jeff Davis (#15)
Re: Make MemoryContextMemAllocated() more precise

Hi,

On 2020-03-27 17:21:10 -0700, Jeff Davis wrote:

Attached refactoring patch. There's enough in here that warrants
discussion that I don't think this makes sense for v13 and I'm adding
it to the July commitfest.

IDK, adding a commit to v13 that we know we should do architecturally
differently in v14, when the difference in complexity between the two
patches isn't actually *that* big...

I'd like to see others jump in here...

I still think we should do something for v13, such as the originally-
proposed patch[1]. It's not critical, but it simply reports a better
number for memory consumption. Currently, the memory usage appears to
jump, often right past work mem (by a reasonable but noticable amount),
which could be confusing.

Is that really a significant issue for most work mem sizes? Shouldn't
the way we increase sizes lead to the max difference between the
measurements to be somewhat limited?

* there's a new MemoryContextCount() that simply calculates the
statistics without printing anything, and returns a struct
- it supports flags to indicate which stats should be
calculated, so that some callers can avoid walking through
blocks/freelists
* it adds a new statistic for "new space" (i.e. untouched)
* it eliminates specialization of the memory context printing
- the only specialization was for generation.c to output the
number of chunks, which can be done easily enough for the
other types, too

That sounds like a good direction.

+	if (flags & MCXT_STAT_NBLOCKS)
+		counters.nblocks = nblocks;
+	if (flags & MCXT_STAT_NCHUNKS)
+		counters.nchunks = set->nChunks;
+	if (flags & MCXT_STAT_FREECHUNKS)
+		counters.freechunks = freechunks;
+	if (flags & MCXT_STAT_TOTALSPACE)
+		counters.totalspace = set->memAllocated;
+	if (flags & MCXT_STAT_FREESPACE)
+		counters.freespace = freespace;
+	if (flags & MCXT_STAT_NEWSPACE)
+		counters.newspace = set->blocks->endptr - set->blocks->freeptr;

I'd spec it so that context implementations are allowed to
unconditionally fill fields, even when the flag isn't specified. The
branches quoted don't buy us anyting...

diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index c9f2bbcb367..cc545852968 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -29,11 +29,21 @@
typedef struct MemoryContextCounters
{
Size		nblocks;		/* Total number of malloc blocks */
+	Size		nchunks;		/* Total number of chunks (used+free) */
Size		freechunks;		/* Total number of free chunks */
Size		totalspace;		/* Total bytes requested from malloc */
Size		freespace;		/* The unused portion of totalspace */
+	Size		newspace;		/* Allocated but never held any chunks */

I'd add some reasoning as to why this is useful.

} MemoryContextCounters;

+#define MCXT_STAT_NBLOCKS		(1 << 0)
+#define MCXT_STAT_NCHUNKS		(1 << 1)
+#define MCXT_STAT_FREECHUNKS	(1 << 2)
+#define MCXT_STAT_TOTALSPACE	(1 << 3)
+#define MCXT_STAT_FREESPACE		(1 << 4)
+#define MCXT_STAT_NEWSPACE		(1 << 5)

s/MCXT_STAT/MCXT_STAT_NEED/?

+#define MCXT_STAT_ALL ((1 << 6) - 1)

Hm, why not go for ~0 or such?

Greetings,

Andres Freund

#17David Rowley
dgrowleyml@gmail.com
In reply to: Jeff Davis (#15)
Re: Make MemoryContextMemAllocated() more precise

On Sat, 28 Mar 2020 at 13:21, Jeff Davis <pgsql@j-davis.com> wrote:

Attached refactoring patch. There's enough in here that warrants
discussion that I don't think this makes sense for v13 and I'm adding
it to the July commitfest.

I had a read over this too. I noted down the following during my pass of it.

1. The comment mentions "passthru", but you've removed that parameter.

  * For now, the passthru pointer just points to "int level"; later we might
  * make that more complicated.
  */
 static void
-MemoryContextStatsPrint(MemoryContext context, void *passthru,
+MemoryContextStatsPrint(MemoryContext context, int level,
  const char *stats_string)

2. I don't think MemoryContextCount is the best name for this
function. When I saw:

counters = MemoryContextCount(aggstate->hash_metacxt, flags, true);

i assumed it was returning some integer number, that is until I looked
at the "counters" datatype.

Maybe it would be better to name the function
MemoryContextGetTelemetry and the struct MemoryContextTelemetry rather
than MemoryContextCounters? Or maybe MemoryContextTally and call the
function either MemoryContextGetTelemetry() or
MemoryContextGetTally(). Or perhaps MemoryContextGetAccounting() and
the struct MemoryContextAccounting

3. I feel like it would be nicer if you didn't change the "count"
methods to return a MemoryContextCounters. It means you may need to
zero a struct for each level, assign the values, then add them to the
total. If you were just to zero the struct in MemoryContextCount()
then pass it into the count function, then you could just have it do
all the += work. It would reduce the code in MemoryContextCount() too.

4. Do you think it would be better to have two separate functions for
MemoryContextCount(), a recursive version and a non-recursive version.
I feel that the function should be so simple that it does not make a
great deal of sense to load it up to handle both cases. Looking
around mcxt.c, I see MemoryContextResetOnly() and
MemoryContextResetChildren(), while that's not a perfect example, it
does seem like a better lead to follow.

5. For performance testing, I tried using the following table with 1MB
work_mem then again with 1GB work_mem. I wondered if making the
accounting more complex would cause a slowdown in nodeAgg.c, as I see
we're calling this function each time we get a tuple that belongs in a
new group. The 1MB test is likely not such a great way to measure this
since we do spill to disk in that case and the changing in accounting
means we do that at a slightly different time, but the 1GB test does
not spill and it's a bit slower.

create table t (a int);
insert into t select x from generate_Series(1,10000000) x;
analyze t;

-- Unpatched

set work_mem = '1MB';
explain analyze select a from t group by a; -- Ran 3 times.

QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=481750.10..659875.95 rows=10000048 width=4)
(actual time=3350.190..8193.400 rows=10000000 loops=1)
Group Key: a
Planned Partitions: 32
Peak Memory Usage: 1177 kB
Disk Usage: 234920 kB
HashAgg Batches: 1188
-> Seq Scan on t (cost=0.00..144248.48 rows=10000048 width=4)
(actual time=0.013..1013.755 rows=10000000 loops=1)
Planning Time: 0.131 ms
Execution Time: 8586.420 ms
Execution Time: 8446.961 ms
Execution Time: 8449.492 ms
(9 rows)

-- Patched

set work_mem = '1MB';
explain analyze select a from t group by a;

QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=481748.00..659873.00 rows=10000000 width=4)
(actual time=3470.107..8598.836 rows=10000000 loops=1)
Group Key: a
Planned Partitions: 32
Peak Memory Usage: 1033 kB
Disk Usage: 234816 kB
HashAgg Batches: 1056
-> Seq Scan on t (cost=0.00..144248.00 rows=10000000 width=4)
(actual time=0.017..1091.820 rows=10000000 loops=1)
Planning Time: 0.285 ms
Execution Time: 8996.824 ms
Execution Time: 8781.624 ms
Execution Time: 8900.324 ms
(9 rows)

-- Unpatched

set work_mem = '1GB';
explain analyze select a from t group by a;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=169248.00..269248.00 rows=10000000 width=4)
(actual time=4537.779..7033.318 rows=10000000 loops=1)
Group Key: a
Peak Memory Usage: 868369 kB
-> Seq Scan on t (cost=0.00..144248.00 rows=10000000 width=4)
(actual time=0.018..820.136 rows=10000000 loops=1)
Planning Time: 0.054 ms
Execution Time: 7561.063 ms
Execution Time: 7573.985 ms
Execution Time: 7572.058 ms
(6 rows)

-- Patched

set work_mem = '1GB';
explain analyze select a from t group by a;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=169248.00..269248.00 rows=10000000 width=4)
(actual time=4840.045..7359.970 rows=10000000 loops=1)
Group Key: a
Peak Memory Usage: 861975 kB
-> Seq Scan on t (cost=0.00..144248.00 rows=10000000 width=4)
(actual time=0.018..789.975 rows=10000000 loops=1)
Planning Time: 0.055 ms
Execution Time: 7904.069 ms
Execution Time: 7913.692 ms
Execution Time: 7927.061 ms
(6 rows)

Perhaps the slowdown is unrelated. If it, then maybe the reduction in
branching mentioned by Andres might help a bit plus maybe a bit from
what I mentioned above about passing in the counter struct instead of
returning it at each level.

David

#18Jeff Davis
pgsql@j-davis.com
In reply to: David Rowley (#17)
1 attachment(s)
Re: Make MemoryContextMemAllocated() more precise

On Mon, 2020-04-06 at 23:39 +1200, David Rowley wrote:

1. The comment mentions "passthru", but you've removed that
parameter.

Fixed, thank you.

2. I don't think MemoryContextCount is the best name for this
function. When I saw:

I've gone back and forth on naming a bit. The right name, in my
opinion, is MemoryContextStats(), but that's taken by something that
should be called MemoryContextReport(). But I didn't want to change
that as it would probably annoy a lot of people who are used to calling
MemoryContextStats() from gdb.

I changed the new function to be called MemoryContextGetCounters(),
which is more directly what it's doing. "Telemetry" makes me think more
of a stream of information rather than a particular point in time.

3. I feel like it would be nicer if you didn't change the "count"
methods to return a MemoryContextCounters. It means you may need to
zero a struct for each level, assign the values, then add them to the
total. If you were just to zero the struct in MemoryContextCount()
then pass it into the count function, then you could just have it do
all the += work. It would reduce the code in MemoryContextCount()
too.

I changed it to use a pointer out parameter, but I don't think an
in/out parameter is quite right there. MemoryContextStats() ends up
using both the per-context counters as well as the totals, so it's not
helpful to return just the totals.

4. Do you think it would be better to have two separate functions for
MemoryContextCount(), a recursive version and a non-recursive
version.

I could, but right now the only caller passes recurse=true, so I'm not
really eliminating any code in that path by specializing the functions.
Are you thinking about performance or you just think it would be better
to have two entry points?

5. For performance testing, I tried using the following table with
1MB
work_mem then again with 1GB work_mem. I wondered if making the
accounting more complex would cause a slowdown in nodeAgg.c, as I see
we're calling this function each time we get a tuple that belongs in
a
new group. The 1MB test is likely not such a great way to measure
this
since we do spill to disk in that case and the changing in accounting
means we do that at a slightly different time, but the 1GB test does
not spill and it's a bit slower.

I think this was because I was previously returning a struct. By just
passing a pointer as an out param, it seems to have mitigated it, but
not completely eliminated the cost (< 2% in my tests). I was using an
OFFSET 100000000 instead of EXPLAIN ANALYZE in my test measurements
because it was less noisy, and I focused on the 1GB test for the reason
you mention.

I also addressed Andres's comments:

* changed the name of the flags from MCXT_STAT to MCXT_COUNT
* changed ((1<<6)-1) to ~0
* removed unnecessary branches from the GetCounters method
* expanded some comments

Regards,
Jeff Davis

Attachments:

mcxt-20200406.patchtext/x-patch; charset=UTF-8; name=mcxt-20200406.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 4e100e5755f..fd205071ca1 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -414,6 +414,7 @@ static void agg_fill_hash_table(AggState *aggstate);
 static bool agg_refill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table_in_memory(AggState *aggstate);
+static Size hash_agg_check_memory(MemoryContext context);
 static void hash_agg_check_limits(AggState *aggstate);
 static void hash_agg_enter_spill_mode(AggState *aggstate);
 static void hash_agg_update_metrics(AggState *aggstate, bool from_tape,
@@ -1792,6 +1793,25 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
 		*ngroups_limit = 1;
 }
 
+/*
+ * Get the memory consumed by the memory context. Consider all memory except
+ * "newspace", which is the untouched part of a new block allocation by the
+ * memory context itself. The algorithm has little control over how eagerly
+ * the memory context allocates new memory, so it doesn't make sense to count
+ * it against work_mem.
+ */
+static Size
+hash_agg_check_memory(MemoryContext context)
+{
+	MemoryContextCounters	counters;
+	uint32					flags = (MCXT_COUNT_TOTALSPACE |
+									 MCXT_COUNT_NEWSPACE);
+
+	MemoryContextGetCounters(context, flags, true, &counters);
+
+	return counters.totalspace - counters.newspace;
+}
+
 /*
  * hash_agg_check_limits
  *
@@ -1802,11 +1822,17 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
 static void
 hash_agg_check_limits(AggState *aggstate)
 {
-	uint64 ngroups = aggstate->hash_ngroups_current;
-	Size meta_mem = MemoryContextMemAllocated(
-		aggstate->hash_metacxt, true);
-	Size hash_mem = MemoryContextMemAllocated(
-		aggstate->hashcontext->ecxt_per_tuple_memory, true);
+	uint64	ngroups = aggstate->hash_ngroups_current;
+	Size	meta_mem;
+	Size	hash_mem;
+
+	/* already in spill mode; nothing to do */
+	if (aggstate->hash_spill_mode)
+		return;
+
+	meta_mem = hash_agg_check_memory(aggstate->hash_metacxt);
+	hash_mem = hash_agg_check_memory(
+		aggstate->hashcontext->ecxt_per_tuple_memory);
 
 	/*
 	 * Don't spill unless there's at least one group in the hash table so we
@@ -1874,11 +1900,11 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 		return;
 
 	/* memory for the hash table itself */
-	meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt, true);
+	meta_mem = hash_agg_check_memory(aggstate->hash_metacxt);
 
 	/* memory for the group keys and transition states */
-	hash_mem = MemoryContextMemAllocated(
-		aggstate->hashcontext->ecxt_per_tuple_memory, true);
+	hash_mem = hash_agg_check_memory(
+		aggstate->hashcontext->ecxt_per_tuple_memory);
 
 	/* memory for read/write tape buffers, if spilled */
 	buffer_mem = npartitions * HASHAGG_WRITE_BUFFER_SIZE;
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index c0623f106d2..1dfc6126ad4 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -132,6 +132,8 @@ typedef struct AllocSetContext
 	Size		maxBlockSize;	/* maximum block size */
 	Size		nextBlockSize;	/* next block size to allocate */
 	Size		allocChunkLimit;	/* effective chunk size limit */
+	Size		memAllocated;	/* track memory allocated for this context */
+	uint64		nChunks;		/* total number of chunks */
 	AllocBlock	keeper;			/* keep this block over resets */
 	/* freelist this context could be put in, or -1 if not a candidate: */
 	int			freeListIndex;	/* index in context_freelists[], or -1 */
@@ -273,9 +275,8 @@ static void AllocSetReset(MemoryContext context);
 static void AllocSetDelete(MemoryContext context);
 static Size AllocSetGetChunkSpace(MemoryContext context, void *pointer);
 static bool AllocSetIsEmpty(MemoryContext context);
-static void AllocSetStats(MemoryContext context,
-						  MemoryStatsPrintFunc printfunc, void *passthru,
-						  MemoryContextCounters *totals);
+static void AllocSetGetCounters(MemoryContext context, uint32 flags,
+								MemoryContextCounters *counters);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 static void AllocSetCheck(MemoryContext context);
@@ -292,7 +293,7 @@ static const MemoryContextMethods AllocSetMethods = {
 	AllocSetDelete,
 	AllocSetGetChunkSpace,
 	AllocSetIsEmpty,
-	AllocSetStats
+	AllocSetGetCounters
 #ifdef MEMORY_CONTEXT_CHECKING
 	,AllocSetCheck
 #endif
@@ -464,8 +465,8 @@ AllocSetContextCreateInternal(MemoryContext parent,
 								parent,
 								name);
 
-			((MemoryContext) set)->mem_allocated =
-				set->keeper->endptr - ((char *) set);
+			set->memAllocated = set->keeper->endptr - ((char *) set);
+			set->nChunks = 0;
 
 			return (MemoryContext) set;
 		}
@@ -555,7 +556,8 @@ AllocSetContextCreateInternal(MemoryContext parent,
 						parent,
 						name);
 
-	((MemoryContext) set)->mem_allocated = firstBlockSize;
+	set->memAllocated = firstBlockSize;
+	set->nChunks = 0;
 
 	return (MemoryContext) set;
 }
@@ -617,7 +619,7 @@ AllocSetReset(MemoryContext context)
 		else
 		{
 			/* Normal case, release the block */
-			context->mem_allocated -= block->endptr - ((char*) block);
+			set->memAllocated -= block->endptr - ((char*) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 			wipe_mem(block, block->freeptr - ((char *) block));
@@ -627,7 +629,9 @@ AllocSetReset(MemoryContext context)
 		block = next;
 	}
 
-	Assert(context->mem_allocated == keepersize);
+	Assert(set->memAllocated == keepersize);
+
+	set->nChunks = 0;
 
 	/* Reset block size allocation sequence, too */
 	set->nextBlockSize = set->initBlockSize;
@@ -703,7 +707,7 @@ AllocSetDelete(MemoryContext context)
 		AllocBlock	next = block->next;
 
 		if (block != set->keeper)
-			context->mem_allocated -= block->endptr - ((char *) block);
+			set->memAllocated -= block->endptr - ((char *) block);
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
@@ -715,7 +719,7 @@ AllocSetDelete(MemoryContext context)
 		block = next;
 	}
 
-	Assert(context->mem_allocated == keepersize);
+	Assert(set->memAllocated == keepersize);
 
 	/* Finally, free the context header, including the keeper block */
 	free(set);
@@ -758,7 +762,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->aset = set;
 		block->freeptr = block->endptr = ((char *) block) + blksize;
@@ -805,6 +809,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		/* Disallow external access to private part of chunk header. */
 		VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN);
 
+		set->nChunks++;
 		return AllocChunkGetPointer(chunk);
 	}
 
@@ -844,6 +849,8 @@ AllocSetAlloc(MemoryContext context, Size size)
 		/* Disallow external access to private part of chunk header. */
 		VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN);
 
+		/* chunk already existed; don't increment nChunks */
+
 		return AllocChunkGetPointer(chunk);
 	}
 
@@ -906,6 +913,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 #endif
 				chunk->aset = (void *) set->freelist[a_fidx];
 				set->freelist[a_fidx] = chunk;
+				set->nChunks++;
 			}
 
 			/* Mark that we need to create a new block */
@@ -955,7 +963,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->aset = set;
 		block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
@@ -1005,6 +1013,7 @@ AllocSetAlloc(MemoryContext context, Size size)
 	/* Disallow external access to private part of chunk header. */
 	VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN);
 
+	set->nChunks++;
 	return AllocChunkGetPointer(chunk);
 }
 
@@ -1058,7 +1067,8 @@ AllocSetFree(MemoryContext context, void *pointer)
 		if (block->next)
 			block->next->prev = block->prev;
 
-		context->mem_allocated -= block->endptr - ((char*) block);
+		set->memAllocated -= block->endptr - ((char*) block);
+		set->nChunks--;
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
@@ -1161,8 +1171,8 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size)
 		}
 
 		/* updated separately, not to underflow when (oldblksize > blksize) */
-		context->mem_allocated -= oldblksize;
-		context->mem_allocated += blksize;
+		set->memAllocated -= oldblksize;
+		set->memAllocated += blksize;
 
 		block->freeptr = block->endptr = ((char *) block) + blksize;
 
@@ -1356,65 +1366,53 @@ AllocSetIsEmpty(MemoryContext context)
 }
 
 /*
- * AllocSetStats
- *		Compute stats about memory consumption of an allocset.
- *
- * printfunc: if not NULL, pass a human-readable stats string to this.
- * passthru: pass this pointer through to printfunc.
- * totals: if not NULL, add stats about this context into *totals.
+ * AllocSetGetCounters
+ *		Compute and return memory consumption counters. Specify the needed
+ *		counters by setting the appropriate flags.
  */
 static void
-AllocSetStats(MemoryContext context,
-			  MemoryStatsPrintFunc printfunc, void *passthru,
-			  MemoryContextCounters *totals)
+AllocSetGetCounters(MemoryContext context, uint32 flags,
+					MemoryContextCounters *counters)
 {
-	AllocSet	set = (AllocSet) context;
-	Size		nblocks = 0;
+	AllocSet	set		   = (AllocSet) context;
+	Size		nblocks	   = 0;
 	Size		freechunks = 0;
-	Size		totalspace;
-	Size		freespace = 0;
-	AllocBlock	block;
-	int			fidx;
+	Size		freespace  = 0;
 
-	/* Include context header in totalspace */
-	totalspace = MAXALIGN(sizeof(AllocSetContext));
-
-	for (block = set->blocks; block != NULL; block = block->next)
-	{
-		nblocks++;
-		totalspace += block->endptr - ((char *) block);
-		freespace += block->endptr - block->freeptr;
-	}
-	for (fidx = 0; fidx < ALLOCSET_NUM_FREELISTS; fidx++)
+	if (flags & (MCXT_COUNT_NBLOCKS | MCXT_COUNT_FREESPACE))
 	{
-		AllocChunk	chunk;
+		AllocBlock	block;
 
-		for (chunk = set->freelist[fidx]; chunk != NULL;
-			 chunk = (AllocChunk) chunk->aset)
+		for (block = set->blocks; block != NULL; block = block->next)
 		{
-			freechunks++;
-			freespace += chunk->size + ALLOC_CHUNKHDRSZ;
+			nblocks++;
+			freespace += block->endptr - block->freeptr;
 		}
 	}
 
-	if (printfunc)
+	if (flags & (MCXT_COUNT_FREECHUNKS | MCXT_COUNT_FREESPACE))
 	{
-		char		stats_string[200];
+		int		fidx;
 
-		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zd blocks; %zu free (%zd chunks); %zu used",
-				 totalspace, nblocks, freespace, freechunks,
-				 totalspace - freespace);
-		printfunc(context, passthru, stats_string);
-	}
+		for (fidx = 0; fidx < ALLOCSET_NUM_FREELISTS; fidx++)
+		{
+			AllocChunk	chunk;
 
-	if (totals)
-	{
-		totals->nblocks += nblocks;
-		totals->freechunks += freechunks;
-		totals->totalspace += totalspace;
-		totals->freespace += freespace;
+			for (chunk = set->freelist[fidx]; chunk != NULL;
+				 chunk = (AllocChunk) chunk->aset)
+			{
+				freechunks++;
+				freespace += chunk->size + ALLOC_CHUNKHDRSZ;
+			}
+		}
 	}
+
+	counters->nblocks = nblocks;
+	counters->nchunks = set->nChunks;
+	counters->freechunks = freechunks;
+	counters->totalspace = set->memAllocated;
+	counters->freespace = freespace;
+	counters->newspace = set->blocks->endptr - set->blocks->freeptr;
 }
 
 
@@ -1436,6 +1434,7 @@ AllocSetCheck(MemoryContext context)
 	AllocBlock	prevblock;
 	AllocBlock	block;
 	Size		total_allocated = 0;
+	uint64		total_nchunks = 0;
 
 	for (prevblock = NULL, block = set->blocks;
 		 block != NULL;
@@ -1529,6 +1528,7 @@ AllocSetCheck(MemoryContext context)
 
 			blk_data += chsize;
 			nchunks++;
+			total_nchunks++;
 
 			bpoz += ALLOC_CHUNKHDRSZ + chsize;
 		}
@@ -1538,7 +1538,8 @@ AllocSetCheck(MemoryContext context)
 				 name, block);
 	}
 
-	Assert(total_allocated == context->mem_allocated);
+	Assert(total_allocated == set->memAllocated);
+	Assert(total_nchunks == set->nChunks);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
index 56651d06931..f1a989dd1e6 100644
--- a/src/backend/utils/mmgr/generation.c
+++ b/src/backend/utils/mmgr/generation.c
@@ -61,6 +61,7 @@ typedef struct GenerationContext
 
 	/* Generational context parameters */
 	Size		blockSize;		/* standard block size */
+	Size		memAllocated;	/* track memory allocated for this context */
 
 	GenerationBlock *block;		/* current (most recently allocated) block */
 	dlist_head	blocks;			/* list of blocks */
@@ -153,9 +154,8 @@ static void GenerationReset(MemoryContext context);
 static void GenerationDelete(MemoryContext context);
 static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
 static bool GenerationIsEmpty(MemoryContext context);
-static void GenerationStats(MemoryContext context,
-							MemoryStatsPrintFunc printfunc, void *passthru,
-							MemoryContextCounters *totals);
+static void GenerationGetCounters(MemoryContext context, uint32 flags,
+								  MemoryContextCounters *counters);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 static void GenerationCheck(MemoryContext context);
@@ -172,7 +172,7 @@ static const MemoryContextMethods GenerationMethods = {
 	GenerationDelete,
 	GenerationGetChunkSpace,
 	GenerationIsEmpty,
-	GenerationStats
+	GenerationGetCounters
 #ifdef MEMORY_CONTEXT_CHECKING
 	,GenerationCheck
 #endif
@@ -258,6 +258,7 @@ GenerationContextCreate(MemoryContext parent,
 
 	/* Fill in GenerationContext-specific header fields */
 	set->blockSize = blockSize;
+	set->memAllocated = 0;
 	set->block = NULL;
 	dlist_init(&set->blocks);
 
@@ -297,7 +298,7 @@ GenerationReset(MemoryContext context)
 
 		dlist_delete(miter.cur);
 
-		context->mem_allocated -= block->blksize;
+		set->memAllocated -= block->blksize;
 
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->blksize);
@@ -354,7 +355,7 @@ GenerationAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		/* block with a single (used) chunk */
 		block->blksize = blksize;
@@ -411,7 +412,7 @@ GenerationAlloc(MemoryContext context, Size size)
 		if (block == NULL)
 			return NULL;
 
-		context->mem_allocated += blksize;
+		set->memAllocated += blksize;
 
 		block->blksize = blksize;
 		block->nchunks = 0;
@@ -528,7 +529,7 @@ GenerationFree(MemoryContext context, void *pointer)
 	if (set->block == block)
 		set->block = NULL;
 
-	context->mem_allocated -= block->blksize;
+	set->memAllocated -= block->blksize;
 	free(block);
 }
 
@@ -679,61 +680,43 @@ GenerationIsEmpty(MemoryContext context)
 }
 
 /*
- * GenerationStats
- *		Compute stats about memory consumption of a Generation context.
- *
- * printfunc: if not NULL, pass a human-readable stats string to this.
- * passthru: pass this pointer through to printfunc.
- * totals: if not NULL, add stats about this context into *totals.
- *
- * XXX freespace only accounts for empty space at the end of the block, not
- * space of freed chunks (which is unknown).
+ * GenerationGetCounters
+ *		Compute and return memory consumption counters. Specify the needed
+ *		counters by setting the appropriate flags.
  */
 static void
-GenerationStats(MemoryContext context,
-				MemoryStatsPrintFunc printfunc, void *passthru,
-				MemoryContextCounters *totals)
+GenerationGetCounters(MemoryContext context, uint32 flags,
+					  MemoryContextCounters *counters)
 {
-	GenerationContext *set = (GenerationContext *) context;
-	Size		nblocks = 0;
-	Size		nchunks = 0;
-	Size		nfreechunks = 0;
-	Size		totalspace;
-	Size		freespace = 0;
-	dlist_iter	iter;
-
-	/* Include context header in totalspace */
-	totalspace = MAXALIGN(sizeof(GenerationContext));
-
-	dlist_foreach(iter, &set->blocks)
+	GenerationContext		*set	  = (GenerationContext *) context;
+	uint64					 nblocks = 0;
+	uint64					 nchunks = 0;
+	uint64					 freechunks = 0;
+	Size					 freespace = 0;
+
+	if (flags & (MCXT_COUNT_NBLOCKS | MCXT_COUNT_NCHUNKS |
+				 MCXT_COUNT_FREECHUNKS | MCXT_COUNT_FREESPACE))
 	{
-		GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur);
+		dlist_iter	iter;
 
-		nblocks++;
-		nchunks += block->nchunks;
-		nfreechunks += block->nfree;
-		totalspace += block->blksize;
-		freespace += (block->endptr - block->freeptr);
-	}
-
-	if (printfunc)
-	{
-		char		stats_string[200];
+		dlist_foreach(iter, &set->blocks)
+		{
+			GenerationBlock *block = dlist_container(
+				GenerationBlock, node, iter.cur);
 
-		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used",
-				 totalspace, nblocks, nchunks, freespace,
-				 nfreechunks, totalspace - freespace);
-		printfunc(context, passthru, stats_string);
+			nblocks++;
+			nchunks += block->nchunks;
+			freechunks += block->nfree;
+			freespace += (block->endptr - block->freeptr);
+		}
 	}
 
-	if (totals)
-	{
-		totals->nblocks += nblocks;
-		totals->freechunks += nfreechunks;
-		totals->totalspace += totalspace;
-		totals->freespace += freespace;
-	}
+	counters->nblocks = nblocks;
+	counters->nchunks = nchunks;
+	counters->freechunks = freechunks;
+	counters->totalspace = set->memAllocated;
+	counters->freespace = freespace;
+	counters->newspace = set->block->endptr - set->block->freeptr;
 }
 
 
@@ -844,7 +827,7 @@ GenerationCheck(MemoryContext context)
 				 name, nfree, block, block->nfree);
 	}
 
-	Assert(total_allocated == context->mem_allocated);
+	Assert(total_allocated == gen->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 9e24fec72d6..1708413c46c 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -56,7 +56,7 @@ static void MemoryContextCallResetCallbacks(MemoryContext context);
 static void MemoryContextStatsInternal(MemoryContext context, int level,
 									   bool print, int max_children,
 									   MemoryContextCounters *totals);
-static void MemoryContextStatsPrint(MemoryContext context, void *passthru,
+static void MemoryContextStatsPrint(MemoryContext context, int level,
 									const char *stats_string);
 
 /*
@@ -463,27 +463,39 @@ MemoryContextIsEmpty(MemoryContext context)
 }
 
 /*
- * Find the memory allocated to blocks for this memory context. If recurse is
- * true, also include children.
+ * MemoryContextCount
+ *		Return statistics about this memory context, optionally recursing to
+ *		children. Flags are defined in memnodes.h and specify which statistics
+ *		are required.
  */
-Size
-MemoryContextMemAllocated(MemoryContext context, bool recurse)
+void
+MemoryContextGetCounters(MemoryContext context, uint32 flags, bool recurse,
+						 MemoryContextCounters *counters)
 {
-	Size	total = context->mem_allocated;
-
 	AssertArg(MemoryContextIsValid(context));
 
+	context->methods->get_counters(context, flags, counters);
+
 	if (recurse)
 	{
-		MemoryContext child = context->firstchild;
+		MemoryContext                   child;
 
 		for (child = context->firstchild;
 			 child != NULL;
 			 child = child->nextchild)
-			total += MemoryContextMemAllocated(child, true);
-	}
+		{
+			MemoryContextCounters child_counters;
+
+			child->methods->get_counters(child, flags, &child_counters);
 
-	return total;
+			counters->nblocks += child_counters.nblocks;
+			counters->nchunks += child_counters.nchunks;
+			counters->freechunks += child_counters.freechunks;
+			counters->totalspace += child_counters.totalspace;
+			counters->freespace += child_counters.freespace;
+			counters->newspace += child_counters.newspace;
+		}
+	}
 }
 
 /*
@@ -516,9 +528,10 @@ MemoryContextStatsDetail(MemoryContext context, int max_children)
 	MemoryContextStatsInternal(context, 0, true, max_children, &grand_totals);
 
 	fprintf(stderr,
-			"Grand total: %zu bytes in %zd blocks; %zu free (%zd chunks); %zu used\n",
+			"Grand total: %zu bytes in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used\n",
 			grand_totals.totalspace, grand_totals.nblocks,
-			grand_totals.freespace, grand_totals.freechunks,
+			grand_totals.nchunks, grand_totals.freespace,
+			grand_totals.freechunks,
 			grand_totals.totalspace - grand_totals.freespace);
 }
 
@@ -534,24 +547,42 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 						   bool print, int max_children,
 						   MemoryContextCounters *totals)
 {
-	MemoryContextCounters local_totals;
+	MemoryContextCounters excess_summary = {0};
+	MemoryContextCounters current;
 	MemoryContext child;
 	int			ichild;
 
+
 	AssertArg(MemoryContextIsValid(context));
 
 	/* Examine the context itself */
-	context->methods->stats(context,
-							print ? MemoryContextStatsPrint : NULL,
-							(void *) &level,
-							totals);
+	context->methods->get_counters(context, MCXT_COUNT_ALL, &current);
+
+	if (print)
+	{
+		char		stats_string[200];
+		snprintf(stats_string, sizeof(stats_string),
+				 "%zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used",
+				 current.totalspace, current.nblocks, current.nchunks,
+				 current.freespace, current.freechunks,
+				 current.totalspace - current.freespace);
+		MemoryContextStatsPrint(context, level, stats_string);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += current.nblocks;
+		totals->nchunks += current.nchunks;
+		totals->freechunks += current.freechunks;
+		totals->totalspace += current.totalspace;
+		totals->freespace += current.freespace;
+		totals->newspace += current.newspace;
+	}
 
 	/*
 	 * Examine children.  If there are more than max_children of them, we do
 	 * not print the rest explicitly, but just summarize them.
 	 */
-	memset(&local_totals, 0, sizeof(local_totals));
-
 	for (child = context->firstchild, ichild = 0;
 		 child != NULL;
 		 child = child->nextchild, ichild++)
@@ -563,7 +594,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 		else
 			MemoryContextStatsInternal(child, level + 1,
 									   false, max_children,
-									   &local_totals);
+									   &excess_summary);
 	}
 
 	/* Deal with excess children */
@@ -578,35 +609,33 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 			fprintf(stderr,
 					"%d more child contexts containing %zu total in %zd blocks; %zu free (%zd chunks); %zu used\n",
 					ichild - max_children,
-					local_totals.totalspace,
-					local_totals.nblocks,
-					local_totals.freespace,
-					local_totals.freechunks,
-					local_totals.totalspace - local_totals.freespace);
+					excess_summary.totalspace,
+					excess_summary.nblocks,
+					excess_summary.freespace,
+					excess_summary.freechunks,
+					excess_summary.totalspace - excess_summary.freespace);
 		}
 
 		if (totals)
 		{
-			totals->nblocks += local_totals.nblocks;
-			totals->freechunks += local_totals.freechunks;
-			totals->totalspace += local_totals.totalspace;
-			totals->freespace += local_totals.freespace;
+			totals->nblocks += excess_summary.nblocks;
+			totals->nchunks += excess_summary.nchunks;
+			totals->freechunks += excess_summary.freechunks;
+			totals->totalspace += excess_summary.totalspace;
+			totals->freespace += excess_summary.freespace;
+			totals->newspace += excess_summary.newspace;
 		}
 	}
 }
 
 /*
  * MemoryContextStatsPrint
- *		Print callback used by MemoryContextStatsInternal
- *
- * For now, the passthru pointer just points to "int level"; later we might
- * make that more complicated.
+ *		Print function used by MemoryContextStatsInternal
  */
 static void
-MemoryContextStatsPrint(MemoryContext context, void *passthru,
+MemoryContextStatsPrint(MemoryContext context, int level,
 						const char *stats_string)
 {
-	int			level = *(int *) passthru;
 	const char *name = context->name;
 	const char *ident = context->ident;
 	int			i;
@@ -760,7 +789,6 @@ MemoryContextCreate(MemoryContext node,
 	node->methods = methods;
 	node->parent = parent;
 	node->firstchild = NULL;
-	node->mem_allocated = 0;
 	node->prevchild = NULL;
 	node->name = name;
 	node->ident = NULL;
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index c928476c479..acab22ffa49 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -67,6 +67,7 @@ typedef struct SlabContext
 	Size		fullChunkSize;	/* chunk size including header and alignment */
 	Size		blockSize;		/* block size */
 	Size		headerSize;		/* allocated size of context header */
+	Size		memAllocated;	/* track memory allocated for this context */
 	int			chunksPerBlock; /* number of chunks per block */
 	int			minFreeChunks;	/* min number of free chunks in any block */
 	int			nblocks;		/* number of blocks allocated */
@@ -133,9 +134,8 @@ static void SlabReset(MemoryContext context);
 static void SlabDelete(MemoryContext context);
 static Size SlabGetChunkSpace(MemoryContext context, void *pointer);
 static bool SlabIsEmpty(MemoryContext context);
-static void SlabStats(MemoryContext context,
-					  MemoryStatsPrintFunc printfunc, void *passthru,
-					  MemoryContextCounters *totals);
+static void SlabGetCounters(MemoryContext context, uint32 flags,
+							MemoryContextCounters *counters);
 #ifdef MEMORY_CONTEXT_CHECKING
 static void SlabCheck(MemoryContext context);
 #endif
@@ -151,7 +151,7 @@ static const MemoryContextMethods SlabMethods = {
 	SlabDelete,
 	SlabGetChunkSpace,
 	SlabIsEmpty,
-	SlabStats
+	SlabGetCounters
 #ifdef MEMORY_CONTEXT_CHECKING
 	,SlabCheck
 #endif
@@ -262,6 +262,7 @@ SlabContextCreate(MemoryContext parent,
 	slab->fullChunkSize = fullChunkSize;
 	slab->blockSize = blockSize;
 	slab->headerSize = headerSize;
+	slab->memAllocated = 0;
 	slab->chunksPerBlock = chunksPerBlock;
 	slab->minFreeChunks = 0;
 	slab->nblocks = 0;
@@ -322,14 +323,14 @@ SlabReset(MemoryContext context)
 #endif
 			free(block);
 			slab->nblocks--;
-			context->mem_allocated -= slab->blockSize;
+			slab->memAllocated -= slab->blockSize;
 		}
 	}
 
 	slab->minFreeChunks = 0;
 
 	Assert(slab->nblocks == 0);
-	Assert(context->mem_allocated == 0);
+	Assert(slab->memAllocated == 0);
 }
 
 /*
@@ -407,7 +408,7 @@ SlabAlloc(MemoryContext context, Size size)
 
 		slab->minFreeChunks = slab->chunksPerBlock;
 		slab->nblocks += 1;
-		context->mem_allocated += slab->blockSize;
+		slab->memAllocated += slab->blockSize;
 	}
 
 	/* grab the block from the freelist (even the new block is there) */
@@ -501,7 +502,7 @@ SlabAlloc(MemoryContext context, Size size)
 
 	SlabAllocInfo(slab, chunk);
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 
 	return SlabChunkGetPointer(chunk);
 }
@@ -578,13 +579,13 @@ SlabFree(MemoryContext context, void *pointer)
 	{
 		free(block);
 		slab->nblocks--;
-		context->mem_allocated -= slab->blockSize;
+		slab->memAllocated -= slab->blockSize;
 	}
 	else
 		dlist_push_head(&slab->freelist[block->nfree], &block->node);
 
 	Assert(slab->nblocks >= 0);
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 }
 
 /*
@@ -645,61 +646,48 @@ SlabIsEmpty(MemoryContext context)
 }
 
 /*
- * SlabStats
- *		Compute stats about memory consumption of a Slab context.
- *
- * printfunc: if not NULL, pass a human-readable stats string to this.
- * passthru: pass this pointer through to printfunc.
- * totals: if not NULL, add stats about this context into *totals.
+ * SlabGetCounters
+ *		Compute and return memory consumption counters. Specify the needed
+ *		counters by setting the appropriate flags.
  */
 static void
-SlabStats(MemoryContext context,
-		  MemoryStatsPrintFunc printfunc, void *passthru,
-		  MemoryContextCounters *totals)
+SlabGetCounters(MemoryContext context, uint32 flags,
+				MemoryContextCounters *counters)
 {
-	SlabContext *slab = castNode(SlabContext, context);
-	Size		nblocks = 0;
-	Size		freechunks = 0;
-	Size		totalspace;
-	Size		freespace = 0;
-	int			i;
-
-	/* Include context header in totalspace */
-	totalspace = slab->headerSize;
-
-	for (i = 0; i <= slab->chunksPerBlock; i++)
+	SlabContext				*slab	  = castNode(SlabContext, context);
+	uint64					 nblocks = 0;
+	uint64					 nchunks = 0;
+	uint64					 freechunks = 0;
+	Size					 freespace = 0;
+
+	if (flags & (MCXT_COUNT_NBLOCKS | MCXT_COUNT_NCHUNKS |
+				 MCXT_COUNT_FREECHUNKS | MCXT_COUNT_FREESPACE))
 	{
-		dlist_iter	iter;
+		int	i;
 
-		dlist_foreach(iter, &slab->freelist[i])
+		for (i = 0; i <= slab->chunksPerBlock; i++)
 		{
-			SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
+			dlist_iter	iter;
 
-			nblocks++;
-			totalspace += slab->blockSize;
-			freespace += slab->fullChunkSize * block->nfree;
-			freechunks += block->nfree;
-		}
-	}
-
-	if (printfunc)
-	{
-		char		stats_string[200];
+			dlist_foreach(iter, &slab->freelist[i])
+			{
+				SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
 
-		snprintf(stats_string, sizeof(stats_string),
-				 "%zu total in %zd blocks; %zu free (%zd chunks); %zu used",
-				 totalspace, nblocks, freespace, freechunks,
-				 totalspace - freespace);
-		printfunc(context, passthru, stats_string);
+				nblocks++;
+				nchunks += slab->chunksPerBlock;
+				freespace += slab->fullChunkSize * block->nfree;
+				freechunks += block->nfree;
+			}
+		}
 	}
 
-	if (totals)
-	{
-		totals->nblocks += nblocks;
-		totals->freechunks += freechunks;
-		totals->totalspace += totalspace;
-		totals->freespace += freespace;
-	}
+	counters->nblocks = nblocks;
+	counters->nchunks = nchunks;
+	counters->freechunks = freechunks;
+	counters->totalspace = slab->memAllocated;
+	counters->freespace = freespace;
+	/* new memory is already sliced into chunks, so newspace is always 0 */
+	counters->newspace = 0;
 }
 
 
@@ -804,7 +792,7 @@ SlabCheck(MemoryContext context)
 		}
 	}
 
-	Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+	Assert(slab->nblocks * slab->blockSize == slab->memAllocated);
 }
 
 #endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index c9f2bbcb367..798854da99d 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -25,15 +25,30 @@
  * we might need more or different counters here.  A possible API spec then
  * would be to print only nonzero counters, but for now we just summarize in
  * the format historically used by AllocSet.
+ *
+ * The 'newspace' counter represents memory that is allocated by malloc, but
+ * not touched yet. This can be significant for context types that malloc
+ * large blocks of memory in anticipation of chunks that have not yet been
+ * requested.
  */
 typedef struct MemoryContextCounters
 {
 	Size		nblocks;		/* Total number of malloc blocks */
+	Size		nchunks;		/* Total number of chunks (used+free) */
 	Size		freechunks;		/* Total number of free chunks */
 	Size		totalspace;		/* Total bytes requested from malloc */
 	Size		freespace;		/* The unused portion of totalspace */
+	Size		newspace;		/* Allocated but never held any chunks */
 } MemoryContextCounters;
 
+#define MCXT_COUNT_NBLOCKS		(1 << 0)
+#define MCXT_COUNT_NCHUNKS		(1 << 1)
+#define MCXT_COUNT_FREECHUNKS	(1 << 2)
+#define MCXT_COUNT_TOTALSPACE	(1 << 3)
+#define MCXT_COUNT_FREESPACE	(1 << 4)
+#define MCXT_COUNT_NEWSPACE		(1 << 5)
+#define MCXT_COUNT_ALL			(~0)
+
 /*
  * MemoryContext
  *		A logical context in which memory allocations occur.
@@ -51,9 +66,6 @@ typedef struct MemoryContextCounters
  * to the context struct rather than the struct type itself.
  */
 
-typedef void (*MemoryStatsPrintFunc) (MemoryContext context, void *passthru,
-									  const char *stats_string);
-
 typedef struct MemoryContextMethods
 {
 	void	   *(*alloc) (MemoryContext context, Size size);
@@ -64,9 +76,8 @@ typedef struct MemoryContextMethods
 	void		(*delete_context) (MemoryContext context);
 	Size		(*get_chunk_space) (MemoryContext context, void *pointer);
 	bool		(*is_empty) (MemoryContext context);
-	void		(*stats) (MemoryContext context,
-						  MemoryStatsPrintFunc printfunc, void *passthru,
-						  MemoryContextCounters *totals);
+	void		(*get_counters) (MemoryContext context, uint32 flags,
+								 MemoryContextCounters *counters);
 #ifdef MEMORY_CONTEXT_CHECKING
 	void		(*check) (MemoryContext context);
 #endif
@@ -79,7 +90,6 @@ typedef struct MemoryContextData
 	/* these two fields are placed here to minimize alignment wastage: */
 	bool		isReset;		/* T = no space alloced since last reset */
 	bool		allowInCritSection; /* allow palloc in critical section */
-	Size		mem_allocated;	/* track memory allocated for this context */
 	const MemoryContextMethods *methods;	/* virtual function table */
 	MemoryContext parent;		/* NULL if no parent (toplevel context) */
 	MemoryContext firstchild;	/* head of linked list of children */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 909bc2e9888..e787e43b3fb 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -82,7 +82,9 @@ extern void MemoryContextSetParent(MemoryContext context,
 extern Size GetMemoryChunkSpace(void *pointer);
 extern MemoryContext MemoryContextGetParent(MemoryContext context);
 extern bool MemoryContextIsEmpty(MemoryContext context);
-extern Size MemoryContextMemAllocated(MemoryContext context, bool recurse);
+extern void MemoryContextGetCounters(MemoryContext context, uint32 flags,
+									 bool recurse,
+									 MemoryContextCounters *counters);
 extern void MemoryContextStats(MemoryContext context);
 extern void MemoryContextStatsDetail(MemoryContext context, int max_children);
 extern void MemoryContextAllowInCriticalSection(MemoryContext context,
#19Jeff Davis
pgsql@j-davis.com
In reply to: Andres Freund (#16)
Re: Make MemoryContextMemAllocated() more precise

On Sun, 2020-04-05 at 16:48 -0700, Andres Freund wrote:

I still think we should do something for v13, such as the
originally-
proposed patch[1]. It's not critical, but it simply reports a
better
number for memory consumption. Currently, the memory usage appears
to
jump, often right past work mem (by a reasonable but noticable
amount),
which could be confusing.

Is that really a significant issue for most work mem sizes? Shouldn't
the way we increase sizes lead to the max difference between the
measurements to be somewhat limited?

For work_mem less than 16MB, it's essentially spilling when the table
context is about 75% of what it could be (as bad as 50% and as good as
100% depending on a number of factors). That's not terrible, but it is
significant.

It also means that the reported memory peak jumps up rather than going
up gradually, so it ends up surpassing work_mem (e.g. 4MB of work_mem
might show a memory peak of 5MB). So it's a weird combination of under-
utilizing and over-reporting.

I'd spec it so that context implementations are allowed to
unconditionally fill fields, even when the flag isn't specified. The
branches quoted don't buy us anyting...

Done.

I'd add some reasoning as to why this is useful.

Done.

s/MCXT_STAT/MCXT_STAT_NEED/?

I changed to MCXT_COUNT_. MCXT_STAT_NEED seemed slightly verbose for
me.

+#define MCXT_STAT_ALL ((1 << 6) - 1)

Hm, why not go for ~0 or such?

Done.

Regards,
Jeff Davis

#20David Rowley
dgrowleyml@gmail.com
In reply to: Jeff Davis (#18)
Re: Make MemoryContextMemAllocated() more precise

On Tue, 7 Apr 2020 at 14:26, Jeff Davis <pgsql@j-davis.com> wrote:

On Mon, 2020-04-06 at 23:39 +1200, David Rowley wrote:

4. Do you think it would be better to have two separate functions for
MemoryContextCount(), a recursive version and a non-recursive
version.

I could, but right now the only caller passes recurse=true, so I'm not
really eliminating any code in that path by specializing the functions.
Are you thinking about performance or you just think it would be better
to have two entry points?

I was thinking in terms of both performance by eliminating a branch,
but mostly it was down to ease of code reading.

I thought it was easier to read:
MemoryContextGetCountersRecruse(&counters); then
MemoryContextGetCounters(&counters, true); since I might need to go
see what "true" is for.

The non-recursive version, if we decide we need one, would likely just
be 1 one-line body function calling the implementation's getcounter
method.

David

#21Jeff Davis
pgsql@j-davis.com
In reply to: Jeff Davis (#1)
1 attachment(s)
Re: Make MemoryContextMemAllocated() more precise

On Mon, 2020-03-16 at 11:45 -0700, Jeff Davis wrote:

AllocSet allocates memory for itself in blocks, which double in size
up
to maxBlockSize. So, the current block (the last one malloc'd) may
represent half of the total memory allocated for the context itself.

Narrower approach that doesn't touch memory context internals:

If the blocks double up in size to maxBlockSize, why not just create
the memory context with a smaller maxBlockSize? I had originally
dismissed this as a hack that could slow down some workloads when
work_mem is large.

But we can simply make it proportional to work_mem, which makes a lot
of sense for an operator like HashAgg that controls its memory usage.
It can allocate in blocks large enough that we don't call malloc() too
often when work_mem is large; but small enough that we don't overrun
work_mem when work_mem is small.

I have attached a patch to do this only for HashAgg, using a new entry
point in execUtils.c called CreateWorkExprContext(). It sets
maxBlockSize to 1/16th of work_mem (rounded down to a power of two),
with a minimum of initBlockSize.

This could be a good general solution for other operators as well, but
that requires a bit more investigation, so I'll leave that for v14.

The attached patch is narrow and solves the problem for HashAgg nicely
without interfering with anything else, so I plan to commit it soon for
v13.

Regards,
Jeff Davis

Attachments:

CreateWorkExprContext.patchtext/x-patch; charset=UTF-8; name=CreateWorkExprContext.patchDownload
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index cc5177cc2b9..ca973882d01 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -53,6 +53,7 @@
 #include "executor/executor.h"
 #include "jit/jit.h"
 #include "mb/pg_wchar.h"
+#include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "parser/parsetree.h"
 #include "partitioning/partdesc.h"
@@ -227,21 +228,13 @@ FreeExecutorState(EState *estate)
 	MemoryContextDelete(estate->es_query_cxt);
 }
 
-/* ----------------
- *		CreateExprContext
- *
- *		Create a context for expression evaluation within an EState.
- *
- * An executor run may require multiple ExprContexts (we usually make one
- * for each Plan node, and a separate one for per-output-tuple processing
- * such as constraint checking).  Each ExprContext has its own "per-tuple"
- * memory context.
- *
- * Note we make no assumption about the caller's memory context.
- * ----------------
+/*
+ * Internal implementation for CreateExprContext() and CreateWorkExprContext()
+ * that allows control over the AllocSet parameters.
  */
-ExprContext *
-CreateExprContext(EState *estate)
+static ExprContext *
+CreateExprContextInternal(EState *estate, Size minContextSize,
+						  Size initBlockSize, Size maxBlockSize)
 {
 	ExprContext *econtext;
 	MemoryContext oldcontext;
@@ -264,7 +257,9 @@ CreateExprContext(EState *estate)
 	econtext->ecxt_per_tuple_memory =
 		AllocSetContextCreate(estate->es_query_cxt,
 							  "ExprContext",
-							  ALLOCSET_DEFAULT_SIZES);
+							  minContextSize,
+							  initBlockSize,
+							  maxBlockSize);
 
 	econtext->ecxt_param_exec_vals = estate->es_param_exec_vals;
 	econtext->ecxt_param_list_info = estate->es_param_list_info;
@@ -294,6 +289,52 @@ CreateExprContext(EState *estate)
 	return econtext;
 }
 
+/* ----------------
+ *		CreateExprContext
+ *
+ *		Create a context for expression evaluation within an EState.
+ *
+ * An executor run may require multiple ExprContexts (we usually make one
+ * for each Plan node, and a separate one for per-output-tuple processing
+ * such as constraint checking).  Each ExprContext has its own "per-tuple"
+ * memory context.
+ *
+ * Note we make no assumption about the caller's memory context.
+ * ----------------
+ */
+ExprContext *
+CreateExprContext(EState *estate)
+{
+	return CreateExprContextInternal(estate, ALLOCSET_DEFAULT_SIZES);
+}
+
+
+/* ----------------
+ *		CreateWorkExprContext
+ *
+ * Like CreateExprContext, but specifies the AllocSet sizes to be reasonable
+ * in proportion to work_mem. If the maximum block allocation size is too
+ * large, it's easy to skip right past work_mem with a single allocation.
+ * ----------------
+ */
+ExprContext *
+CreateWorkExprContext(EState *estate)
+{
+	Size minContextSize = ALLOCSET_DEFAULT_MINSIZE;
+	Size initBlockSize = ALLOCSET_DEFAULT_INITSIZE;
+	Size maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
+
+	/* choose the maxBlockSize to be no larger than 1/16 of work_mem */
+	while (16 * maxBlockSize > work_mem * 1024L)
+		maxBlockSize >>= 1;
+
+	if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)
+		maxBlockSize = ALLOCSET_DEFAULT_INITSIZE;
+
+	return CreateExprContextInternal(estate, minContextSize,
+									 initBlockSize, maxBlockSize);
+}
+
 /* ----------------
  *		CreateStandaloneExprContext
  *
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 4e100e5755f..44587a84bae 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -3277,10 +3277,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	}
 
 	if (use_hashing)
-	{
-		ExecAssignExprContext(estate, &aggstate->ss.ps);
-		aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
-	}
+		aggstate->hashcontext = CreateWorkExprContext(estate);
 
 	ExecAssignExprContext(estate, &aggstate->ss.ps);
 
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 94890512dc8..c7deeac662f 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -493,6 +493,7 @@ extern void end_tup_output(TupOutputState *tstate);
 extern EState *CreateExecutorState(void);
 extern void FreeExecutorState(EState *estate);
 extern ExprContext *CreateExprContext(EState *estate);
+extern ExprContext *CreateWorkExprContext(EState *estate);
 extern ExprContext *CreateStandaloneExprContext(void);
 extern void FreeExprContext(ExprContext *econtext, bool isCommit);
 extern void ReScanExprContext(ExprContext *econtext);
#22Daniel Gustafsson
daniel@yesql.se
In reply to: Jeff Davis (#21)
Re: Make MemoryContextMemAllocated() more precise

On 8 Apr 2020, at 02:21, Jeff Davis <pgsql@j-davis.com> wrote:

The attached patch is narrow and solves the problem for HashAgg nicely
without interfering with anything else, so I plan to commit it soon for
v13.

If I read this thread correctly, there is nothing outstanding here for 14 from
this patch? I've marked the entry committed i the July commitfest, feel to
free to change that in case I misunderstood.

cheers ./daniel