Patch: Optimize memory allocation in function 'bringetbitmap'

Started by Jinyu Zhangover 10 years ago14 messages
#1Jinyu Zhang
beijing_pg@163.com
1 attachment(s)

BRIN Scan: Optimize memory allocation in function 'bringetbitmap'.
We can allocate memory for some pointer before do long loop instead of allocating
memory in long loop.

Before optimizing code (warm run)
postgres=# select count(*) from lineitem where l_orderkey=1;
count
-------
6
(1 row)

Time: 456.219 ms

After optimizing code (warm run)
postgres=# select count(*) from lineitem where l_orderkey=1;
count
-------
6
(1 row)

Time: 349.219 ms

The following shows the DDL of this test case.
CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL,
L_PARTKEY INTEGER NOT NULL,
L_SUPPKEY INTEGER NOT NULL,
L_LINENUMBER INTEGER NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR(1) NOT NULL,
L_LINESTATUS CHAR(1) NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL);

copy lineitem from '/home/jinyu/mywork/dbgen/lineitem.tbl' delimiter '|';
create index brinLineitem on lineitem using brin(L_ORDERKEY) with(pages_per_range = 1);

Jinyu Zhang

网易考拉iPhone6s玫瑰金5288元,现货不加价

Attachments:

patch_optimize_memapplication/octet-stream; name=patch_optimize_memDownload
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 99337b0..2e92dd0 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -131,7 +131,7 @@ brininsert(PG_FUNCTION_ARGS)
 			oldcxt = MemoryContextSwitchTo(tupcxt);
 		}
 
-		dtup = brin_deform_tuple(bdesc, brtup);
+		dtup = brin_deform_tuple(bdesc, brtup, NULL, NULL, NULL);
 
 		/*
 		 * Compare the key values of the new tuple to the stored index values;
@@ -284,7 +284,12 @@ bringetbitmap(PG_FUNCTION_ARGS)
 	FmgrInfo   *consistentFn;
 	MemoryContext oldcxt;
 	MemoryContext perRangeCxt;
-
+	BrinTuple  *btup;
+	uint32	   tupInitSize;
+	Datum 	   *values;
+	bool 	   *allnulls;
+	bool 	   *hasnulls;
+	
 	opaque = (BrinOpaque *) scan->opaque;
 	bdesc = opaque->bo_bdesc;
 	pgstat_count_index_scan(idxRel);
@@ -304,7 +309,16 @@ bringetbitmap(PG_FUNCTION_ARGS)
 	 * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
 	 */
 	consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
-
+	
+	/*
+	 * Estimate the approximate size and allocate memory for tup.
+	 */
+	tupInitSize = bdesc->bd_tupdesc->natts * 16;
+	btup = palloc(tupInitSize);
+	values = palloc(sizeof(Datum) * bdesc->bd_totalstored);
+	allnulls = palloc(sizeof(bool) * bdesc->bd_tupdesc->natts);
+	hasnulls = palloc(sizeof(bool) * bdesc->bd_tupdesc->natts);
+	
 	/*
 	 * Setup and use a per-range memory context, which is reset every time we
 	 * loop below.  This avoids having to free the tuples within the loop.
@@ -327,7 +341,6 @@ bringetbitmap(PG_FUNCTION_ARGS)
 		BrinTuple  *tup;
 		OffsetNumber off;
 		Size		size;
-
 		CHECK_FOR_INTERRUPTS();
 
 		MemoryContextResetAndDeleteChildren(perRangeCxt);
@@ -336,7 +349,10 @@ bringetbitmap(PG_FUNCTION_ARGS)
 									   &off, &size, BUFFER_LOCK_SHARE);
 		if (tup)
 		{
-			tup = brin_copy_tuple(tup, size);
+			if(size <= tupInitSize)
+				memcpy(btup, tup, size);
+			else
+				btup = brin_copy_tuple(tup, size);
 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 		}
 
@@ -344,7 +360,7 @@ bringetbitmap(PG_FUNCTION_ARGS)
 		 * For page ranges with no indexed tuple, we must return the whole
 		 * range; otherwise, compare it to the scan keys.
 		 */
-		if (tup == NULL)
+		if (btup == NULL)
 		{
 			addrange = true;
 		}
@@ -352,7 +368,7 @@ bringetbitmap(PG_FUNCTION_ARGS)
 		{
 			BrinMemTuple *dtup;
 
-			dtup = brin_deform_tuple(bdesc, tup);
+			dtup = brin_deform_tuple(bdesc, btup, values, allnulls, hasnulls);
 			if (dtup->bt_placeholder)
 			{
 				/*
@@ -1136,7 +1152,7 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
 								ALLOCSET_DEFAULT_INITSIZE,
 								ALLOCSET_DEFAULT_MAXSIZE);
 	oldcxt = MemoryContextSwitchTo(cxt);
-	db = brin_deform_tuple(bdesc, b);
+	db = brin_deform_tuple(bdesc, b, NULL, NULL, NULL);
 	MemoryContextSwitchTo(oldcxt);
 
 	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 5a7462b..2f416f1 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -397,7 +397,8 @@ brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
  * deconstruct the tuple from the on-disk format.
  */
 BrinMemTuple *
-brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
+brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, 
+                  Datum *valuesArg, bool *allnullsArg, bool *hasnullsArg)
 {
 	BrinMemTuple *dtup;
 	Datum	   *values;
@@ -415,9 +416,20 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
 		dtup->bt_placeholder = true;
 	dtup->bt_blkno = tuple->bt_blkno;
 
-	values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
-	allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
-	hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	if(!valuesArg)
+	{
+		Assert(!allnullsArg && !hasnullsArg);
+		values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
+		allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+		hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	}
+	else
+	{
+		Assert(allnullsArg && hasnullsArg);
+		values = valuesArg;
+		allnulls = allnullsArg;
+		hasnulls = hasnullsArg;
+	}
 
 	tp = (char *) tuple + BrinTupleDataOffset(tuple);
 
@@ -460,9 +472,12 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
 
 	MemoryContextSwitchTo(oldcxt);
 
-	pfree(values);
-	pfree(allnulls);
-	pfree(hasnulls);
+	if(!valuesArg)
+	{
+		pfree(values);
+		pfree(allnulls);
+		pfree(hasnulls);
+	}
 
 	return dtup;
 }
diff --git a/src/include/access/brin_tuple.h b/src/include/access/brin_tuple.h
index ce63b30..b63ea15 100644
--- a/src/include/access/brin_tuple.h
+++ b/src/include/access/brin_tuple.h
@@ -91,6 +91,9 @@ extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc);
 extern void brin_memtuple_initialize(BrinMemTuple *dtuple,
 						 BrinDesc *brdesc);
 extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc,
-				  BrinTuple *tuple);
+				  BrinTuple *tuple,
+				  Datum *valuesArg, 
+				  bool *allnullsArg, 
+				  bool *hasnullsArg);
 
 #endif   /* BRIN_TUPLE_H */
#2Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Jinyu Zhang (#1)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Jinyu Zhang wrote:

BRIN Scan: Optimize memory allocation in function 'bringetbitmap'.
We can allocate memory for some pointer before do long loop instead of allocating
memory in long loop.

Before optimizing code (warm run)
postgres=# select count(*) from lineitem where l_orderkey=1;
Time: 456.219 ms

After optimizing code (warm run)
postgres=# select count(*) from lineitem where l_orderkey=1;
Time: 349.219 ms

Hmm, did you measure this in a c-assert-enabled build? Those are slower
in allocation because of the memory-clobber thingy; without that, the
allocations would be a lot faster, so perhaps the gains are not so
interesting. Still, it's a lot of palloc/pfree calls that do not happen
with your patch, so perhaps it's still worthwhile, but please do measure
it.

However I wonder if it would be simpler to have the dtup structure have
the pointers, so that you can pass it as NULL in the first call and then
followup calls reuse the one allocated in the first call. That makes
the callers a bit less ugly, I think.

--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3zhangjinyu
beijing_pg@163.com
In reply to: Alvaro Herrera (#2)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Yes, I forgot disable-c-assert last test.
The following show the test result when disable-c-assert.
I think it's still worthwhile.
*After optimize code (warm run)*
postgres=# select count(*) from lineitem where l_orderkey=1;
count
-------
6
(1 row)

Time: 327.143 ms
*Before optimizing code (warm run)*
postgres=# select count(*) from lineitem where l_orderkey=1;
count
-------
6
(1 row)

Time: 404.323 ms

However I wonder if it would be simpler to have the dtup structure have
the pointers, so that you can pass it as NULL in the first call and then
followup calls reuse the one allocated in the first call.

Jinyu: the memory is allocated from perRangeCxt and perRangeCxt will be
reset in loop,
so this way don't work.

Jinyu Zhang

--
View this message in context: http://postgresql.nabble.com/Patch-Optimize-memory-allocation-in-function-bringetbitmap-tp5867537p5867647.html
Sent from the PostgreSQL - hackers mailing list archive at Nabble.com.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: zhangjinyu (#3)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

zhangjinyu wrote:

However I wonder if it would be simpler to have the dtup structure have
the pointers, so that you can pass it as NULL in the first call and then
followup calls reuse the one allocated in the first call.

Jinyu: the memory is allocated from perRangeCxt and perRangeCxt will be
reset in loop,
so this way don't work.

You're right. I think we can do better: have brin_deform_tuple accept
another argument of type BrinMemTuple *, which can be NULL. If NULL,
the function calls brin_new_memtuple to allocate a new one (just like
current code); if not NULL, that one is used. Have brin_new_memtuple
also allocate the values/allnulls/hasnulls arrays, which are now part of
the BrinMemTuple struct. Then, bringetbitmap calls brin_new_memtuple
once before changing to perRangeCxt, then reuse the returned inside the
loop (we probably need brin_memtuple_initialize to clear out the struct
at the end of each loop iteration). Other callers of brin_deform_tuple
just pass NULL to get the current behavior.

--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#5Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Alvaro Herrera (#4)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Also, please make sure your patch is registered in
https://commitfest.postgresql.org/7/ so that we don't forget.

--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#6Jinyu Zhang
beijing_pg@163.com
In reply to: Alvaro Herrera (#4)
1 attachment(s)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Update the patch_brin_optimze_mem according to your comment.

At 2015-10-16 10:13:35, "Alvaro Herrera" <alvherre@2ndquadrant.com> wrote:

Show quoted text

zhangjinyu wrote:

However I wonder if it would be simpler to have the dtup structure have
the pointers, so that you can pass it as NULL in the first call and then
followup calls reuse the one allocated in the first call.

Jinyu: the memory is allocated from perRangeCxt and perRangeCxt will be
reset in loop,
so this way don't work.

You're right. I think we can do better: have brin_deform_tuple accept
another argument of type BrinMemTuple *, which can be NULL. If NULL,
the function calls brin_new_memtuple to allocate a new one (just like
current code); if not NULL, that one is used. Have brin_new_memtuple
also allocate the values/allnulls/hasnulls arrays, which are now part of
the BrinMemTuple struct. Then, bringetbitmap calls brin_new_memtuple
once before changing to perRangeCxt, then reuse the returned inside the
loop (we probably need brin_memtuple_initialize to clear out the struct
at the end of each loop iteration). Other callers of brin_deform_tuple
just pass NULL to get the current behavior.

--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Attachments:

patch_brin_optimize_memapplication/octet-stream; name=patch_brin_optimize_memDownload
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 99337b0..e767613 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -131,7 +131,7 @@ brininsert(PG_FUNCTION_ARGS)
 			oldcxt = MemoryContextSwitchTo(tupcxt);
 		}
 
-		dtup = brin_deform_tuple(bdesc, brtup);
+		dtup = brin_deform_tuple(bdesc, brtup, NULL);
 
 		/*
 		 * Compare the key values of the new tuple to the stored index values;
@@ -284,6 +284,9 @@ bringetbitmap(PG_FUNCTION_ARGS)
 	FmgrInfo   *consistentFn;
 	MemoryContext oldcxt;
 	MemoryContext perRangeCxt;
+	BrinMemTuple *dtup;
+	BrinTuple    *btup;
+	uint32	     btupInitSize;
 
 	opaque = (BrinOpaque *) scan->opaque;
 	bdesc = opaque->bo_bdesc;
@@ -304,7 +307,14 @@ bringetbitmap(PG_FUNCTION_ARGS)
 	 * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
 	 */
 	consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
-
+	dtup = brin_new_memtuple(bdesc);
+	
+	/*
+	 * Estimate the approximate size of btup and allocate memory for btup.
+	 */
+	btupInitSize = bdesc->bd_tupdesc->natts * 16;
+	btup = palloc(btupInitSize);
+	
 	/*
 	 * Setup and use a per-range memory context, which is reset every time we
 	 * loop below.  This avoids having to free the tuples within the loop.
@@ -335,8 +345,11 @@ bringetbitmap(PG_FUNCTION_ARGS)
 		tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
 									   &off, &size, BUFFER_LOCK_SHARE);
 		if (tup)
-		{
-			tup = brin_copy_tuple(tup, size);
+		{			
+			if(size <= btupInitSize)
+				memcpy(btup, tup, size);
+			else
+				btup = brin_copy_tuple(tup, size);
 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 		}
 
@@ -344,15 +357,13 @@ bringetbitmap(PG_FUNCTION_ARGS)
 		 * For page ranges with no indexed tuple, we must return the whole
 		 * range; otherwise, compare it to the scan keys.
 		 */
-		if (tup == NULL)
+		if (btup == NULL)
 		{
 			addrange = true;
 		}
 		else
 		{
-			BrinMemTuple *dtup;
-
-			dtup = brin_deform_tuple(bdesc, tup);
+			dtup = brin_deform_tuple(bdesc, btup, dtup);
 			if (dtup->bt_placeholder)
 			{
 				/*
@@ -1136,7 +1147,7 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
 								ALLOCSET_DEFAULT_INITSIZE,
 								ALLOCSET_DEFAULT_MAXSIZE);
 	oldcxt = MemoryContextSwitchTo(cxt);
-	db = brin_deform_tuple(bdesc, b);
+	db = brin_deform_tuple(bdesc, b, NULL);
 	MemoryContextSwitchTo(oldcxt);
 
 	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 5a7462b..4b9a163 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -348,23 +348,16 @@ BrinMemTuple *
 brin_new_memtuple(BrinDesc *brdesc)
 {
 	BrinMemTuple *dtup;
-	char	   *currdatum;
 	long		basesize;
-	int			i;
 
 	basesize = MAXALIGN(sizeof(BrinMemTuple) +
 						sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
 	dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
-	currdatum = (char *) dtup + basesize;
-	for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
-	{
-		dtup->bt_columns[i].bv_attno = i + 1;
-		dtup->bt_columns[i].bv_allnulls = true;
-		dtup->bt_columns[i].bv_hasnulls = false;
-		dtup->bt_columns[i].bv_values = (Datum *) currdatum;
-		currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
-	}
-
+	
+	dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
+	dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	
 	dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
 											 "brin dtuple",
 											 ALLOCSET_DEFAULT_MINSIZE,
@@ -380,12 +373,23 @@ void
 brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
 {
 	int			i;
-
+	long		basesize;
+	char	   *currdatum;
+	
+	basesize = MAXALIGN(sizeof(BrinMemTuple) +
+						sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
+	currdatum = (char *) dtuple + basesize;
 	MemoryContextReset(dtuple->bt_context);
 	for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
 	{
 		dtuple->bt_columns[i].bv_allnulls = true;
 		dtuple->bt_columns[i].bv_hasnulls = false;
+
+		dtuple->bt_columns[i].bv_attno = i + 1;
+		dtuple->bt_columns[i].bv_allnulls = true;
+		dtuple->bt_columns[i].bv_hasnulls = false;
+		dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
+		currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
 	}
 }
 
@@ -397,7 +401,7 @@ brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
  * deconstruct the tuple from the on-disk format.
  */
 BrinMemTuple *
-brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
+brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
 {
 	BrinMemTuple *dtup;
 	Datum	   *values;
@@ -409,15 +413,18 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
 	int			valueno;
 	MemoryContext oldcxt;
 
-	dtup = brin_new_memtuple(brdesc);
+	dtup = dMemtuple;
+	if(!dtup)
+		dtup = brin_new_memtuple(brdesc);
+	brin_memtuple_initialize(dtup, brdesc);
 
 	if (BrinTupleIsPlaceholder(tuple))
 		dtup->bt_placeholder = true;
 	dtup->bt_blkno = tuple->bt_blkno;
 
-	values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
-	allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
-	hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	values = dtup->bt_values;
+	allnulls = dtup->bt_allnulls;
+	hasnulls = dtup->bt_hasnulls;
 
 	tp = (char *) tuple + BrinTupleDataOffset(tuple);
 
@@ -460,10 +467,6 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
 
 	MemoryContextSwitchTo(oldcxt);
 
-	pfree(values);
-	pfree(allnulls);
-	pfree(hasnulls);
-
 	return dtup;
 }
 
diff --git a/src/include/access/brin_tuple.h b/src/include/access/brin_tuple.h
index ce63b30..d3daac5 100644
--- a/src/include/access/brin_tuple.h
+++ b/src/include/access/brin_tuple.h
@@ -38,6 +38,9 @@ typedef struct BrinMemTuple
 	bool		bt_placeholder; /* this is a placeholder tuple */
 	BlockNumber bt_blkno;		/* heap blkno that the tuple is for */
 	MemoryContext bt_context;	/* memcxt holding the bt_columns values */
+	Datum		*bt_values;		/* values pointer is for brin_deconstruct_tuple */
+	bool		*bt_allnulls;	/* allnulls pointer is for brin_deconstruct_tuple */
+	bool		*bt_hasnulls; 	/* hasnulls pointer is for brin_deconstruct_tuple */
 	BrinValues	bt_columns[FLEXIBLE_ARRAY_MEMBER];
 } BrinMemTuple;
 
@@ -91,6 +94,6 @@ extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc);
 extern void brin_memtuple_initialize(BrinMemTuple *dtuple,
 						 BrinDesc *brdesc);
 extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc,
-				  BrinTuple *tuple);
+				  BrinTuple *tuple, BrinMemTuple *dMemtuple);
 
 #endif   /* BRIN_TUPLE_H */
#7Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Jinyu Zhang (#6)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Hi,

On 10/16/2015 08:00 PM, Jinyu Zhang wrote:

Update the patch_brin_optimze_mem according to your comment.

I've reviewed the last version of the patch, and I do have a few
comments. I haven't done any performance testing at this point, as the
machine I'm using for that is chewing on something else at the moment.

1) bringetbitmap(PG_FUNCTION_ARGS)

+ /*
+  * Estimate the approximate size of btup and allocate memory for btup.
+  */
+ btupInitSize = bdesc->bd_tupdesc->natts * 16;
+ btup = palloc(btupInitSize);

What is the reasoning behind this estimate? I mean, this only accounts
for the values, not the NULL bitmap or BrinTuple fields. Maybe it does
not really matter much, but I'd like to see some brief description in
the docs, please.

This also interacts with our memory allocator in a rather annoying way,
because palloc() always allocates memory in 2^k chunks, but sadly the
estimate ignores that. So for natts=3 we get btupInitSize=48, but
internally allocate 64B (and ignore the top 16B).

2) bringetbitmap(PG_FUNCTION_ARGS)

if (tup)
{
if(size <= btupInitSize)
memcpy(btup, tup, size);
else
btup = brin_copy_tuple(tup, size);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}

Is there any particular reason why not to update btupInitSize to the
size value? I mean, if the estimate is too low, then we'll call
brin_copy_tuple() for all BRIN tuples, no?

In other words, I think the code should do this:

if (tup)
{
if(size <= btupInitSize)
memcpy(btup, tup, size);
else
{
btup = brin_copy_tuple(tup, size);
brinInitSize = size;
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}

Another option might be the usual "doubling" strategy, i.e. do something
like this instead:

if (tup)
{
while (btupInitSize < size)
btupInitSize *= 2;

btup = repalloc(btup, btupInitSize);
memcpy(btup, tup, size);

LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}

Possibly with only doing the repalloc when actually needed.

3) brin_new_memtuple(...)

The changes in this function seem wrong to me. Not only you've removed
the code that initialized all the bv_* attributes, you're also using
palloc() so there could easily be random data. This might be OK for our
code as we call brin_memtuple_initialize() right after this function,
and that seems to do the init, but AFAIK brin_new_memtuple() is a part
of our public API at it's in a header file. And these changes surely
break the contract documented in the comment above the function:

* Create a new BrinMemTuple from scratch, and initialize it to an empty
* state.

So I think this is wrong and needs to be reverted.

It's possible that we'll immediately reset the attributes, but that's
negligible cost - we expect to do brin_memtuple_initialize() many times
for the tuple, so it should not make any difference.

Another thing is that the three arrays (bt_values, bt_allnulls and
bt_hasnulls) may be allocated as a single chunk and then sliced.

char * ptr = palloc0(space for all three arrays);
bt_values = ptr;
bt_allnulls = ptr + (size of bt_values)
bt_bt_hasnulls = ptr + (size of bt_values + bt_allnulls)

which is essentially what the original code did with bv_values. This
reduces palloc() overhead and fragmentation.

4) brin_memtuple_initialize(...)

It's possible that the new code needs to reset more fields, but I don't
really see why it should mess with dtuple->bt_columns[i].bv_values for
example.

5) brin_deform_tuple(..)

The comment before the function probably should explain the purpose of
the new parameter (that it can either be NULL or an existing tuple).

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#8Michael Paquier
michael.paquier@gmail.com
In reply to: Tomas Vondra (#7)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

On Tue, Dec 15, 2015 at 10:41 AM, Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

On 10/16/2015 08:00 PM, Jinyu Zhang wrote:

Update the patch_brin_optimze_mem according to your comment.

I've reviewed the last version of the patch, and I do have a few comments. I
haven't done any performance testing at this point, as the machine I'm using
for that is chewing on something else at the moment.

I have switched the patch as returned with feedback for now based on
the review of Tomas that is still waiting some input from the author.
Jinyu, if you are still planning to work on that, feel free to move
your patch to the next commit fest 2016-01. Thanks!
--
Michael

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#9Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Jinyu Zhang (#6)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Jinyu Zhang wrote:

Update the patch_brin_optimze_mem according to your comment.

I have added this patch to the commitfest, which I've been intending to
get in for a long time. I'll be submitting an updated patch soon.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#10Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Alvaro Herrera (#9)
1 attachment(s)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Alvaro Herrera wrote:

Jinyu Zhang wrote:

Update the patch_brin_optimze_mem according to your comment.

I have added this patch to the commitfest, which I've been intending to
get in for a long time. I'll be submitting an updated patch soon.

Here it is. I addressed some of Tomas' comments, but not all (so this
is mostly just a rebased on Jinyu's original submission).

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

brin_optimize_mem-3.patchtext/plain; charset=us-asciiDownload
diff --git a/contrib/pageinspect/brinfuncs.c b/contrib/pageinspect/brinfuncs.c
index 2c7963e..dc9cc2d 100644
--- a/contrib/pageinspect/brinfuncs.c
+++ b/contrib/pageinspect/brinfuncs.c
@@ -226,7 +226,8 @@ brin_page_items(PG_FUNCTION_ARGS)
 			if (ItemIdIsUsed(itemId))
 			{
 				dtup = brin_deform_tuple(bdesc,
-									(BrinTuple *) PageGetItem(page, itemId));
+									(BrinTuple *) PageGetItem(page, itemId),
+									NULL);
 				attno = 1;
 				unusedItem = false;
 			}
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index b22563b..04c50fe 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -182,7 +182,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
 			MemoryContextSwitchTo(tupcxt);
 		}
 
-		dtup = brin_deform_tuple(bdesc, brtup);
+		dtup = brin_deform_tuple(bdesc, brtup, NULL);
 
 		/*
 		 * Compare the key values of the new tuple to the stored index values;
@@ -328,6 +328,9 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	FmgrInfo   *consistentFn;
 	MemoryContext oldcxt;
 	MemoryContext perRangeCxt;
+	BrinMemTuple *dtup;
+	BrinTuple    *btup;
+	uint32	     btupInitSize;
 
 	opaque = (BrinOpaque *) scan->opaque;
 	bdesc = opaque->bo_bdesc;
@@ -348,6 +351,13 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	 * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
 	 */
 	consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
+	dtup = brin_new_memtuple(bdesc);
+
+	/*
+	 * Estimate the approximate size of btup and allocate memory for btup.
+	 */
+	btupInitSize = bdesc->bd_tupdesc->natts * 16;
+	btup = palloc(btupInitSize);
 
 	/*
 	 * Setup and use a per-range memory context, which is reset every time we
@@ -379,7 +389,10 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 									   scan->xs_snapshot);
 		if (tup)
 		{
-			tup = brin_copy_tuple(tup, size);
+			if(size <= btupInitSize)
+				memcpy(btup, tup, size);
+			else
+				btup = brin_copy_tuple(tup, size);
 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 		}
 
@@ -387,15 +400,13 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 		 * For page ranges with no indexed tuple, we must return the whole
 		 * range; otherwise, compare it to the scan keys.
 		 */
-		if (tup == NULL)
+		if (btup == NULL)
 		{
 			addrange = true;
 		}
 		else
 		{
-			BrinMemTuple *dtup;
-
-			dtup = brin_deform_tuple(bdesc, tup);
+			dtup = brin_deform_tuple(bdesc, btup, dtup);
 			if (dtup->bt_placeholder)
 			{
 				/*
@@ -1175,7 +1186,7 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
 								"brin union",
 								ALLOCSET_DEFAULT_SIZES);
 	oldcxt = MemoryContextSwitchTo(cxt);
-	db = brin_deform_tuple(bdesc, b);
+	db = brin_deform_tuple(bdesc, b, NULL);
 	MemoryContextSwitchTo(oldcxt);
 
 	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index ec5fc56..20b0079 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -348,54 +348,69 @@ BrinMemTuple *
 brin_new_memtuple(BrinDesc *brdesc)
 {
 	BrinMemTuple *dtup;
-	char	   *currdatum;
 	long		basesize;
-	int			i;
 
 	basesize = MAXALIGN(sizeof(BrinMemTuple) +
 						sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
 	dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
-	currdatum = (char *) dtup + basesize;
-	for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
-	{
-		dtup->bt_columns[i].bv_attno = i + 1;
-		dtup->bt_columns[i].bv_allnulls = true;
-		dtup->bt_columns[i].bv_hasnulls = false;
-		dtup->bt_columns[i].bv_values = (Datum *) currdatum;
-		currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
-	}
+
+	dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
+	dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
 
 	dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
 											 "brin dtuple",
 											 ALLOCSET_DEFAULT_SIZES);
+
+	brin_memtuple_initialize(dtup, brdesc);
+
 	return dtup;
 }
 
 /*
- * Reset a BrinMemTuple to initial state
+ * Reset a BrinMemTuple to initial state.  We return the same tuple, for
+ * notational convenience.
  */
-void
+BrinMemTuple *
 brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
 {
 	int			i;
+	char	   *currdatum;
 
 	MemoryContextReset(dtuple->bt_context);
+
+	currdatum = (char *) dtuple +
+		MAXALIGN(sizeof(BrinMemTuple) +
+				 sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
 	for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
 	{
 		dtuple->bt_columns[i].bv_allnulls = true;
 		dtuple->bt_columns[i].bv_hasnulls = false;
+
+		dtuple->bt_columns[i].bv_attno = i + 1;
+		dtuple->bt_columns[i].bv_allnulls = true;
+		dtuple->bt_columns[i].bv_hasnulls = false;
+		dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
+		currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
 	}
+
+	return dtuple;
 }
 
 /*
  * Convert a BrinTuple back to a BrinMemTuple.  This is the reverse of
  * brin_form_tuple.
  *
+ * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
+ * This avoids having to allocate it here, which can be useful when this
+ * function is called many times in a loop.  It is caller's responsibility
+ * that the given BrinMemTuple matches what we need here.
+ *
  * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
  * deconstruct the tuple from the on-disk format.
  */
 BrinMemTuple *
-brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
+brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
 {
 	BrinMemTuple *dtup;
 	Datum	   *values;
@@ -407,15 +422,16 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
 	int			valueno;
 	MemoryContext oldcxt;
 
-	dtup = brin_new_memtuple(brdesc);
+	dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
+		brin_new_memtuple(brdesc);
 
 	if (BrinTupleIsPlaceholder(tuple))
 		dtup->bt_placeholder = true;
 	dtup->bt_blkno = tuple->bt_blkno;
 
-	values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
-	allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
-	hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
+	values = dtup->bt_values;
+	allnulls = dtup->bt_allnulls;
+	hasnulls = dtup->bt_hasnulls;
 
 	tp = (char *) tuple + BrinTupleDataOffset(tuple);
 
@@ -458,10 +474,6 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
 
 	MemoryContextSwitchTo(oldcxt);
 
-	pfree(values);
-	pfree(allnulls);
-	pfree(hasnulls);
-
 	return dtup;
 }
 
diff --git a/src/include/access/brin_tuple.h b/src/include/access/brin_tuple.h
index 6927865..86d8f98 100644
--- a/src/include/access/brin_tuple.h
+++ b/src/include/access/brin_tuple.h
@@ -38,6 +38,11 @@ typedef struct BrinMemTuple
 	bool		bt_placeholder; /* this is a placeholder tuple */
 	BlockNumber bt_blkno;		/* heap blkno that the tuple is for */
 	MemoryContext bt_context;	/* memcxt holding the bt_columns values */
+	/* output arrays for brin_deform_tuple: */
+	Datum		*bt_values;		/* values array */
+	bool		*bt_allnulls;	/* allnulls array */
+	bool		*bt_hasnulls; 	/* hasnulls array */
+	/* not an output array, but must be last */
 	BrinValues	bt_columns[FLEXIBLE_ARRAY_MEMBER];
 } BrinMemTuple;
 
@@ -88,9 +93,9 @@ extern bool brin_tuples_equal(const BrinTuple *a, Size alen,
 				  const BrinTuple *b, Size blen);
 
 extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc);
-extern void brin_memtuple_initialize(BrinMemTuple *dtuple,
+extern BrinMemTuple *brin_memtuple_initialize(BrinMemTuple *dtuple,
 						 BrinDesc *brdesc);
 extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc,
-				  BrinTuple *tuple);
+				  BrinTuple *tuple, BrinMemTuple *dMemtuple);
 
 #endif   /* BRIN_TUPLE_H */
#11Robert Haas
robertmhaas@gmail.com
In reply to: Alvaro Herrera (#10)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

On Wed, Mar 1, 2017 at 11:28 PM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:

Alvaro Herrera wrote:

Jinyu Zhang wrote:

Update the patch_brin_optimze_mem according to your comment.

I have added this patch to the commitfest, which I've been intending to
get in for a long time. I'll be submitting an updated patch soon.

Here it is. I addressed some of Tomas' comments, but not all (so this
is mostly just a rebased on Jinyu's original submission).

I think we should regard this resubmission as untimely, unless there
is an argument that it amounts to a bug fix.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#12David Steele
david@pgmasters.net
In reply to: Robert Haas (#11)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

On 3/4/17 12:43 AM, Robert Haas wrote:

On Wed, Mar 1, 2017 at 11:28 PM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:

Alvaro Herrera wrote:

Jinyu Zhang wrote:

Update the patch_brin_optimze_mem according to your comment.

I have added this patch to the commitfest, which I've been intending to
get in for a long time. I'll be submitting an updated patch soon.

Here it is. I addressed some of Tomas' comments, but not all (so this
is mostly just a rebased on Jinyu's original submission).

I think we should regard this resubmission as untimely, unless there
is an argument that it amounts to a bug fix.

I agree and I'm also confused about which author this is waiting on. Is
it Jinyu or Álvaro?

Thanks,
--
-David
david@pgmasters.net

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#13Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: David Steele (#12)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

David Steele wrote:

On 3/4/17 12:43 AM, Robert Haas wrote:

I think we should regard this resubmission as untimely, unless there
is an argument that it amounts to a bug fix.

I agree and I'm also confused about which author this is waiting on. Is
it Jinyu or �lvaro?

I don't think Jinyu is interested in the project anymore, so it's me
that this is waiting on.

I intend to push this at some point before feature freeze.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#14Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Tomas Vondra (#7)
Re: Patch: Optimize memory allocation in function 'bringetbitmap'

Tomas Vondra wrote:

1) bringetbitmap(PG_FUNCTION_ARGS)

+ /*
+  * Estimate the approximate size of btup and allocate memory for btup.
+  */
+ btupInitSize = bdesc->bd_tupdesc->natts * 16;
+ btup = palloc(btupInitSize);

What is the reasoning behind this estimate? I mean, this only accounts for
the values, not the NULL bitmap or BrinTuple fields. Maybe it does not
really matter much, but I'd like to see some brief description in the docs,
please.

This also interacts with our memory allocator in a rather annoying way,
because palloc() always allocates memory in 2^k chunks, but sadly the
estimate ignores that. So for natts=3 we get btupInitSize=48, but internally
allocate 64B (and ignore the top 16B).

I fixed this point (and the following one, which is essentially
complaining about the same thing) by changing the API of
brin_copy_tuple, similarly to the changes we made to brin_deform_tuple:
pass the previously allocated memory (along with its size) as arguments,
so that it can be repalloc'ed if necessary, or just re-used as-is.

That seemed the most effective way to solve the points you raised.

Your review was extremely useful, many thanks.

In a extremely simpleminded test to measure the benefit, I did this:

create table t (a int, b bigint, c bigint) ;
insert into t select g, g, g from generate_series(1, 10000 * 1000) g;
-- generates about 800 MB of data; fits in shared_buffers
create index ti on t using brin (a, b, c) with (pages_per_range = 1);
-- index contains about 84 revmap pages

and then measured this query:
explain analyze select * from t where b between 245782 and 1256782;
with and without this patch. It goes from 40ms without patch to 25ms
with, which seems a decent enough improvement. (Assertions were
enabled, but I disabled memory clobbering).

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers