Optimization of the alignment padding
Hi Hackers,
After subtransactions were added,
the size of HeapTupleHeader became 27 bytes.
This consumes extra bytes per tuple for the alignment padding,
especially on systems where MAXIMUM_ALIGNOF is 8.
This patch optimizes the location of the first field,
and reduces the padding. I expect most rows to shrink by
about 4 bytes, if the table definition is suitable.
Following is a bit artificial test:
# CREATE TABLE test (c "char", i int4);
# INSERT INTO test VALUES('A', 1);
# SELECT * FROM pgstattuple('test');
the size of a tuple (8.1.0) is 40 bytes:
[27] HeapTupleHeader
[ 5] (padding)
[ 1] c "char"
[ 3] (padding)
[ 4] i int4
the size of a tuple (patched) is 32 bytes:
[27] HeapTupleHeader
[ 1] c "char"
[ 4] i int4
Is this effective? Or are there some problems?
I'll appreciate any comments.
Thanks,
---
ITAGAKI Takahiro
NTT Cyber Space Laboratories
Attachments:
table-padding.patch (application/octet-stream; name=table-padding.patch) [Download]
diff -cpr pgsql-orig/src/backend/access/common/heaptuple.c pgsql/src/backend/access/common/heaptuple.c
*** pgsql-orig/src/backend/access/common/heaptuple.c 2005-11-09 18:07:14.000000000 +0900
--- pgsql/src/backend/access/common/heaptuple.c 2005-11-09 18:38:50.000000000 +0900
*************** heap_compute_data_size(TupleDesc tupleDe
*** 43,63 ****
Datum *values,
bool *isnull)
{
! Size data_length = 0;
int i;
int numberOfAttributes = tupleDesc->natts;
Form_pg_attribute *att = tupleDesc->attrs;
for (i = 0; i < numberOfAttributes; i++)
{
if (isnull[i])
continue;
data_length = att_align(data_length, att[i]->attalign);
data_length = att_addlength(data_length, att[i]->attlen, values[i]);
}
! return data_length;
}
/* ----------------
--- 43,95 ----
Datum *values,
bool *isnull)
{
! Size start;
! Size end;
! heap_compute_data_size_aligned(tupleDesc, values, isnull, 0, &start, &end);
! return end - start;
! }
!
! /*
! * heap_compute_data_size_aligned
! * Determine size of the data area of a tuple to be constructed
! */
! void
! heap_compute_data_size_aligned(TupleDesc tupleDesc,
! Datum *values,
! bool *isnull,
! Size offset,
! Size *start,
! Size *end)
! {
! Size data_length = offset;
int i;
int numberOfAttributes = tupleDesc->natts;
Form_pg_attribute *att = tupleDesc->attrs;
+ AssertArg(start);
+ AssertArg(end);
+
for (i = 0; i < numberOfAttributes; i++)
{
if (isnull[i])
continue;
data_length = att_align(data_length, att[i]->attalign);
+ *start = data_length;
+ data_length = att_addlength(data_length, att[i]->attlen, values[i]);
+ i++;
+ break;
+ }
+ for (; i < numberOfAttributes; i++)
+ {
+ if (isnull[i])
+ continue;
+
+ data_length = att_align(data_length, att[i]->attalign);
data_length = att_addlength(data_length, att[i]->attlen, values[i]);
}
! *end = data_length;
}
/* ----------------
*************** heap_compute_data_size(TupleDesc tupleDe
*** 68,95 ****
* OLD API with char 'n'/' ' convention for indicating nulls
* ----------------
*/
! static Size
ComputeDataSize(TupleDesc tupleDesc,
Datum *values,
! char *nulls)
{
! Size data_length = 0;
int i;
int numberOfAttributes = tupleDesc->natts;
Form_pg_attribute *att = tupleDesc->attrs;
for (i = 0; i < numberOfAttributes; i++)
{
if (nulls[i] != ' ')
continue;
data_length = att_align(data_length, att[i]->attalign);
data_length = att_addlength(data_length, att[i]->attlen, values[i]);
}
! return data_length;
}
/*
* heap_fill_tuple
* Load data portion of a tuple from values/isnull arrays
--- 100,145 ----
* OLD API with char 'n'/' ' convention for indicating nulls
* ----------------
*/
! static void
ComputeDataSize(TupleDesc tupleDesc,
Datum *values,
! char *nulls,
! Size offset,
! Size *start,
! Size *end)
{
! Size data_length = offset;
int i;
int numberOfAttributes = tupleDesc->natts;
Form_pg_attribute *att = tupleDesc->attrs;
+ AssertArg(start);
+ AssertArg(end);
+
for (i = 0; i < numberOfAttributes; i++)
{
if (nulls[i] != ' ')
continue;
data_length = att_align(data_length, att[i]->attalign);
+ *start = data_length;
data_length = att_addlength(data_length, att[i]->attlen, values[i]);
+ i++;
+ break;
}
+ for (; i < numberOfAttributes; i++)
+ {
+ if (nulls[i] != ' ')
+ continue;
! data_length = att_align(data_length, att[i]->attalign);
! data_length = att_addlength(data_length, att[i]->attlen, values[i]);
! }
!
! *end = data_length;
}
+
/*
* heap_fill_tuple
* Load data portion of a tuple from values/isnull arrays
*************** heap_form_tuple(TupleDesc tupleDescripto
*** 666,673 ****
{
HeapTuple tuple; /* return tuple */
HeapTupleHeader td; /* tuple data */
! unsigned long len;
! int hoff;
bool hasnull = false;
Form_pg_attribute *att = tupleDescriptor->attrs;
int numberOfAttributes = tupleDescriptor->natts;
--- 716,723 ----
{
HeapTuple tuple; /* return tuple */
HeapTupleHeader td; /* tuple data */
! Size len;
! Size hoff;
bool hasnull = false;
Form_pg_attribute *att = tupleDescriptor->attrs;
int numberOfAttributes = tupleDescriptor->natts;
*************** heap_form_tuple(TupleDesc tupleDescripto
*** 714,724 ****
len += BITMAPLEN(numberOfAttributes);
if (tupleDescriptor->tdhasoid)
len += sizeof(Oid);
! hoff = len = MAXALIGN(len); /* align user data safely */
!
! len += heap_compute_data_size(tupleDescriptor, values, isnull);
/*
* Allocate and zero the space needed. Note that the tuple body and
--- 764,775 ----
len += BITMAPLEN(numberOfAttributes);
if (tupleDescriptor->tdhasoid)
+ {
len += sizeof(Oid);
+ len = TYPEALIGN(ALIGNOF_OID, len);
+ }
! heap_compute_data_size_aligned(tupleDescriptor, values, isnull, len, &hoff, &len);
/*
* Allocate and zero the space needed. Note that the tuple body and
*************** heap_formtuple(TupleDesc tupleDescriptor
*** 774,781 ****
{
HeapTuple tuple; /* return tuple */
HeapTupleHeader td; /* tuple data */
! unsigned long len;
! int hoff;
bool hasnull = false;
Form_pg_attribute *att = tupleDescriptor->attrs;
int numberOfAttributes = tupleDescriptor->natts;
--- 825,832 ----
{
HeapTuple tuple; /* return tuple */
HeapTupleHeader td; /* tuple data */
! Size len;
! Size hoff;
bool hasnull = false;
Form_pg_attribute *att = tupleDescriptor->attrs;
int numberOfAttributes = tupleDescriptor->natts;
*************** heap_formtuple(TupleDesc tupleDescriptor
*** 822,832 ****
len += BITMAPLEN(numberOfAttributes);
if (tupleDescriptor->tdhasoid)
len += sizeof(Oid);
! hoff = len = MAXALIGN(len); /* align user data safely */
!
! len += ComputeDataSize(tupleDescriptor, values, nulls);
/*
* Allocate and zero the space needed. Note that the tuple body and
--- 873,884 ----
len += BITMAPLEN(numberOfAttributes);
if (tupleDescriptor->tdhasoid)
+ {
len += sizeof(Oid);
+ len = TYPEALIGN(ALIGNOF_OID, len);
+ }
! ComputeDataSize(tupleDescriptor, values, nulls, len, &hoff, &len);
/*
* Allocate and zero the space needed. Note that the tuple body and
diff -cpr pgsql-orig/src/include/access/heapam.h pgsql/src/include/access/heapam.h
*** pgsql-orig/src/include/access/heapam.h 2005-11-09 18:07:14.000000000 +0900
--- pgsql/src/include/access/heapam.h 2005-11-09 18:17:06.000000000 +0900
*************** extern XLogRecPtr log_heap_move(Relation
*** 188,193 ****
--- 188,196 ----
/* in common/heaptuple.c */
extern Size heap_compute_data_size(TupleDesc tupleDesc,
Datum *values, bool *isnull);
+ extern void heap_compute_data_size_aligned(TupleDesc tupleDesc,
+ Datum *values, bool *isnull,
+ Size offset, Size *start, Size *end);
extern void heap_fill_tuple(TupleDesc tupleDesc,
Datum *values, bool *isnull,
char *data, uint16 *infomask, bits8 *bit);
diff -cpr pgsql-orig/src/include/postgres_ext.h pgsql/src/include/postgres_ext.h
*** pgsql-orig/src/include/postgres_ext.h 2005-11-09 18:07:14.000000000 +0900
--- pgsql/src/include/postgres_ext.h 2005-11-09 18:12:24.000000000 +0900
*************** typedef unsigned int Oid;
*** 35,40 ****
--- 35,41 ----
#endif
#define OID_MAX UINT_MAX
+ #define ALIGNOF_OID ALIGNOF_INT
/* you will need to include <limits.h> to use the above #define */
ITAGAKI Takahiro wrote:
After the subtransaction had been added,
the size of HeapTupleHeader became 27 bytes.
This consumes extra bytes per tuple for the alignment padding,
especially on systems where MAXIMUM_ALIGNOF is 8.
There was a discussion during the 8.1 devel cycle about shortening the
HeapTupleHeader struct. It involved some games with the command Ids.
Maybe you'll want to look at that, as it could have an impact on what
you're trying to do here. It reduced the size of the header by 4 bytes.
There was even a detailed design posted by Tom, I see you were copied on
it:
From: Tom Lane <tgl@sss.pgh.pa.us>
To: Alvaro Herrera <alvherre@alvh.no-ip.org>
Cc: "Jim C. Nasby" <jnasby@pervasive.com>,
Bruce Momjian <pgman@candle.pha.pa.us>,
ITAGAKI Takahiro <itagaki.takahiro@lab.ntt.co.jp>,
pgsql-hackers@postgresql.org, mkoi-pg@aon.at
Date: Wed, 07 Sep 2005 13:38:07 -0400
Subject: Re: [HACKERS] Remove xmin and cmin from frozen tuples
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
Alvaro Herrera <alvherre@commandprompt.com> writes:
There was a discussion during the 8.1 devel cycle about shortening the
HeapTupleHeader struct. It involved some games with the command Ids.
Maybe you'll want to look at that, as it could have an impact on what
you're trying to do here.
It would, in fact, largely eliminate the point of this patch, since the
standard header size would go back to being a multiple of 8.
I believe the patch is a bad idea as proposed, even if it works at all
(have you tested it on machines that enforce alignment?) The reason is
that if the start of the tuple data area isn't necessarily at a MAXALIGN
boundary, then the internal padding within the tuple depends on what
alignment the start was at --- consider the case where a double-aligned
field follows some fields that have lesser alignment. This is going to
vastly complicate matters, because it will not be possible to lay out
the tuple contents without first figuring out exactly what the header looks
like --- ie, whether there's a null bitmap (and how long the bitmap is),
whether there's an OID, and so on. It will probably actually break some
places, because I think we sometimes attach a data area to a separately
created header.
regards, tom lane
Tom Lane <tgl@sss.pgh.pa.us> wrote:
Alvaro Herrera <alvherre@commandprompt.com> writes:
There was a discussion during the 8.1 devel cycle about shortening the
HeapTupleHeader struct.
It would, in fact, largely eliminate the point of this patch, since the
standard header size would go back to being a multiple of 8.
Yes, I know the discussion, but I think it and this patch don't conflict.
This padding does not disappear completely even after the shortening
improvement is done, e.g. for tuples whose null bitmap is 2 bytes or longer.
the internal padding within the tuple depends on what
alignment the start was at
Tuple headers must be located at a MAXALIGN boundary,
so internal padding does not change as long as a relative position
between the header and the first field is fixed.
but...
It will probably actually break some
places, because I think we sometimes attach a data area to a separately
created header.
Thanks, I didn't consider it.
I'll check the cases and whether they can be resolved.
---
ITAGAKI Takahiro
NTT Cyber Space Laboratories
There is a long TODO about it:
* Merge xmin/xmax/cmin/cmax back into three header fields
Before subtransactions, there used to be only three fields needed to
store these four values. This was possible because only the current
transaction looks at the cmin/cmax values. If the current transaction
created and expired the row, the fields stored were xmin (same as
xmax), cmin, cmax, and if the transaction was expiring a row from
another transaction, the fields stored were xmin (cmin was not
needed), xmax, and cmax. Such a system worked because a transaction
could only see rows from another completed transaction. However,
subtransactions can see rows from outer transactions, and once the
subtransaction completes, the outer transaction continues, requiring
the storage of all four fields. With subtransactions, an outer
transaction can create a row, a subtransaction expire it, and when the
subtransaction completes, the outer transaction still has to have
proper visibility of the row's cmin, for example, for cursors.
One possible solution is to create a phantom cid which represents a
cmin/cmax pair and is stored in local memory. Another idea is to
store both cmin and cmax only in local memory.
As mentioned before, this saves four bytes in all cases.
---------------------------------------------------------------------------
Alvaro Herrera wrote:
ITAGAKI Takahiro wrote:
After the subtransaction had been added,
the size of HeapTupleHeader became 27 bytes.
This consumes extra bytes per tuple for the alignment padding,
especially on systems where MAXIMUM_ALIGNOF is 8.
There was a discussion during the 8.1 devel cycle about shortening the
HeapTupleHeader struct. It involved some games with the command Ids.
Maybe you'll want to look at that, as it could have an impact on what
you're trying to do here. It reduced the size of the header by 4 bytes.
There was even a detailed design posted by Tom, I see you were copied on
it:
From: Tom Lane <tgl@sss.pgh.pa.us>
To: Alvaro Herrera <alvherre@alvh.no-ip.org>
Cc: "Jim C. Nasby" <jnasby@pervasive.com>,
Bruce Momjian <pgman@candle.pha.pa.us>,
ITAGAKI Takahiro <itagaki.takahiro@lab.ntt.co.jp>,
pgsql-hackers@postgresql.org, mkoi-pg@aon.at
Date: Wed, 07 Sep 2005 13:38:07 -0400
Subject: Re: [HACKERS] Remove xmin and cmin from frozen tuples
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 359-1001
+ If your life is a hard drive, | 13 Roberts Road
+ Christ can be your backup. | Newtown Square, Pennsylvania 19073