refactor the CopyOneRowTo
original CopyOneRowTo:
https://git.postgresql.org/cgit/postgresql.git/tree/src/backend/commands/copyto.c#n922
I change it to:
-----------------------
if (!cstate->opts.binary)
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];
if (need_delim)
CopySendChar(cstate, cstate->opts.delim[0]);
need_delim = true;
if (isnull)
CopySendString(cstate, cstate->opts.null_print_client);
else
{
string = OutputFunctionCall(&out_functions[attnum - 1],
value);
if (cstate->opts.csv_mode)
CopyAttributeOutCSV(cstate, string,
cstate->opts.force_quote_flags[attnum - 1]);
else
CopyAttributeOutText(cstate, string);
}
}
}
else
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];
bytea *outputbytes;
if (isnull)
CopySendInt32(cstate, -1);
else
{
outputbytes = SendFunctionCall(&out_functions[attnum - 1],
value);
CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
CopySendData(cstate, VARDATA(outputbytes),
VARSIZE(outputbytes) - VARHDRSZ);
}
}
}
overall less "if else" logic,
also copy format don't change during copy, we don't need to check
binary or nor for each datum value.
Attachments:
v1-0001-refactor-CopyOneRowTo.patchtext/x-patch; charset=US-ASCII; name=v1-0001-refactor-CopyOneRowTo.patchDownload
From eff237930ddc8727044aff29e5b01499f1b14c77 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Fri, 5 Jul 2024 00:19:04 +0800
Subject: [PATCH v1 1/1] refactor CopyOneRowTo
---
src/backend/commands/copyto.c | 44 +++++++++++++++++------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ae8b2e36..ffafe4c7 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -904,7 +904,6 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
bool need_delim = false;
FmgrInfo *out_functions = cstate->out_functions;
MemoryContext oldcontext;
- ListCell *cur;
char *string;
MemoryContextReset(cstate->rowcontext);
@@ -919,30 +918,21 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
/* Make sure the tuple is fully deconstructed */
slot_getallattrs(slot);
- foreach(cur, cstate->attnumlist)
+ if (!cstate->opts.binary)
{
- int attnum = lfirst_int(cur);
- Datum value = slot->tts_values[attnum - 1];
- bool isnull = slot->tts_isnull[attnum - 1];
-
- if (!cstate->opts.binary)
+ foreach_int(attnum, cstate->attnumlist)
{
+ Datum value = slot->tts_values[attnum - 1];
+ bool isnull = slot->tts_isnull[attnum - 1];
+
if (need_delim)
CopySendChar(cstate, cstate->opts.delim[0]);
need_delim = true;
- }
- if (isnull)
- {
- if (!cstate->opts.binary)
+ if (isnull)
CopySendString(cstate, cstate->opts.null_print_client);
else
- CopySendInt32(cstate, -1);
- }
- else
- {
- if (!cstate->opts.binary)
- {
+ {
string = OutputFunctionCall(&out_functions[attnum - 1],
value);
if (cstate->opts.csv_mode)
@@ -951,15 +941,25 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
else
CopyAttributeOutText(cstate, string);
}
- else
- {
- bytea *outputbytes;
+ }
+ }
+ else
+ {
+ foreach_int(attnum, cstate->attnumlist)
+ {
+ Datum value = slot->tts_values[attnum - 1];
+ bool isnull = slot->tts_isnull[attnum - 1];
+ bytea *outputbytes;
+ if (isnull)
+ CopySendInt32(cstate, -1);
+ else
+ {
outputbytes = SendFunctionCall(&out_functions[attnum - 1],
- value);
+ value);
CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
CopySendData(cstate, VARDATA(outputbytes),
- VARSIZE(outputbytes) - VARHDRSZ);
+ VARSIZE(outputbytes) - VARHDRSZ);
}
}
}
base-commit: dec9d4acdb7d758c3cbe989ad80cf0367f4e166d
--
2.34.1
Hi, hackers
I'm sure this refactoring is useful because it's more readable when
datum value is binary or not.
However, I can see a little improvement. We can declare variable 'bytea
*outputbytes' in 'else' because variable is used nowhere except this place.
Regards,
Ilia Evdokimov,
Tantor Labs LLC.
On Fri, Jul 5, 2024 at 12:26 AM jian he <jian.universality@gmail.com> wrote:
original CopyOneRowTo:
https://git.postgresql.org/cgit/postgresql.git/tree/src/backend/commands/copyto.c#n922
I change it to:
-----------------------
if (!cstate->opts.binary)
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];if (need_delim)
CopySendChar(cstate, cstate->opts.delim[0]);
need_delim = true;if (isnull)
CopySendString(cstate, cstate->opts.null_print_client);
else
{
string = OutputFunctionCall(&out_functions[attnum - 1],
value);
if (cstate->opts.csv_mode)
CopyAttributeOutCSV(cstate, string,
cstate->opts.force_quote_flags[attnum - 1]);
else
CopyAttributeOutText(cstate, string);
}
}
}
else
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];
bytea *outputbytes;if (isnull)
CopySendInt32(cstate, -1);
else
{
outputbytes = SendFunctionCall(&out_functions[attnum - 1],
value);
CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
CopySendData(cstate, VARDATA(outputbytes),
VARSIZE(outputbytes) - VARHDRSZ);
}
}
}overall less "if else" logic,
also copy format don't change during copy, we don't need to check
binary or nor for each datum value.
I believe what you proposed is included in the patch 0002
attached in [1]/messages/by-id/20240724.173059.909782980111496972.kou@clear-code.com, but you use foreach_int, which I think is
an improvement.
[1]: /messages/by-id/20240724.173059.909782980111496972.kou@clear-code.com
--
Regards
Junwang Zhao
On 31/07/2024 16:30, Junwang Zhao wrote:
On Fri, Jul 5, 2024 at 12:26 AM jian he <jian.universality@gmail.com> wrote:
overall less "if else" logic,
also copy format don't change during copy, we don't need to check
binary or nor for each datum value.
Committed, thanks.
For the archives: this code is in a very hot path during COPY TO, so I
did some quick micro-benchmarking on my laptop. I used this:
COPY (select
0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10
from generate_series(1, 1_000_000) g) TO '/dev/null';
and
COPY (select 0 from generate_series(1, 1_000_000) g) TO '/dev/null';
to check the impact with few attributes and with many attributes. I
repeated that a few times with \timing with and without the patch, and
eyeballed the result. I also used 'perf' to check the fraction of CPU
time spent in CopyOneRowTo. Conclusion: the patch has no performance impact.
--
Heikki Linnakangas
Neon (https://neon.tech)