refactor the CopyOneRowTo

Started by jian heover 1 year ago4 messages
#1jian he
jian.universality@gmail.com
1 attachment(s)

original CopyOneRowTo:
https://git.postgresql.org/cgit/postgresql.git/tree/src/backend/commands/copyto.c#n922
I change it to:
-----------------------
if (!cstate->opts.binary)
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];

if (need_delim)
CopySendChar(cstate, cstate->opts.delim[0]);
need_delim = true;

if (isnull)
CopySendString(cstate, cstate->opts.null_print_client);
else
{
string = OutputFunctionCall(&out_functions[attnum - 1],
value);
if (cstate->opts.csv_mode)
CopyAttributeOutCSV(cstate, string,
cstate->opts.force_quote_flags[attnum - 1]);
else
CopyAttributeOutText(cstate, string);
}
}
}
else
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];
bytea *outputbytes;

if (isnull)
CopySendInt32(cstate, -1);
else
{
outputbytes = SendFunctionCall(&out_functions[attnum - 1],
value);
CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
CopySendData(cstate, VARDATA(outputbytes),
VARSIZE(outputbytes) - VARHDRSZ);
}
}
}

overall less "if else" logic,
also copy format don't change during copy, we don't need to check
binary or nor for each datum value.

Attachments:

v1-0001-refactor-CopyOneRowTo.patchtext/x-patch; charset=US-ASCII; name=v1-0001-refactor-CopyOneRowTo.patchDownload
From eff237930ddc8727044aff29e5b01499f1b14c77 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Fri, 5 Jul 2024 00:19:04 +0800
Subject: [PATCH v1 1/1] refactor CopyOneRowTo

---
 src/backend/commands/copyto.c | 44 +++++++++++++++++------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ae8b2e36..ffafe4c7 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -904,7 +904,6 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 	bool		need_delim = false;
 	FmgrInfo   *out_functions = cstate->out_functions;
 	MemoryContext oldcontext;
-	ListCell   *cur;
 	char	   *string;
 
 	MemoryContextReset(cstate->rowcontext);
@@ -919,30 +918,21 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 	/* Make sure the tuple is fully deconstructed */
 	slot_getallattrs(slot);
 
-	foreach(cur, cstate->attnumlist)
+	if (!cstate->opts.binary)
 	{
-		int			attnum = lfirst_int(cur);
-		Datum		value = slot->tts_values[attnum - 1];
-		bool		isnull = slot->tts_isnull[attnum - 1];
-
-		if (!cstate->opts.binary)
+		foreach_int(attnum, cstate->attnumlist)
 		{
+			Datum		value = slot->tts_values[attnum - 1];
+			bool		isnull = slot->tts_isnull[attnum - 1];
+
 			if (need_delim)
 				CopySendChar(cstate, cstate->opts.delim[0]);
 			need_delim = true;
-		}
 
-		if (isnull)
-		{
-			if (!cstate->opts.binary)
+			if (isnull)
 				CopySendString(cstate, cstate->opts.null_print_client);
 			else
-				CopySendInt32(cstate, -1);
-		}
-		else
-		{
-			if (!cstate->opts.binary)
-			{
+			{			
 				string = OutputFunctionCall(&out_functions[attnum - 1],
 											value);
 				if (cstate->opts.csv_mode)
@@ -951,15 +941,25 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 				else
 					CopyAttributeOutText(cstate, string);
 			}
-			else
-			{
-				bytea	   *outputbytes;
+		}
+	}
+	else
+	{
+		foreach_int(attnum, cstate->attnumlist)
+		{
+			Datum		value = slot->tts_values[attnum - 1];
+			bool		isnull = slot->tts_isnull[attnum - 1];
+			bytea	   *outputbytes;
 
+			if (isnull)
+				CopySendInt32(cstate, -1);
+			else
+			{		
 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
-											   value);
+												value);
 				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
 				CopySendData(cstate, VARDATA(outputbytes),
-							 VARSIZE(outputbytes) - VARHDRSZ);
+								VARSIZE(outputbytes) - VARHDRSZ);
 			}
 		}
 	}

base-commit: dec9d4acdb7d758c3cbe989ad80cf0367f4e166d
-- 
2.34.1

#2Ilia Evdokimov
ilya.evdokimov@tantorlabs.com
In reply to: jian he (#1)
Re: refactor the CopyOneRowTo

Hi, hackers

I'm sure this refactoring is useful because it's more readable when
datum value is binary or not.

However, I can see a little improvement. We can declare variable 'bytea
*outputbytes' in 'else' because variable is used nowhere except this place.

Regards,

Ilia Evdokimov,

Tantor Labs LLC.

#3Junwang Zhao
zhjwpku@gmail.com
In reply to: jian he (#1)
Re: refactor the CopyOneRowTo

On Fri, Jul 5, 2024 at 12:26 AM jian he <jian.universality@gmail.com> wrote:

original CopyOneRowTo:
https://git.postgresql.org/cgit/postgresql.git/tree/src/backend/commands/copyto.c#n922
I change it to:
-----------------------
if (!cstate->opts.binary)
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];

if (need_delim)
CopySendChar(cstate, cstate->opts.delim[0]);
need_delim = true;

if (isnull)
CopySendString(cstate, cstate->opts.null_print_client);
else
{
string = OutputFunctionCall(&out_functions[attnum - 1],
value);
if (cstate->opts.csv_mode)
CopyAttributeOutCSV(cstate, string,
cstate->opts.force_quote_flags[attnum - 1]);
else
CopyAttributeOutText(cstate, string);
}
}
}
else
{
foreach_int(attnum, cstate->attnumlist)
{
Datum value = slot->tts_values[attnum - 1];
bool isnull = slot->tts_isnull[attnum - 1];
bytea *outputbytes;

if (isnull)
CopySendInt32(cstate, -1);
else
{
outputbytes = SendFunctionCall(&out_functions[attnum - 1],
value);
CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
CopySendData(cstate, VARDATA(outputbytes),
VARSIZE(outputbytes) - VARHDRSZ);
}
}
}

overall less "if else" logic,
also copy format don't change during copy, we don't need to check
binary or nor for each datum value.

I believe what you proposed is included in the patch 0002
attached in [1]/messages/by-id/20240724.173059.909782980111496972.kou@clear-code.com, but you use foreach_int, which I think is
an improvement.

[1]: /messages/by-id/20240724.173059.909782980111496972.kou@clear-code.com

--
Regards
Junwang Zhao

#4Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Junwang Zhao (#3)
Re: refactor the CopyOneRowTo

On 31/07/2024 16:30, Junwang Zhao wrote:

On Fri, Jul 5, 2024 at 12:26 AM jian he <jian.universality@gmail.com> wrote:

overall less "if else" logic,
also copy format don't change during copy, we don't need to check
binary or nor for each datum value.

Committed, thanks.

For the archives: this code is in a very hot path during COPY TO, so I
did some quick micro-benchmarking on my laptop. I used this:

COPY (select
0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10
from generate_series(1, 1_000_000) g) TO '/dev/null';

and

COPY (select 0 from generate_series(1, 1_000_000) g) TO '/dev/null';

to check the impact with few attributes and with many attributes. I
repeated that a few times with \timing with and without the patch, and
eyeballed the result. I also used 'perf' to check the fraction of CPU
time spent in CopyOneRowTo. Conclusion: the patch has no performance impact.

--
Heikki Linnakangas
Neon (https://neon.tech)