pg_dump insert with column names speedup

Started by David Rowley over 12 years ago, 8 messages
#1 David Rowley
dgrowleyml@gmail.com
1 attachment(s)

Here's a small patch which greatly increases the speed of
pg_dump --column-inserts.

Prior to this patch, the string listing the column names was rebuilt for
every row in the table. I've changed this so that it is built only once per
table.
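
For illustration, --column-inserts emits one INSERT statement per row, so
the parenthesised column list is the same for every row of a table. With
made-up table and column names, the output looks roughly like this:

INSERT INTO t (id, val) VALUES (1, 'one');
INSERT INTO t (id, val) VALUES (2, 'two');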

I also did some cleanup to avoid calling the varargs (printf-style) function
for writing to the backup file where no formatting is needed; those calls now
use the puts-style function instead, which should be faster.

Here are some benchmark results on how it performs:

*** patched ***
D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 5850 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 5790 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 5700 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 5760 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 5730 ms

*** unpatched ***

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 9580 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 9330 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 9250 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 9230 ms

D:\9.4\bin>timer pg_dump --column-inserts -f d:\postgres_backup.sql postgres
Ran for 9240 ms

The database contained a single 1 million record table with 6 columns.
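
As a rough sketch (the table and column names and types here are assumptions,
not the actual benchmark table), a comparable table can be built with
something like:

CREATE TABLE bench AS
SELECT i            AS id,
       i % 1000     AS grp,
       i::text      AS label,
       random()     AS x,
       now()        AS created,
       (i % 2 = 0)  AS flag
FROM generate_series(1, 1000000) AS s(i);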

Regards

David Rowley

Attachments:

pg_dump_colinsert_v0.1.patch
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 57320cc..696cc83 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1550,6 +1550,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 	TableInfo  *tbinfo = tdinfo->tdtable;
 	const char *classname = tbinfo->dobj.name;
 	PQExpBuffer q = createPQExpBuffer();
+	PQExpBuffer collist = NULL;
 	PGresult   *res;
 	int			tuple;
 	int			nfields;
@@ -1595,30 +1596,34 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 			if (nfields == 0)
 			{
 				/* corner case for zero-column table */
-				archprintf(fout, "DEFAULT VALUES;\n");
+				archputs("DEFAULT VALUES;\n", fout);
 				continue;
 			}
 			if (column_inserts)
 			{
-				resetPQExpBuffer(q);
-				appendPQExpBuffer(q, "(");
-				for (field = 0; field < nfields; field++)
+				/* build the list of column names if required */
+				if (collist == NULL)
 				{
-					if (field > 0)
-						appendPQExpBuffer(q, ", ");
-					appendPQExpBufferStr(q, fmtId(PQfname(res, field)));
+					collist = createPQExpBuffer();
+					appendPQExpBuffer(collist, "(");
+					for (field = 0; field < nfields; field++)
+					{
+						if (field > 0)
+							appendPQExpBuffer(collist, ", ");
+						appendPQExpBufferStr(collist, fmtId(PQfname(res, field)));
+					}
+					appendPQExpBuffer(collist, ") ");
 				}
-				appendPQExpBuffer(q, ") ");
-				archputs(q->data, fout);
+				archputs(collist->data, fout);
 			}
-			archprintf(fout, "VALUES (");
+			archputs("VALUES (", fout);
 			for (field = 0; field < nfields; field++)
 			{
 				if (field > 0)
-					archprintf(fout, ", ");
+					archputs(", ", fout);
 				if (PQgetisnull(res, tuple, field))
 				{
-					archprintf(fout, "NULL");
+					archputs("NULL", fout);
 					continue;
 				}
 
@@ -1647,7 +1652,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 							const char *s = PQgetvalue(res, tuple, field);
 
 							if (strspn(s, "0123456789 +-eE.") == strlen(s))
-								archprintf(fout, "%s", s);
+								archputs(s, fout);
 							else
 								archprintf(fout, "'%s'", s);
 						}
@@ -1661,9 +1666,9 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 
 					case BOOLOID:
 						if (strcmp(PQgetvalue(res, tuple, field), "t") == 0)
-							archprintf(fout, "true");
+							archputs("true", fout);
 						else
-							archprintf(fout, "false");
+							archputs("false", fout);
 						break;
 
 					default:
@@ -1676,7 +1681,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 						break;
 				}
 			}
-			archprintf(fout, ");\n");
+			archputs(");\n", fout);
 		}
 
 		if (PQntuples(res) <= 0)
@@ -1687,11 +1692,15 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		PQclear(res);
 	}
 
-	archprintf(fout, "\n\n");
+	archputs("\n\n", fout);
 
 	ExecuteSqlStatement(fout, "CLOSE _pg_dump_cursor");
 
 	destroyPQExpBuffer(q);
+
+	if (collist != NULL)
+		destroyPQExpBuffer(collist);
+
 	return 1;
 }
 
#2 Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#1)
Re: pg_dump insert with column names speedup

David Rowley <dgrowleyml@gmail.com> writes:

Here's a small patch which greatly increases the speed of
pg_dump --column-inserts.

The reason why no one's paid any attention to the speed of that code path
is that if you care about dump/restore speed, you should be using the COPY
code paths instead. Is it really worth adding code and complexity to
pg_dump for this?
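
For context, the default dump format goes through the COPY code path and
produces output along these lines (made-up, tab-separated data):

COPY t (id, val) FROM stdin;
1	one
2	two
\.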

regards, tom lane

#3 Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#2)
Re: pg_dump insert with column names speedup

On Fri, Oct 4, 2013 at 11:28 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

David Rowley <dgrowleyml@gmail.com> writes:

Here's a small patch which greatly increases the speed of
pg_dump --column-inserts.

The reason why no one's paid any attention to the speed of that code path
is that if you care about dump/restore speed, you should be using the COPY
code paths instead. Is it really worth adding code and complexity to
pg_dump for this?

One possible reason to care about this is if you're trying to move
data to another database. The INSERT format is more portable.

Also, this isn't really adding any net code or complexity AFAICS.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#4 Stephen Frost
sfrost@snowman.net
In reply to: Robert Haas (#3)
Re: pg_dump insert with column names speedup

* Robert Haas (robertmhaas@gmail.com) wrote:

On Fri, Oct 4, 2013 at 11:28 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

David Rowley <dgrowleyml@gmail.com> writes:

Here's a small patch which greatly increases the speed of
pg_dump --column-inserts.

The reason why no one's paid any attention to the speed of that code path
is that if you care about dump/restore speed, you should be using the COPY
code paths instead. Is it really worth adding code and complexity to
pg_dump for this?

One possible reason to care about this is if you're trying to move
data to another database. The INSERT format is more portable.

Also, this isn't really adding any net code or complexity AFAICS.

Agreed- this looks more like a "gee, that makes a lot of sense" than a
"wow, that's way more complicated". Not a whole lot of point in
building up a known-to-be-constant string on every iteration of the
loop.

Thanks,

Stephen

#5 David Rowley
dgrowleyml@gmail.com
In reply to: Stephen Frost (#4)
1 attachment(s)
Re: pg_dump insert with column names speedup

On Sat, Oct 5, 2013 at 6:39 AM, Stephen Frost <sfrost@snowman.net> wrote:

* Robert Haas (robertmhaas@gmail.com) wrote:

On Fri, Oct 4, 2013 at 11:28 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

David Rowley <dgrowleyml@gmail.com> writes:

Here's a small patch which greatly increases the speed of
pg_dump --column-inserts.

The reason why no one's paid any attention to the speed of that code path
is that if you care about dump/restore speed, you should be using the COPY
code paths instead. Is it really worth adding code and complexity to
pg_dump for this?

One possible reason to care about this is if you're trying to move
data to another database. The INSERT format is more portable.

Also, this isn't really adding any net code or complexity AFAICS.

Agreed- this looks more like a "gee, that makes a lot of sense" than a
"wow, that's way more complicated". Not a whole lot of point in
building up a known-to-be-constant string on every iteration of the
loop.

These words made me think that the changes I made didn't quite go far enough
to address what you said.
I understand that most people won't use the --column-inserts feature, but
that's not really a good reason to keep wasteful code in there. This fruit
was hanging so low it was practically touching the ground, so I couldn't
resist fixing it when I saw it.

The attached revised patch goes a little further and, while processing the
first row, prepares everything that is constant; this now includes the "INSERT
INTO tablename " part. I don't think the changes make the code any harder
to read: the code which builds the staticStmt fits on my small laptop
screen.
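
To make that concrete with made-up names, the constant part prepared on the
first row is everything up to the start of the values list, leaving only the
per-row values to be written inside the loop:

INSERT INTO t (id, val) VALUES (    -- built once per table
1, 'one');                          -- written per row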

The timings with my benchmark look something like:

Unpatched: 9200 ms
Version 0.1: 5700 ms
Version 0.2: 5250 ms

So it does shave off a bit more, for what it's worth.

David

Attachments:

pg_dump_colinsert_v0.2.patch
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 57320cc..be55aa6 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1550,6 +1550,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 	TableInfo  *tbinfo = tdinfo->tdtable;
 	const char *classname = tbinfo->dobj.name;
 	PQExpBuffer q = createPQExpBuffer();
+	PQExpBuffer staticStmt = NULL;
 	PGresult   *res;
 	int			tuple;
 	int			nfields;
@@ -1591,34 +1592,55 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		nfields = PQnfields(res);
 		for (tuple = 0; tuple < PQntuples(res); tuple++)
 		{
-			archprintf(fout, "INSERT INTO %s ", fmtId(classname));
-			if (nfields == 0)
+			/*
+			 * On first loop we build as much of the INSERT statment
+			 * as possible. If the table happens to have 0 columns
+			 * then this will be a complete statement, otherwise
+			 * it will end in "VALUES(" and be ready to have the
+			 * row's column values written.
+			 */
+			if (staticStmt == NULL)
 			{
+				staticStmt = createPQExpBuffer(); 
+				appendPQExpBuffer(staticStmt, "INSERT INTO %s ", fmtId(classname));
+
 				/* corner case for zero-column table */
-				archprintf(fout, "DEFAULT VALUES;\n");
-				continue;
-			}
-			if (column_inserts)
-			{
-				resetPQExpBuffer(q);
-				appendPQExpBuffer(q, "(");
-				for (field = 0; field < nfields; field++)
+				if (nfields == 0)
 				{
-					if (field > 0)
-						appendPQExpBuffer(q, ", ");
-					appendPQExpBufferStr(q, fmtId(PQfname(res, field)));
+					appendPQExpBufferStr(staticStmt, "DEFAULT VALUES;\n");
+				}
+				else 
+				{
+					if (column_inserts) 
+					{
+						/* append the list of column names if required */
+						appendPQExpBufferStr(staticStmt, "(");
+						for (field = 0; field < nfields; field++)
+						{
+							if (field > 0)
+								appendPQExpBufferStr(staticStmt, ", ");
+							appendPQExpBufferStr(staticStmt, fmtId(PQfname(res, field)));
+						}
+						appendPQExpBufferStr(staticStmt, ") ");
+					}
+
+					appendPQExpBufferStr(staticStmt, "VALUES (");
 				}
-				appendPQExpBuffer(q, ") ");
-				archputs(q->data, fout);
 			}
-			archprintf(fout, "VALUES (");
+
+			archputs(staticStmt->data, fout);
+
+			/* if it is 0 column table then skip to the next row */
+			if (nfields == 0)
+				continue;
+
 			for (field = 0; field < nfields; field++)
 			{
 				if (field > 0)
-					archprintf(fout, ", ");
+					archputs(", ", fout);
 				if (PQgetisnull(res, tuple, field))
 				{
-					archprintf(fout, "NULL");
+					archputs("NULL", fout);
 					continue;
 				}
 
@@ -1647,7 +1669,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 							const char *s = PQgetvalue(res, tuple, field);
 
 							if (strspn(s, "0123456789 +-eE.") == strlen(s))
-								archprintf(fout, "%s", s);
+								archputs(s, fout);
 							else
 								archprintf(fout, "'%s'", s);
 						}
@@ -1661,9 +1683,9 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 
 					case BOOLOID:
 						if (strcmp(PQgetvalue(res, tuple, field), "t") == 0)
-							archprintf(fout, "true");
+							archputs("true", fout);
 						else
-							archprintf(fout, "false");
+							archputs("false", fout);
 						break;
 
 					default:
@@ -1676,7 +1698,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 						break;
 				}
 			}
-			archprintf(fout, ");\n");
+			archputs(");\n", fout);
 		}
 
 		if (PQntuples(res) <= 0)
@@ -1687,11 +1709,15 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		PQclear(res);
 	}
 
-	archprintf(fout, "\n\n");
+	archputs("\n\n", fout);
 
 	ExecuteSqlStatement(fout, "CLOSE _pg_dump_cursor");
 
 	destroyPQExpBuffer(q);
+
+	if (staticStmt != NULL)
+		destroyPQExpBuffer(staticStmt);
+
 	return 1;
 }
 
#6 Peter Eisentraut
peter_e@gmx.net
In reply to: David Rowley (#5)
Re: pg_dump insert with column names speedup

src/bin/pg_dump/pg_dump.c:1604: trailing whitespace.
+ staticStmt = createPQExpBuffer();
src/bin/pg_dump/pg_dump.c:1612: trailing whitespace.
+ else
src/bin/pg_dump/pg_dump.c:1614: trailing whitespace.
+ if (column_inserts)

#7 David Rowley
dgrowleyml@gmail.com
In reply to: Peter Eisentraut (#6)
1 attachment(s)
Re: pg_dump insert with column names speedup

On Sat, Nov 16, 2013 at 9:04 AM, Peter Eisentraut <peter_e@gmx.net> wrote:

src/bin/pg_dump/pg_dump.c:1604: trailing whitespace.
+                               staticStmt = createPQExpBuffer();
src/bin/pg_dump/pg_dump.c:1612: trailing whitespace.
+                               else
src/bin/pg_dump/pg_dump.c:1614: trailing whitespace.
+                                       if (column_inserts)

The trailing white space is fixed in the attached patch.

Regards

David Rowley

Attachments:

pg_dump_colinsert_v0.3.patch
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 769058d..0659d8e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1550,6 +1550,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 	TableInfo  *tbinfo = tdinfo->tdtable;
 	const char *classname = tbinfo->dobj.name;
 	PQExpBuffer q = createPQExpBuffer();
+	PQExpBuffer staticStmt = NULL;
 	PGresult   *res;
 	int			tuple;
 	int			nfields;
@@ -1591,34 +1592,55 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		nfields = PQnfields(res);
 		for (tuple = 0; tuple < PQntuples(res); tuple++)
 		{
-			archprintf(fout, "INSERT INTO %s ", fmtId(classname));
-			if (nfields == 0)
+			/*
+			 * On first loop we build as much of the INSERT statment
+			 * as possible. If the table happens to have 0 columns
+			 * then this will be a complete statement, otherwise
+			 * it will end in "VALUES(" and be ready to have the
+			 * row's column values written.
+			 */
+			if (staticStmt == NULL)
 			{
+				staticStmt = createPQExpBuffer();
+				appendPQExpBuffer(staticStmt, "INSERT INTO %s ", fmtId(classname));
+
 				/* corner case for zero-column table */
-				archprintf(fout, "DEFAULT VALUES;\n");
-				continue;
-			}
-			if (column_inserts)
-			{
-				resetPQExpBuffer(q);
-				appendPQExpBuffer(q, "(");
-				for (field = 0; field < nfields; field++)
+				if (nfields == 0)
 				{
-					if (field > 0)
-						appendPQExpBuffer(q, ", ");
-					appendPQExpBufferStr(q, fmtId(PQfname(res, field)));
+					appendPQExpBufferStr(staticStmt, "DEFAULT VALUES;\n");
+				}
+				else
+				{
+					if (column_inserts)
+					{
+						/* append the list of column names if required */
+						appendPQExpBufferStr(staticStmt, "(");
+						for (field = 0; field < nfields; field++)
+						{
+							if (field > 0)
+								appendPQExpBufferStr(staticStmt, ", ");
+							appendPQExpBufferStr(staticStmt, fmtId(PQfname(res, field)));
+						}
+						appendPQExpBufferStr(staticStmt, ") ");
+					}
+
+					appendPQExpBufferStr(staticStmt, "VALUES (");
 				}
-				appendPQExpBuffer(q, ") ");
-				archputs(q->data, fout);
 			}
-			archprintf(fout, "VALUES (");
+
+			archputs(staticStmt->data, fout);
+
+			/* if it is 0 column table then skip to the next row */
+			if (nfields == 0)
+				continue;
+
 			for (field = 0; field < nfields; field++)
 			{
 				if (field > 0)
-					archprintf(fout, ", ");
+					archputs(", ", fout);
 				if (PQgetisnull(res, tuple, field))
 				{
-					archprintf(fout, "NULL");
+					archputs("NULL", fout);
 					continue;
 				}
 
@@ -1647,7 +1669,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 							const char *s = PQgetvalue(res, tuple, field);
 
 							if (strspn(s, "0123456789 +-eE.") == strlen(s))
-								archprintf(fout, "%s", s);
+								archputs(s, fout);
 							else
 								archprintf(fout, "'%s'", s);
 						}
@@ -1661,9 +1683,9 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 
 					case BOOLOID:
 						if (strcmp(PQgetvalue(res, tuple, field), "t") == 0)
-							archprintf(fout, "true");
+							archputs("true", fout);
 						else
-							archprintf(fout, "false");
+							archputs("false", fout);
 						break;
 
 					default:
@@ -1676,7 +1698,7 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 						break;
 				}
 			}
-			archprintf(fout, ");\n");
+			archputs(");\n", fout);
 		}
 
 		if (PQntuples(res) <= 0)
@@ -1687,11 +1709,15 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		PQclear(res);
 	}
 
-	archprintf(fout, "\n\n");
+	archputs("\n\n", fout);
 
 	ExecuteSqlStatement(fout, "CLOSE _pg_dump_cursor");
 
 	destroyPQExpBuffer(q);
+
+	if (staticStmt != NULL)
+		destroyPQExpBuffer(staticStmt);
+
 	return 1;
 }
 
#8 Tom Lane
tgl@sss.pgh.pa.us
In reply to: David Rowley (#7)
Re: pg_dump insert with column names speedup

David Rowley <dgrowleyml@gmail.com> writes:

The trailing white space is fixed in the attached patch.

Applied with minor cosmetic adjustments.

regards, tom lane
