pgbench - allow to create partitioned tables

Started by Fabien COELHO over 6 years ago · 88 messages

coelho@cri.ensmp.fr

over 6 years ago

1 attachment(s)

Hello devs,

While doing some performance tests and reviewing patches, I needed to
create partitioned tables. Given the current syntax this is time
consuming.

The attached patch adds two options to create a partitioned "account"
table in pgbench.

It makes it possible to quickly answer simple questions, e.g. "what is the
overhead of hash partitioning on a simple select on my laptop?" Answer:

# N=0..?
sh> pgbench -i -s 1 --partition-number=$N --partition-type=hash

# then run
sh> pgbench -S -M prepared -P 1 -T 10

# and look at latency:
# no parts = 0.071 ms
# 1 hash = 0.071 ms (did someone optimize this case?!)
# 2 hash ~ 0.126 ms (+ 0.055 ms)
# 50 hash ~ 0.155 ms
# 100 hash ~ 0.178 ms
# 150 hash ~ 0.232 ms
# 200 hash ~ 0.279 ms
# overhead ~ (0.050 + [0.0005-0.0008] * nparts) ms

--
Fabien.

Attachments:

pgbench-init-partitioned-1.patch (text/x-diff; name=pgbench-init-partitioned-1.patch) Download

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 816f9cc4c7..c10789262c 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,32 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partition-number=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-type=<replaceable>TYPE</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table of type
+        <replaceable>TYPE</replaceable>.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option is only taken into account if
+        <option>--partition-number</option> is non-zero.
+        Default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..0b262eff13 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,11 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partition_number = 0;
+enum { PART_RANGE, PART_HASH }
+			partition_type = PART_RANGE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +622,8 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partition-number=NUM   partition account table in NUM parts (defaults: 0)\n"
+		   "  --partition-type=TYPE    partition type (range or hash; default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3608,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3625,6 +3643,7 @@ initCreateTables(PGconn *con)
 		const char *bigcols;	/* column decls if accountIDs are 64 bits */
 		int			declare_fillfactor;
 	};
+
 	static const struct ddlinfo DDLs[] = {
 		{
 			"pgbench_history",
@@ -3651,11 +3670,10 @@ initCreateTables(PGconn *con)
 			1
 		}
 	};
-	int			i;
 
 	fprintf(stderr, "creating tables...\n");
 
-	for (i = 0; i < lengthof(DDLs); i++)
+	for (int i = 0; i < lengthof(DDLs); i++)
 	{
 		char		opts[256];
 		char		buffer[256];
@@ -3664,9 +3682,17 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_number >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_type == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3712,54 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_number >= 1)
+	{
+		int64		part_size = (naccounts * (int64) scale + partition_number - 1) / partition_number;
+		char		ff[64];
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partition_number);
+
+		for (int p = 1; p <= partition_number; p++)
+		{
+			char		query[256];
+
+			if (partition_type == PART_RANGE)
+			{
+				char		minvalue[32], maxvalue[32];
+
+				if (p == 1)
+					sprintf(minvalue, "MINVALUE");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+				if (p < partition_number)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "MAXVALUE");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_type == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partition_number, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -5126,6 +5200,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partition-number", required_argument, NULL, 11},
+		{"partition-type", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5562,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partition-number */
+				initialization_option_set = true;
+				partition_number = atoi(optarg);
+				if (partition_number < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-type */
+				initialization_option_set = true;
+				if (strcasecmp(optarg, "range") == 0)
+					partition_type = PART_RANGE;
+				else if (strcasecmp(optarg, "hash") == 0)
+					partition_type = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 5a2fdb9acb..0d1fd1f043 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -98,30 +110,32 @@ pgbench(
 	],
 	'pgbench scale 1 initialization',);
 
-# Again, with all possible options
+# Again, with all possible options but tablespace
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partition-number=2 --partition-type=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partition-number=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..fc6bd2e50e 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,8 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-type=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partition-number -1', [ qr{invalid number of partitions: "-1"} ] ],
 
 	# logging sub-options
 	[

Simon Riggs

simon@2ndquadrant.com

over 6 years ago

In reply to: Fabien COELHO (#1)

Re: pgbench - allow to create partitioned tables

On Tue, 23 Jul 2019 at 19:26, Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello devs,

While doing some performance tests and reviewing patches, I needed to
create partitioned tables. Given the current syntax this is time
consuming.

Good idea. I wonder why we didn't have it already.

The attached patch adds two options to create a partitioned "account"
table in pgbench.

It makes it possible to quickly answer simple questions, e.g. "what is the
overhead of hash partitioning on a simple select on my laptop?" Answer:

# N=0..?
sh> pgbench -i -s 1 --partition-number=$N --partition-type=hash

Given current naming of options, I would call this
--partitions=number-of-partitions and --partition-method=hash

# then run
sh> pgbench -S -M prepared -P 1 -T 10

# and look at latency:
# no parts = 0.071 ms
# 1 hash = 0.071 ms (did someone optimize this case?!)
# 2 hash ~ 0.126 ms (+ 0.055 ms)
# 50 hash ~ 0.155 ms
# 100 hash ~ 0.178 ms
# 150 hash ~ 0.232 ms
# 200 hash ~ 0.279 ms
# overhead ~ (0.050 + [0.0005-0.0008] * nparts) ms

It is linear?

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Solutions for the Enterprise

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Simon Riggs (#2)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Simon,

While doing some performance tests and reviewing patches, I needed to
create partitioned tables. Given the current syntax this is time
consuming.

Good idea. I wonder why we didn't have it already.

Probably because I did not have to create partitioned tables for some
testing :-)

sh> pgbench -i -s 1 --partition-number=$N --partition-type=hash

Given current naming of options, I would call this
--partitions=number-of-partitions and --partition-method=hash

Ok.

# then run
sh> pgbench -S -M prepared -P 1 -T 10

# and look at latency:
# no parts = 0.071 ms
# 1 hash = 0.071 ms (did someone optimize this case?!)
# 2 hash ~ 0.126 ms (+ 0.055 ms)
# 50 hash ~ 0.155 ms
# 100 hash ~ 0.178 ms
# 150 hash ~ 0.232 ms
# 200 hash ~ 0.279 ms
# overhead ~ (0.050 + [0.0005-0.0008] * nparts) ms

It is linear?

Good question. I would have hoped affine, but this is not very clear on
these data, which are the median of about five runs, hence the bracket on
the slope factor. At least it is increasing with the number of partitions.
Maybe it would be clearer on the minimum of five runs.

--
Fabien.

Attachments:

pgbench-init-partitioned-2.patch (text/x-diff; name=pgbench-init-partitioned-2.patch) Download

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 816f9cc4c7..3e8e292e39 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,32 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option is only taken into account if
+        <option>--partitions</option> is non-zero.
+        Default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..6819b4e433 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,11 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partitions = 0;
+enum { PART_RANGE, PART_HASH }
+			partition_method = PART_RANGE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +622,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition account table in NUM parts (defaults: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition account table with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3609,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3625,6 +3644,7 @@ initCreateTables(PGconn *con)
 		const char *bigcols;	/* column decls if accountIDs are 64 bits */
 		int			declare_fillfactor;
 	};
+
 	static const struct ddlinfo DDLs[] = {
 		{
 			"pgbench_history",
@@ -3651,11 +3671,10 @@ initCreateTables(PGconn *con)
 			1
 		}
 	};
-	int			i;
 
 	fprintf(stderr, "creating tables...\n");
 
-	for (i = 0; i < lengthof(DDLs); i++)
+	for (int i = 0; i < lengthof(DDLs); i++)
 	{
 		char		opts[256];
 		char		buffer[256];
@@ -3664,9 +3683,17 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_method == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3713,54 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions >= 1)
+	{
+		int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+		char		ff[64];
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				char		minvalue[32], maxvalue[32];
+
+				if (p == 1)
+					sprintf(minvalue, "MINVALUE");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "MAXVALUE");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -5126,6 +5201,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5563,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partition-number */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-type */
+				initialization_option_set = true;
+				if (strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 5a2fdb9acb..ef6aafb3f9 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -98,30 +110,32 @@ pgbench(
 	],
 	'pgbench scale 1 initialization',);
 
-# Again, with all possible options
+# Again, with all possible options but tablespace
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..a097c18ee6 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,8 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
 
 	# logging sub-options
 	[

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Fabien COELHO (#3)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

# and look at latency:
# no parts = 0.071 ms
# 1 hash = 0.071 ms (did someone optimize this case?!)
# 2 hash ~ 0.126 ms (+ 0.055 ms)
# 50 hash ~ 0.155 ms
# 100 hash ~ 0.178 ms
# 150 hash ~ 0.232 ms
# 200 hash ~ 0.279 ms
# overhead ~ (0.050 + [0.0005-0.0008] * nparts) ms

It is linear?

Good question. I would have hoped affine, but this is not very clear on these
data, which are the median of about five runs, hence the bracket on the slope
factor. At least it is increasing with the number of partitions. Maybe it
would be clearer on the minimum of five runs.

Here is a follow-up.

On the minimum of all available runs the query time on hash partitions is
about:

0.64375 nparts + 118.30979 (in µs).

So the overhead is about 47.30979 + 0.64375 nparts, and it is indeed
pretty convincingly linear as suggested by the attached figure.

--
Fabien.

Attachments:

regression.png (image/png; name=regression.png) Download

�PNG


IHDR�y`��	pHYs���+tIME�:����iTXtCommentCreated with GIMPd.e IDATx����sg~&�o��We��:p/�%����m��mOL���O�c��cc�;;�Y����z��>l�[�$Ro\���++3+���@R�D�p=���
�Y@>�f>�+\[�A"b

 ��&Y {��

 � �

 � � �
��X�����#c��

 � �

;�����1��6������Y����l��[YR�x��(�����9�~@ABD$(����,{�VsO|�^��p@�+��k�O7n�w�s����?�����*O����;�����7�o}�M�.�P��������?_��U����}@I�a�s�����y-��,I�0n���.$<gK%c�2)�����;��]��z{��K�����Q���gb�o����R2d]_a{@@�0p:�A��~����3*d���W������W~�e/�[(Y�'tR"�p����_��\��`&����><y�L�o������|�y��B>����7tEy���HV���j���Gze�B��A��?�/?�}���������D,��w��umm�iI����{ri��YQ��[
9W?��v76�S���j�f������[�P�(k����c.�iuVR����Z���r}�U��I�����"�\���;���p��U}�^��4U�I?eV�������w��F�?��$���=�:

�j^V��c��<��H�l&#*E�tL^���7v2�q�P�L����dD���bK����������OyvO�e�2xC�	G�38���Ns+����j��VH�%	�N���d����*����=l�����9J�/��$I��=Q�w������H���:�;�w�X��Z�f�����f���AY"�����=���bk��3Y~5D1�dsI|?q������=l���H$<��|��tZ#gA�&��]��lV��r�RI�t�,�Y�2"�$Qp�G
p�����8a� *��$�����1�X@��l^��?���W�]�a����~������|�(K�$��|w�&��_/eT=W7rC)����c�!k���=��'oB�tZ������|����<�����^.���=���`fz�V�$A������H���D�3
Y)�+��/E8*�R H����2���� �YSF&��8�(d���w_�(��l.����0�1C�{�>��qzbg>���4��G������4�#"R3Z6�#�#8^�K�8x��4���p4�*Q���.����G�.�~|{������Z���2m��N�;������������0���a�8�}�3Q�����j5�X����������b��x/��;�� !A��+qG#��D�j����|���X���g7����3������R&��(�(�EqE�ht��I���g��lF�Y�f��}&C��S7�Y�9��{N��yWW�|>����;p�e�Y"�uG
G
p���0fr&#�t�.���dU�+�����:�z+���}yog������nf%��$���<z4��&�?jz�vge)o�����=lo�$a����G�(����%r����=7�Q�(��zN46b_��0�(I|o4��A���e�Z��2Zn��
���M�8�*�a��U�3���$D^����Q&-���e�C����'��n@@!�$��a+f��)k�2�I����:I��eE�Td��,%�.�l�F�a���yU3f���B��mG�X�����Z�Pu��{��e=���p_�����_�������O~BI����^�~#���:Y�-�������^W���i�2���{�n�m�\?	R�����������g����L]V�=m��7��~�gDI������1T����I������-7,�*��;??y�j�T"Q!���gf..�hses��J7�f�I����h�$U�O.�83[,��io������{/�G���W>]ixL�HJ��ke�����8y����0{�BVv��zy����I�~>X������f���JR�f�Zf�Gs���pv��+n(DD$��Q��w������b��|i����~�x���
U,v,)�>��^�q�K�9��D]"�Zi�v��S�/���W�Pv'g��'�lu���?�����*�mN�Z��c*Q���.����O��#�v���SS9�,��mpD<\�e�j�����D$��"��J/=c|�(�Z9��t��l��G��$�}8mar18pPPP��>M����M�obQ����
�*g�w#(p� �����;pXNX�xW���%���A�PPPPPPP��Pp��K����,��	/*�0�
 � �

 � �
���d�i.Ys���%�w
#(p� �
�����))��4N(f�$��5�1�O����� �'b����#(@D�0%�G|4�����J�)��� "��yl��n4��^���!���>��P�/�9c$4���:#�=d�1O���&������1
�FaB-��l��\�����p�������/?
�q%�
�(�C��F|��~�3�q�,�$l�i�������C@y���n�W:l��n�X�h��R���`���	�D)����,�|���	v9���{:m�o
X��QJFV�Y���T��9U �$$�a}G�=(��<�����
�p�&M�B^MUP3�GE@8����CD�&L���	�d
�H�x���P�t@���N�K�`k��d��v�
������TA4��"�#P���t4E���|(GG��p����{:
��&�����L����� �ez:19�y����	zO�HFaB;[n���\�g|O����8Q�p/���|�����1��B9'.LH��p�{:(G.�1FN���l��nt��n�'JR����S-C����pt<���|8��7�����q3+�����T�D3{��}C@8Lv{:�{��N����C�P��sb^����s;
�A�lOg�a����0]�NV��!���NX��P���9Yy0-��fVE��KF@8����3_~�=�}L<(2����~eY�[� 9h=�}�
Ay�	��VOg�N[2����q�2
b�y�?`=���
A�)#?��~��6������p��t��{!w�����]�tP��������N��e[}�9Uma�|D��A@8Lv{:^��c�����=��Ta2/���tP����z��j����*�L���8a��ZO����h�x~%I�8�QQdY�U%o����g����cr�������8�
�(R����BY*��tP�����,�m4:��q�����l���d���so#���t���H�,L���R��`��q�z:(DD�8����X_��P����R�+�(
��D�7�����8�3ba���3!����2�	
KI{��7�WmN�����7��c$q�������m�tE*���!Z��eHqpP��b�EQ(����[��~85�4�>c�C����4a� �(���h��V3>F\^8����mM0�B�������:m�8_�G���/Q#(oz:(%c
��p��vF^����rjq2g����+_�z��p��8����gn�R;71����Jg�aDI�
�Nc���c
v[iYC�U-����&�8���A@y��O�+�����_����y������G�����.����]�p����77�_��M��>��Js����_����X!�|i�\�������q���&5]""�� .������T�j&�F�7��������(�[��W�_����v<����������/&TU�g3�G#"I8�w�m����+���/O�-[�~�^H�A8X�^�������{����S7�R���R�n��Y��q8�"Q /�6�J���m���h��
I�����|�qY�p�tP���h{����/>_k����vn���
U�A�+����_W~��(�y�U�e���^���>8w�>Ad���x�V��4ZW������3��������	��ww����/���>+�c�~j�h@H�A ���q�����A"�� �\��q�z7��L��[�P>l=�����d�su����V�/]�P��Q3����}������\t� =>��h��O�����.LY�~gR����������Z����������	A��[

��t��������bM����]Sg�k�EIPa���"��le���I]���;0�F��F�y������$�� ��R�j��z�gs��&f��#q�K5����I������I����%���}���{����k������rF�O������Hm8�������O�����w�T��������f,&��aEI��R���'Fn��~s;m9,J��
K�r����CO�H&2U�T��'>�$
�(`<e��q�cO�CNmxw�����6��l+'
�k����Nf�/J��z1�*����U�R���w:�fwht� ���x7�}����z��qC>�y�����*9���T���L2z:(��m���s��s3o=x>��q�����_���_�Z�(���s�j������9�o'�av�dub�fjO6��8�M'V/H<��MG�nOg8��Zl���Um��%M���!Z���I<����}��^>B�P	Q��;w��w�#90m��s�"�O�)�V������%uw&�(l��8b��Z9j'�p���D
5[3������I��b*�*�^��q���>y����v��0H���u���E{rrR������<��}�?�7p��x��q*�������w����A��r[�&TqBu�xL���0��g���J@���WW>]]����
Zyv�:1Y}z�#71{v�����~ZWn�]���~����M��e��f-�}w���$I���2����sCY��t�y��S��M(�v�W������v���~%c��s���b�e�����~��`r]��Cp���q"�i�S�U?&U�r��fE��Z$$.�B�9������IJ�������"/�V��;w���v�g����fk����g�Z�6���&�����K���
�W�e�z/]�����K�jUdq�Q��s�K3J6�d
AJ�8�bb���������z6���|���|
���1�%�hUC��RN!��1���_�����s���������1-�|�����5����U���������f���|'U�r��3�������ya��-��^��D�G�����A��%�(��A�T����k��X������a�!8RpL�W�����m����&�����R�DK�z��2�����#���q�$I������v�,�ll\�y���MG�r����'��E���eY�������sY#W�36ckQ�EQ��(���*�$�#�FQ�D����8��p�OmQT���dH|r�A�0
�9�������M���|.�;k���g�0p�p��h�'z:����������0����]O'JX6��(�����H��$.�w���������B�����s�OU�����E�����YI^�����K��	��8�F
o�I�	Y�M#c�$c�@���X�t��~ReDI����/6o]Y����'*��/\�4W?W3s��OH�3�ik�n��A:bGPv�1I|�q|?HUC�lC�
Ix7�O����3b[6�9�di�t^��z:(���"r����g����'���A�d���N~������cw��}�|=#m�H}���������l�u�Pe��M)p<��N���X���-�M���� ��j>S�tvV�o��v��Ylzn����K�S�'�>y�y0AB�6+e��J
�����(I�i�E�Td�$���f��y�D��������HNz������X3����z��t�
���`=��;�y�=z���,
EC�.�s%��c=����uk�q����J����Kg&�,���O��F���iy�S�k���X�B��+�;+����PKS���z��T����Z�ONk���j�nh��fN����7�^��9v<{��S����9S�
�a�l��
�V��h�;#�2�����\���.�T�P���k����`�^�j����[��KQ������aL��L�i9;_�X�f�	�����m��o���C�B�h�_���ZoT\���-��:W5sY"Q!���gf/.v����=�*����&�������j��N�X�-u,p�q��(!/��1��|�a]��	Y�����&D����k����?��"��������~���e�d�M��,}�g�����9�U3��;�Vo�������?�����ke�����?���\�=U��E&2�����i�}>X�r�w��j��������zf����s��������8%��>_i��{�1`�tA�N�eS(������|���5��S�%6�T��]*v3��~�;��!*���������NU��`��HQ�	�xo����3�ns����K��K�9}ain���x\q�����|�vys��cND�&L���X4D]��	����i����N��'
���^��eC6ecZ_����Y���G����91kY���<�Yd-�&-�,���@bj�z7��b^�m]���T^�0��&����9����M�6U{���_���(�].Py��=���z:;�q�8��DEC�)�e�h�r
�;�xO�5b�SN�f�JN�u!�
&�^C@x����RN���0!V-�tP��o���l��
���������q2b��?���oz:o������=�#	
�tP�t�a?�K�qC
�z:n��c�CO�]&�}(��nO����mv����L��9�P���S>�����;.o�0=�}4����mt�J+������X���)��������')��=�o;�5bAL�DeS�-Jse1��r
�;���|���i�e�H�&\��',!��*��
���xOg����v�����QN���P+9�tP��'z:=�r�$�tA��b����S��A@xW�����L(��%i�(�uA�`��"��<����TA���eS�4ASE�N��s����d�K����`"$<��~�FP`��(pP0NaB�'>COP`�qR.|v?H�Q�� ��~�S�G�5�M'��@B�� ����#��c����-=H�dIFOP`��G)�#>
x���!k�X���,��(�?��>�������1I�Br��O����V)���
�����	����3b��{<e�����m�&rb�"c'������tno��>��L���X��rN�0�����W�vJ2���G���H����+�eaR��A�eHT�yUW������(��1o�xs�F�D*���8U���B�aW�[
(a��W:��7�Y2��7o�*����.�w;��o��(S��������M��x������u�j;
^2���X���)��=���p���m)�����(����i8�[���n����ui�h(Q�����p��h�s~�pb8z�����t&r�lI�-���{M�����$D��Y^���-��g���=�M>!Y]}�OD�+�Ne����7�����Qx[����u����DN�5!��t�����q$���d<�QQ!���*�zq�f���w(�����N�OU���(�$����?T��Ey���F�����������^N�UK-NZ�Y60A%r	 IDAT�pD���S�Sd�N�]�HQ��bT
D'�������`=�������t�;����w��F�q���WSL!W�l{*�Z��z��q������������������������m"��M����������r��a��g����c������,���P����?���7�Qb�� A�s�(Lh���;.�����\=8�%���b
������w���'j���%�k�CP�	�F��v�5`�+2�(�UK,�b=8�E)��|��-3g/�NO�2FQ�(�~���|�v�����K"t�DI�.��*d���P�U�2�N�=����R�'S'R�S-?�k��M7�,N��NU��%M�R]d�t��""J���ht�{W':;���-����N�e���<JI�P�gK�t^����CP�1��F�n�^�jm����5��.���p�=����<+S�OW��)�u��
z:p�
�D���'*]�d
p�������y���XFOe@�����wN���;~��m��8���z:�X���d���CPd�d����}g8�l9�OD�nj��h��ITr��nkY�i��=8�eW���������\^^:w������2�n�7�����}0kk���~�`���#����rw-�N�smmuy��8�������"f)���7��9%���!�g`��������Lu���������k��.������i��B�Ty�����/]w��N/�
fH����z:p�N[��?q���G����i�����u�KH���^�s������x���TP����N��h�>�z���a�(�3�����O���D1W��C�d�Q)C�MY����Gq��������n6��>s�<�AO��u�j�������4)�v+�w)J��Q��>o�������z:pL��I���+�6l_:���h:�����~�j��0oXeP�xG���t�z'������%.���%
�t�8Y%������F�wVo�B�n�e�/l-�[i���z~�j�Y��(a<J���h��.��������P��%��s*FM�X��S�y�����|�����;�;IgD$�+�X3�3��:s����u�l"��EQB=�mt��F:s#+�lM�0��.���f����1KJV/����F�3����k�AS�������B��-����=�����3��C6s"�5a*/.LH%S@O�Y@������z�/g����^�G�,�8iY;xk���Nc�FW3t�*U-�d
�&����
(�F�f���$v&�;���NF��!�����hfEF0��P��{nO�����)��i����dLA�q��[���r^L��B�j��T^�T�!�W�����zK�th���y�������������t���/.��{U�f�0�2�t��Ga�"�,�����2	(���$9���!���W�����w���J���(`W�KBO��k��n����f^�����l���=K���/���������o�I�2U�$�
=8�^:7<�����0�_��'{����u����~4����8'N'�����W$I�qIp���W��G�9��1r��e]�te�$��I����]g8^�j+���`I8F�}_3�*(���$�o��<L��p�;.o���W���A-����r���_���������V
gk��/}w���Z-�b_�X�Pg�������sM������T#S���sSw������<����Q�J3����j��=
�<z:19�y�=�;\�����W�z��/����w�Kcw�<_�dZv}��1a-�7FaB;[n���\�'+n7AOP^���J�,�<��	�R���|�����1��B9'���z^@O����1����c=��n��e���-�(IK,bN����9�����,����a��uF��(yv��!������Z<�i|"8����I7�B=/�(IUK4���l�o��
�K��������O�oo69���������O�<���c���N�.�3����#+������������v6zQl���d'���{qOg� ���%CE
����� J��q���A�%]U����
v,�~5����\������M_�t��,<���%c8���=]0��(�@4p�+�k���q��e�LF6tuv����9x7�d�^�qQ$%+i��5[6f��W;3m�D����z:�x�q�����Fn�����/�]�@�-�����'�}(����F�Iy��:��/�����!��ZO'��n����~j8mO}�M���}��*I�b�����c%������2_V����A��D�h��1�:=�&Q0�$�i����I=LfQ3�����cj5F�Wo�^����t�TuE~r H,/��?�����w@!��T�bu��������8O�cr��������������o?���|0����0���>�x�H�1o�r#��0��������"kdj�Z����n��#"��i��,�����j�����p���t��{!w�����:#&XO`���O�+�_}������U":?wf���2���e�g���Y[�`�8be��3�+�t��������x�"�sB���[@�]����������:}�������~F�R�5C>^o��^�4eC��)�=/��1o�Xc��c�,M�,�'�b9'����@�@^,n3DOWu6<%HDC���G]��t�V�ns<}��>� ��OM#gJ��e��I�3L����;�`N1C
�=����L�17U*���iq"'��`d����e�d����u�����������
 �|K@	�k����ozZ���3%5���%�hzQ/�3�9���=o����)���zNO�an�E�
�8]�R��{:�����gI��]�G�uK/�NN�*������x���I5W�sE�; z�z��!��,�8 r)�c_���lO��
�u�j�C�T�x=�R��������=�N��I�����
� ��jzIBo���Q�e�������G������S�S��/?}�#J"o���g�S����q���nG���t&�z^�3BFz3�\t]��Y�y��(�c$���h����T�G!t�����~(��9���s�Ct}����sS������~�4���%!r����/���W7?mRcD��o��=���m=K�I(����Q�s����;k��U1���)�����4f�r-?Q3�7?����5����/����G�,e)��?lP��O����yU��*�>��rs��k7Wo����3�S���k��1�����(��]��=
o��o?��d�.�I?�I��������7�I{D$���X3����:s����u�l���mZ��������v���?�T�*5i��[�V������7Jy�k��*�&Q0�j}�_n-���`&����{z�B�w�y����������
��l?w���p4������"fI���4��������t���[�j���������l�m��Y��=i��}�z��R��|���~��k��ht�s���L,��w�7�]�w����������?<!^�~���v�5Z[����s5e2��=m_T$"��^���p���D�k
PT�`F=�����Z����1v����j�Yu����D��Y��Y�8C���V=#��$�<�s����%��h�������j�W����t��3�y:W�>{��u6����fo���P���d�/P��
�`z7=���O���%���~��&I��(����DAx#2���f��~�3P�&QEc,e,��0�g���-�����������d�RERH�fs�RE��a���]gy	e���@Y��p�<���!w�����[���h��M�/S���x���7����=�KK����\�����ww6���mN��?��9�7��0;s�Z����S��	5�$��A��d�i{"�Kp�
c4�{-��M�C&��p0��Z<���jsm�7���H~x���^�_�k;�w�sI���m��^�����A��L=g(IdI��C���X
��f��<�>���(���
���;>��=l�F���	��HR$��|����m
�`�#=�CPv��i6~����/}����O'��e��k�������|?6�I�% �v{�w�r�w}�_�&f��|$��s��;L�����d��&��@�� �^<�p���=��x� ���w����
y^��X������a(�������w�/�;3�(�D������M�V����k���M��~�����;��f'&O��g����8
�(`�9_+J���)YC��(fa��`�G1=�������3a�&������*���IM��<�<��}�?�7����$���T�cq0���5�iw1E�X���*����v-~����
� a���@�4&�!	������?��dU7j3F��=F��X�'qoey���e�������/�������m�v�����WW[;���>�wE���qn�l�X�������Ng��u]�9G*a�8������KQ��b2�y5�T�'�
�x$�c������Q��iz�� R&+)qV����}�BJ\�s���;_����Y+�9=�8?Y2v'a�D)��E�cN�a���K�(E���-�Y%#f��U����!7�<��8�	wC>
�a���y�I�e]8������$p�.���5��QeRv�� ����`.��9�m��'��b���x��w��y���������_(�Zx�����kyS}�:��a������8�G��u)yX�I���4�Q'M*��7��m��������K��T��_���>��K<����Z
o����s�Z5;�����N���vz�Z%q���l��l��L�H\H �f��4��i�I�[@����hk��e�Ntw����A��z�����
�3�K2[E��Z��FWfDI����/���r����Q����KK���j�� ��(g��M���t�FI��C��3�� ��6t���f�a{SyzW�i�?�_=�w����f�|>����;p���?��#u��nO��`�;>�,��sK�'��t��/>��\��a��}�@�A!��m\��v������fVW�'n�����	��b�f�������������%!�����������w{�A�d���������w�>y@��a��U������%���Kb�&^�I��Z�M5cH\���o`Jx����T,�n?�����������k<�Ld�z�z��eO1c(�y�(�����^mP�t�W�����V�1��������K��e��<�,r���f�'�Yoy�^��s�f�b9�En���Fc57U���������I���o����8�Q�{k9l�a~�eQ(�tQ�+������:������O�l�'�����C�������o�#��Z7{7?]}�����is�����f��5�d���V�OM�;�{m�Nh�/����3�k�����������'OU-S�cyO�c�H�!o������(��*,M�UK��BN���qV�525[-�V�7[���e���zF&5c��LA����?'DA�4_}�����-��
Q2�[7�nc"�d"M�����e+�\����K���y��=��n7��\���W�6i�jgN�X�+�tRD���l���>�p���<���w���e�JN<9!�l1+���t�E@y0����g�_]���//��@���N����&����R:�W��Z����h{��}��\�����	��[[��_?�b�Y"�,�����?��?:���a��S�Jo����~��U�T�SW6����<�����b���������?8	)c��X?Jv�����x��&rB�-U��������x:[�k�+�7����)moY��+B��z���)��������1iM�/���
�"���x��;?�0~�S����^^�/}t����������n�Z�� ��+l9���C�B�9'K��"z:�5����������m��/��G���a��B�T�L3���/Gn���,�ox-����Q��jnn���(z��{x��A���_%k9�8eYf��;W��=�;AL����5u/���\������h-���i�����p������%�$�,����qs<�v�7��Ld��Y��ko��^�Y�my�������M��\��z:(D��Q���I5W�sE?=�3U��a�B����Z<p��t���F#m����d��+9qnJ�D�t�}@y��$�Fmo�$I	���:8��=/��1��|�a]��)�T����W-�nq����(�c$���h����T�G���n�u��V���m��G��0��|�������	D��f����a�?����:8����d��\pB[o����`U��{D����B���i�\�O�LCC@�=����������
���lM�.��%�h�i(l���c�?�7o�r�����������������?�}���GD����=3_?���3��[<�PG���c�y/m����M���r[��8�'L!�	�"(2�C�'���N���%��KY��F94�N����v���k�AS�[���Ku�D����z�����q����0S�b������=����<��V����V��l����<�������\�	�.�T���k��k���y�I{;^��{:UKX����@@x�^��)�bN�z:(���$r�����x$O=*����m�&L���D����ez:��Q����-���o~u����������bN�����o���y#���)��(y�� �|[4	�`}�}o����G~����(������uVgy[��b<�2���?�������P��z���Y��/����|��KRQ{������z���������/��s��

<������[n�������PXH��;��Fos@�������N>o���K��u����v?��>%���)
>_�z:n�>��lg��>KR�4�t��J��wg�9����f���s�0��l�q�</�����A���
��3'�	L�p���tz��f�{��0I���X���!�u=x������nW�#m�V.���b���Q�9�������N��Y�x�S>����������ua�$M��.h���+��\�B�4�r/�:��!��,�8 r)�c_O��o�F����qLy]�*��y�l��&h��H�I���3�CJb�H�U�}VI����8%)"�����'z:.�vXk������0[���b^��x��������"��pyF����.X�����M�d���n:[�/C����^�.yw7��9R-��i����z:)��&T-q�"Vr���[(��V���\�?Z5e���aQ�����t���B�=
p����S6��.�T�t��I%�n�'*���D�R��U�&��5SWq-	��BO�/���|g��S?����������U���T^��8���8����l!�����mN��8^��`�A���3�>��cq��4�t�-�
�����	r>�y�e;z:G�C�>��c^�����mv��:.�E�t�I�r?�����=��!��������4O�@���"j�����-UI/��TR�O��=u�t�����Wr����^�t���^gN�Z��R;����H�.J$EqHl���$vb�������e���<@�	��(0H7\�Vw��j����������&U}:p���m��5�G�4�C"�����B�G]7l�a���A�8~(rL:��&����8��>�������.[�S�%g��PN����5:�V=�������Q��q>ct���O^�������|�I[���1rz^�W��}:�N��vZa�
{Q&��S6c9�",�	�"^u��l���p,S��9�I��x�Q%�c	�P����c���
3�d����������lReb�t�U(�����d����($asp�}y���C}:]����:���{A<����� IDAT>,�D"%*�!Kx2(��d`��/3�H�X��GZ�F�QN"<����1����h����X��4���>�<s����4:\<{zV��0�����a(d���^��dC�
tf_�O�����v�V��^�����"�F{���0�k�TP0�o�����������N�^TZ���f;za;bu�^/�
��������U���t� t�Gn1�=��<qx�p;����0�8
��������Z-p��(0(��tZ�p�J�`�?����@@�>�����J~��
;���t��	(~��z�em�=�������
G�� ����v����fPn�N�����N�����I@��T��qm���Q
o���'�R'�!�!(9>U�����V#hX���h���lJe�(��x��6�!b�B���N�^��v}/�JEC�$�*��>��6:a��[�����Qv4)&XMb���<�aX6b���=>q�a�vU"�`N^V@y�������3���/,]�}����p!�y�����?;���q{�����^�pP��t�j��Z��(c�2���&TF����p_�O�aX��~��;�����j��F��aq��z����z������� �:�8��Vnm5�+���y��U)��>�G2OY�I�E���q�N4*�s����>�,�e�Hd����2*�zf��[\%e:u<��~vz�������_����+�{�?��$�J�����Lg#)����D~}:IC{������a�O���a�a����k�"��"	��N*��TF�r�e��TZ����y-hn�*{8��
���Q4$M��/����}:�/�O'*�Q9����P"��3�^9��zk������6Z9m��D���'��u*kb����@_vX�l�c^���0�C^����8T��_�n�U����Wn��U�������w�r�g��A������8T^0���O������7�w�6�n+�������''&��"Y���B�������x��]�����W���W���;2<2��w����+��BX����i�
6��[��F���t^���?wRZ����������f�Z�T������L��L�}:�O�nX���p�4����j��s'����v��;�}��d>;���%P��tZ���^���P�h}:���t���/�u����� �~h:a����������|��*;�����(��t���z+���b:��n��������{,�jlBab#����G�I�u��!�K����q�v7���V#��B�G���4f4�lL��	 �@�������^�*"W���HZe�(�J��c��S�t�A��.3C;��$U}:��}(O��H<Mg#Y�M��.3
�t���}:
;�b���*2�`(�G������h#�������a@?�O�Q^,�`��}:(���>�ZvX��]3,�O^9�>!��)�������`��+5�	e}:�}�sl^�0����C�
�v����VPi]/�9���+M�A������B����Z�V���� �:;���c��@@�/�:>u���
w�p��Y�����
v4�j�*b����v���^p��kuCY`�
s"��b�e���� ����}:{VPm��V��C�!=�����TXW� ��/�<����B�g&3wo�GUdXW� ��g���^(rL*���"y�A�
@���t2�P����<�O���o�n�P��t��{��4������C���������^H�O5}:(u�O����H��a`�z��@@h@!
�\v�%��z�0D}:(����c9a�n���m�pLT@�
���������{�M��0���E�������?A��z���u+�m��V`:!C�"��Xd*����� ��O/��n7��Ro�8~����|D�X��5���tP�6������z��9��J�aH�����m���f�Ym��>�����2��#����0+[�f�nS�'"b����n
I���k�����]��a���,�p\$*	����@��t\?���e�u+,���v����t��X�i2=>���s'(_�M��X�������*���r'��,����h�����kW�����>����S����[���Od���w���]��Y�J&��������??��e_��k����������=��U����4RH���P����
�w{��`�Q^g�3�T��+�&3=�1LP�>�Kv�������u�����NE�X���h�����s�t���g����G��.���n����;���\^X��|�(���]����u��9E�*�h9�N�������*�����cF��q
�c����t�����V��L!��&�T�UD���.:���_�����|i�����������eu�x����1���Lfd�;������,�BD��x���N�x�e�|�n^_�mc�y�������b.�n�������W.&�o�7�*	��W����]�^��K�5�������~�:jY��{�O��
U��*{���c�.3���8ep�Y������;���q��o?���/\_����C����,�U}j���N������ �f���6?����a����s�����Y��o�����J����N��y��8"��J��]���{E�~�x<��a��������B�~��R\a�	v"I�y:��'��6��G������������K��W&?�� }�s�|���������ZYR��������rBD�����N�m����������)(�VHA���������}:Y��+�&1
��p��P^�G��6���Vm���E��<�w�����F�^"qB�v�K+W5�6��Qa���pW��%�d�UWw��N*��IMV Ac�tF4�]w����5���H$`w(!�>Y��m��C}:y���A�]Fy������������t��u�_.������By}��?��s]����D���o�������r|���mz�^�)���y5��mD���Z�����E��+|hsl�L;D*��a�~���S�^��+P8���6�|K;j���fi��b2��ET��x�c�D��U,�S�5�
���v�������i�|��Q5��o��������7�f�3]�9��Q�WT`
;�~��(C�S�\:����]GX�6���b��cK���R�)���k����gw>�����q�{Sj���v�j7}�{������X���o�-���WP��<B���1&o���>�������
�BdB��nF�t%�#�����=���S9�����noi�l��CR.�s]�s�0x���,'1ATX���z�G��KZ�Y�����c�����a����[�Z�P�e_���������1�M����m�|/`D���n��]������N������B���*�8v2�Sw�(/�d�f�|��4"�r��-h��1In���-��2���s!�J�W�����W�;;�e������=v�h"�`�hx9�{{�g�u���E�8���,�w}Jk�Z������N���{�~��h�|h�R�H@�t��'H�^�.�*�c#�j���x���5]������mA�q}�������]�
�e%��<�����B"
��C�	k��KC�&t�0�I���CSa�(�g���,b�^�]6������J�Pb"���|�������k�����*Y���exN�)�����K&���w?<{�[�:�3>>���0��$����T�����N�Ig*)��T�y�2X6dyI55*
����?���fE����<T
���j���B&���(O�r��jQ5�qv4h����W�D��u�V���=)����U���TTu\}�s�{��K�x�5#�a����>u���
w;A�XI.�5�9��'^Z������(��!�J�^����P%���`N �<s
�TMR�<�9.I��L�3��o:|/��M�y<}d�Tnk�������F��hl^gS*��L��x
��o��w�a7�l�ly��C"!p|�j�U+���TUU��*pq"�)�H
�E��X�Zu�f�U7p9��������J��|f<�P$�A�;XA�u=j���TZA�t��c����X��G�<�W����V���vL�.|����9J�-k������fkKJ�#�\L��I�GS������WV=ftz((�vgw{s�n�������O�4U�d�A���L'�n�7wz�V���*1��\Vc�(�E��tP^�#p"?t[����V�J�b`4�,%�e����r�����gNL
��UM&bE�������]�\Z���L��m{�s�zgk#b�G��f��)�|�@RH������N�g��V��?�y&c'�����qx�����{����NL>�����	�n,]�����*��=��'F�3vd���;5z$��HU3�S��=y��<w�������T��o��<W|o����g�f�.�
��(DA@�nx���������:{D��cLBa5�94x`:����)VP��xj�}�\�(���*��##�I��;��J�S%""5�PS�������'g��(w���%D%-��3�3'|�n�� FS�������R�*��/�~�����N���A��!i2S4����������A���2������l�e-;���mpD*�+S����6���u��&(�����r��|���O|��#����srLJ���Vq���@��������*$�t���O�H`AH��^�I</�1�d1�/�C���^u��8������@�1���a���d9a��L�� ����	B��w��joc/(5"*���t�h(0^/�=2���	�f������t�)���� �����v#X���*���z�������T&&1Q}:(}q�O�
[�����[a�
�%f8�N�����9��}����Un�z�v���S�\&��Q&&1*��
��S�tjV��(&1Y��H�Y
}:��}�d�CT��9�M��x�������g���L1��$���F��
����t�*������
��t^/���0���R��C�>��i��P�3�
7���0}������O��/?�`��B� ��!�����N������8�n���k�r��q	}:����B�G��>3�i�v`{�>@@���{�����`���5�%]ff��4�t��~����M�r3�w�^@1��j�D�����(�G�}:{V�T
6��J+�0T��Y�M��eb�t����6ZV�v}���c���q��$�"K=��Sj�n�2dD��d��`�(#��5�-knau��kul��{��9%*�Rgg����}:]��(3g�RYMfd�"T��3���r�������ol����a�#��W���A��IJ�d�NJeF��kDq:�MJ*(�&�=�jU%g����b�����f�B9�h{m������#�h�{�
�LA�s��k��c����a�c|�����zj�C�j���������\���}:ox4�!)��a�/�P����O$��;<�J���S��4�>x(�0�21�I�c)@@�C�e������)@@�>��NPR�e�<�c�l��D��2H(��o�y:^/�T����F������9�e�O��;n���f7,�"�9O�b{�-�����sQY��
�������J�gu�X�����#q��'eX����h2a`���k;�f��u�^/�x�0�r�ECS�����n��n����n7�j;p�P����:�bG�.��k��_K����v{}�lu�{��������e���������=jt��zps��g�1����|:��2��t��N�������,������#��������~����i��N��vZa�
{Q&��S6c9�"h#�&|�5�u�+�S�+���^���������}:{Vp{'��53�X�gs����(�J��'���X�c�A��4�%:O)a��l�n���A�V��v3h�a�����&��8��(�w�6��j�[��?L��X���lReb��(�_Az~h�a�D���C�c�1v$�>@@��2�����0�A� ����aI���/��v'(}��K-5���3Y�99yk(�>x���a�P8<��?X��z�ZS
B
��)�!"���r@�&Grz$���xY;��d Q3(����5��
o�f�,C,K,C6)���#,�M�5���?��6Z�J��S�1��h��|(�P�%%*��,G���t�P"9���Z��l�I���2C
'�bl��Ud#(�������++���%��n�Y%���B��F������P�����5W�G��(�uv2��T��P�ef���\4*'�FP��umgs�V*��$
�Jd=�j�
�]�����������	w�`�#���lPFc��T�M�OP�e���N�����T�m�b���*�h�6����qz�vp{���V(p4�d�:�RY#��"�t^� =�������q'�QLz��P���
�_��i���
,'������H1�j#�y:����g����)�O���L$��	��$&*0��(p0�b
�a�aH�)��#I�h���(0�tBD	���75��_n"�y:�������$��L}:p8�s�{�>8\X�x}:���)�~G����L9��>�����su�M��h���Hs'���K'!��l��f�
�EEY���	KD��6���8�1R��}"KB1��X�n�q��b$���D�%"RF�0#C�hT�X
�I�P�=3=��VW�,C�S��D�e�^z�0�PT��T����bT�|t�\�961���1,(��0aB�!�B��S��x0m:�j��P��t����������'���h2���8tPPPPPPPPf@�c0�}�4��Q�fP��A@x���D��s@F�R�A�C�{�>�Gdw��z��s��
��H~�#D�k5��V����R�'"bD����n�%��_x���6�Nc}~���m��������9������L�r}��?�2�b���DDlz2r�{'gg������/�����BDD��%��ll��o�����_���c
f�l��%�n��_�Z������+�]?����n�����^p=~N���)"�5��{�:�|�~e�o~v��/�keu�8���v�~�pm�7����{��;=�%�Z,�����T���I��p�M����5��{s'[7��/��w�~�����U""r[2p�7���������v������������u��>��i���3�����P<����I��%�/�+�K��;������o����r��K�&��[��7��WV%���RQY�TN=���������fc{c������������o�K������dn[�����"�����K{�g-i�������J���C	]V Ac�TV0�w}���6�����"��D"N� ��%'���y[8�|�.�7o�����������M��e����\��W�G;����Z��2�����B���6 ����Z��R'�c��{����M-�=PB���f�m�|���XTS���gY^c+�V�o�����P����.��^�b�f�\n6�m�����������F<��^�fc}m���04:]*���^ol������������`r�3��;^����%n���x�1��y�v�2��x7. IDAT��=�*���6��������5d�_`y�$_}�O�\���v��|yg�4D&�a"����h/GFjX�J��p���x+��`@P)@���JU�_���z.�K��(�+�`N&UV�T";B�����"�u<����1������ j���?l�
l�l��w]&J����������0�=}�`
��j��7o�"����� VN@�P/�V��x������
�M�A�d�|������c�x�]��w��^
C���`�-�4[�e2T
�������h6�����L��PX��#,�
���z�K�{�#A��N�u�����$�$�$����@��-���>���?�r����J����R�v���@�P)@���J)�m?v����2��Wb������[�v�e��1����*��j�y��)B����,S��l��}N�T=��\7�z�oF$ r}�c6f��KQ^�cQ^������q��,�K���s<�����w\�����g�~��{iM.h��)q/�<.�@@y�*����T1.�u�j�*���W����5�]�V�VW�
����*�\�����=�,�������f�*�*5�J�9E"�M��#��g^^^t�����b<P�Rgw{s�n���;��3�yMU97�B�?�Pr<�i����PeP)T
P/Tjp�z���6i3�HBR����>�n.��i���d��b-V77h�)L?2vb<�VH`�[�������������*�J��J
�R��
C�����'U��������K��K7.|��;�����&��#�i�����(���!}���Wah�O����P�
p��R��^��+���7�}+(Bb<=��=�-�����|�d�~Ra�QM����6{����7B���P���o?uzrH������+�iG5
_���B��B�9���z��P����f��U������aH���="��h��v�M�M���49&'��ndTN�o�< ���G�p�p���

 � ��k���A8��V�Q��5��.�O"���Fv4���J����u\�K��n���
��m��U�
Z|���e����f�T���>��0ZN������jw���]���Fc��jZ'�����QuE�����=�Z��o���0+��������D��J���YkM2oW�����(���C3�������9K�b��&�������OW���;?��:��)u\P������3���5wA���P���K?_�2����nm���IOFf�w����z.wRVQ�CU/�������������w�DD�0[x������?}r\T�x���wD���<�D�Y�����l�B{�G���gs��T)�~����������+[�m3����T������R�IkR\Re�bPs'���������_�X�S�dKt]7P���?n������~��W��pHt-7�;/R�\���t�TivM�%�{����KQ7�?��LF��&j7�z=X��\Z����im��!�Lc��{�s�czL��U_B����5�co��
�~m����.�z�N���H{]��;}��������s�����^x��:����9���\���W:s��-!1z.7$��`�s'����7>YK��3gs�p��2j7�#���ms/����m�h,IC���][_����Vw���O|o:�����;_�������'
��|g�Q��P����|k�g���z�S?��,��m�����{�v��yu���>e���AN�<�Zc<���;��3������?�mV���.M�w���r�����.5�7�^kn����ON�w���)mr�����������*S�YPq����l�i��KK_|q�r�.s�'������'E`Q���;]�,Y��]s��6k�%�ydVP��DZH��??>+gi�Bf�+ww����TiY�����z�D�m�����wU�����[����P+S�]w��n�a����z��Xk��g��kj�f�^��������]u�8y>�o����"
}���]��j��o�����TN�q��J�t~d$��%\�uz�;���p���������tqay^���;����O���UY�8��v�:�k�K7�z�7��f���i�!��FK%�Hff�}��YQ�+dKO�we��p��!��}{k�[3Z��j������hL���[�C����5���b.�|{��3�B3*-�}��!���k{{�mV�������
$b,��&4�o9�j�t<�(����A��U]_�|�����f��:U:mn��f�Fv4l���� (�D>����(�C��^cX�Q����_��oV��z��!�����]��0���#Re%���'��Y��4W���V[�e>gML���j����z��FS�ci��������\�v���

q�!���v;��*"�����u�
��z���T�S�x"��������{��!���y�G������+��\��~b����%�WJ�������������v�9���S���n�My5�=��b�^���t}{q�j;V�F�����VM��p�K"���G�}x���_�x��M��/��������������D��c]��&���f$�Fe��]���������zO�����hz��w�h���J�i�<���x�*���X"���$�y4�b��S�Kf������[-A�$=��>Y�F�Y�������iZ�]�ED9"�"�1r�	�3�y�7�S][�|������
��|b�p�����z�=v�=�ly���zm"�(����������Bu�z�r��[w���S|*^H����5���DDA+�xC���lu�f������4s����)�����teO1XCu�g�j�W>���gw�f����NL�)��c�vX������K_�h����L
�_[;�t�7�s��Sc��8�Um�k�d����;�~�oo���o}8:3��n�;
X����Q�	��z.Q@=����
��D���C�O��\�w�v��b��c�N��������%��K���j�k�sN����C���+!��Zs����kWn��l�����N�	��`������n��������;�����;�$m��m�n�����.�Z��X���hA�.�o-^�����J�#w��s��h/o���]���Kj��	]���m�=�J!�����n���e��Edq�g=|}C�SP����xnHSeM�xN���E7w��k��I�����"�n�k�S��'2����O?�zqiq/�+���sG�9���%j`�;4����Q�a'����������	���-]Z3?�\�f&�
��u��'O�d�qj�����/,�j7�m�gy��|�3�Q����,����O��;���j	_)g�������uwZu�Y��� -������[fKH����!�*���x�b�����1$>m#�r��#��uwl�������������7�o�v�H����������&��C����r�v��Z<���t2����6���+V�2}Q	%E�l�k;��k
�		%=1~f"����K>Y�����aZw'8�J��+"E��H*_�/��`o���4g�����z}������|**�(F?I2�����qi/����m��g��Nu{c}g������FB�����vP��v$h�����^����;�P���~��t6�%���w�N�h�����u���v�7-���X��ZC����I��������[#sqe��������?<7sf���u{y��|�r��B�xET��xz�r��M����3?Gm��b���ZX�e�����dRy������pN"��g�'�����q�������5{���N����d�
���=~�jwh�{�������K�6[;BJ��fs��<u�E�4E�e�yyd�-�w���b����[a��\���-
�N13U��v(����5
r�^2�mj�q����y��R�����T����}��A��*�G2��w���\��w�������cnd�����������B�{
���>8��]�8����uq��'��<7s���'31_�J��a;$�vm������Bi��m��?���g��d>����[�?�oG'&����5�������������_������v��S�|�U�"�T��{4�����Y�Q�L��y��
���]�%%#��=:�O

F��x"^2���s�M����K$������������N�Dz����=�*����9�(�'�������+(Br"=��������
$$�L2{���h�N U6��SgK�I%��l���
�W�5O_���J���$����D)**J��h�e{(��O���<��X/��CD'�����PF�U�C���������n�Q����6����e�dQ����Ey�|�j4�[�F�k=�	{��*�v�B�Z/Ed�����F��mu}�k������y�����?������St#.����R(p� �

�r8���C3(����((����((��p#Y8l0�((���p	
6�A@@@@���!x����]_����9����"�D�]��M�Yo�'eT]��<L	�M���ly>+j�q���o�9�����D��X ��i������\�s�����O������|��R��u�U
?���IUT���J=&��+�_��e
�����r�H�FI��s����>��7V(o���u��^m�cg�
��$'������fe��g
���bo���gU�V.���lv����������'z��
�M#���s?t����M���:c���#�i���������������b(���Z6T�t��M��luw���=)������r[������D2��'w�yL���������ON�E�CD$����
#.�|w3���F��Vn6�������rco�E$���'�$�H�5#9��
U�H�����l��n��t�H������\������O��*�~�Y7�e�r��Kk����N�j����Pn������#8�������	�,u~��M+%C������	61u&�I���<tUDiwS������U�DD������hv����w/�����k�����W�����C��i�*[GF��o���ch�x|�����S���J"����_�����%�1�&:1yj���?��U���}�D��)������{|�{?,*�v�7���n�y���������N�������7�H]/&�M����^�jt[32�L:1\0�%\\y������+��rJ��l�h�W��_]�
c����Ry�|���+_l����Q]�s-�c���	�|���+K;w�w���Gn�r��Y�����2#i3qQs���d�.���#���%"��z��jI�_-2���T9�����O����V���xye�)W���U��c���v
���8�;�������r�����`��3��i5����Et�d�)�`���>��S����*��-_��x~lz8S�4+w�~�V�[�o���`ZR�d�^����?�DD>�iV��~1�U��������@%5l���������������#"��NdG������S����e.~�������������d�������!�Q�~����e�Z��������&��R���G���st<Y�����8��O3xMWn���,�
��z����N2���LR��Y��|�l4*[�f����?|e�3.o`zN6b��#3����p�B�6����n4L����V �~���W.�6B�O��~���:���!��;���5�b��kVu}����Z�+�k	2BR�'juZ��^P��j��n,�H�V�����N�79F��0Sk�B�77���xm��r$��{�����{y��
��w;����U"��E����o�|{r����0���������x
?�y5}��'�j���9��hn��~���i�������,��rZ!G�D���j2���tZf��z�of�S���'�/�M���Q��?�&���.����|�����(_��W�29=q���&�O������I�Sq���l5>��)�b���O49|�L�H��r��u|[3�Ak1 ���-��d���Y��{z���i�[57�4��#���x����D$J�\:���J<�GH�8��Y�^�	��
��3���j�)kh�tR��~�=�<���V�X�.	� �o�DD%�����2O��e9A�Q��5j���]�x������h<�5$�A�#������kP^}v�vnv����	Gq��-���y��slZ�&��TR�dNd�7��{��J.o`��(����o������
%�S��O>t���Zs��j�=_�_�Ies�J�G�/y7���7.��K����l�Y����S��P|B�����_�j�L55��%\_)"������F�Y�#A�����]��m�e�'}��1Mok��N�59,��?-~�O�����f��Z��l��$�*�q��=����hXBY`��'�.�� D(Y/l��9�8<�^g�v���'D��������\^SgN�����Zz�r�X��U�[4�����Dj!�VFF�S���f��C]��K����a�nc�v{����)��1K��Vus<��249�I�7{��~�{������L�o8�3��x�fP����Y1+_l-��YQW)���'��J��W��n�Y+��qcB�	b?�Y����L������;+W���U�F������J��_2��R������w�]&G�*'��d�0���)w��!��
sy^�h�D$�����r�����B���~��"������V !!��t��W�m����Yf:9�6
�PEL�
f�w�����������\?���s�I��Z|�D��T��k�'�����s?���ngT59:��z������^[7?��g�M��l���*G������~?r���?�7�~��D&GD+z�������V�������Z��_}�������v*y���9s>��KD�|������3_9"�e�)��:��_���������;uf���>����(�M���l����D!����]�����T�j9Gq^���HH(i{�L���YCVY�{�'��'�$�#AT2����;v���������,��Dzlb��`I5#�g�6;�-7��FH��3�Hr"=tdv,w��������)���|���|��M���f�x��N�M,�L����.Nc�6�O�4������6&(F\"�'��x\o��z�8��N
���[��������U��|�~�'��{r�G~�;����F�����|��c�c�25���z�o����-O|t~x��#R��n���=y�K����49#�a����+��W�F��"�'�������������_����QkH�w��O�'�Y����
��A�W��������x����B02(�����1x2�A@@@@@@@@x���a�@@@@�W�KP�����;��6�+�4�	�� �
z�!��{����rjg��J2SS����_�j���+�����g�[I6q���8�3F,R"%"A�~4����DD)�-[��#y�����n���{�%

�@ ���@ �@ qP�@ ��B �8(�@ �A!���A�W��B _���p2��l�A!�/�A�,��OsPl6���"
�@ |!X0����B ��A!�@�@ ���@ 
�@ qP�@ �@ �8(�@ ^9�#Ol$
�@ |a��u���@8QP�@ �@ �8(�@ �r�����4U���,c@���rLD%I
 q_#������;F��X�
"	q��rN�vVY��1��%�`����L���WM���e����vG�Y���{/���X:6qY��|MUT�C6WP��//	��.cY.�k���&-��	Q:���V�n5�Gq(�h�R�F���e��H[����|��X���9:^�0:�l��h&$�h8�x�+�|v���\n��?���N����vKa����{'�Ay������>��IDATZ��WW77*#Gs��.D��q2���@�c�F���R*������@�^M$��h�B��<1���p2:�����g�	�qe=}��f[��b��D�aw����k��o�7�p����e�����������
P�y����d�'+!)�<�2��������X���7��k���q>y|?��d��'p����&�_�S��.����i��������?S0�[��Jlt�Na>dR�m�
����p%~���y>���������Y�t�:��S�n��
��Q�k�N ���2a`M��vrk����y���[��N%�������i��=�u$�bv��rM�p��0_

i�w3�VAs���;���gRk[�;;J��fM��}�8D���Q�*Ee/s+��](�*�s�i���r�b�/>AW�m���R�[y��A��Te��V��{�Kc��>���������wJ�_�S���	�U9_���l��L�D/s���V�c�}����z�,�����T����|4��o�a����������Z��zd����,�����{����k��[[�J@��ir����d�y)�%D���:��wZ��nG�����@ ���X�.���o���I4��Q��M���KU���.K��;��y�^�������RA_�����(��*8���S����xy�*�b�����/�gU���h�m��
�<�������tN7v�?�W{��\�����$`}����m��^o��q�x]!����?���`����H,L���.n���ono�zLPJ �C�p��������Sw6��z�Cc'W�N�����J��N���S+!�0��[�%tie:�
���W����������+pnj���hE�^���[�7�b����6|�q�T���@�z����������U����v&a��tu��~���%&����yotr��=���k��JW*����@ ��� �3�|=����}�
x����]o���Z7�K]�30I�d���`����U��.�D$�""@{��>�u��>���Nw�{*���V��#����/}k2X�w{"�y�j���nD��R���B�ZX��y�I,]x��+!�������c������jug5[�������Y��������\����'��x������?���~t��T"	J ���)�b��H�"��g)c`����o

��������(`u�d�J	��8��$�aQ���D2N#��c>������j�
g�8���I���Q�����f=("Z$;Q�/�Ay%��<���������r��kc0_����	���Lo��m��������W���+�84s��D�Y�[��Xt�����hT�t6h���������y����J�NS��Wd���R��~mn�\\<�����;.��c����~(���owni.�����x�Lz��+?j:R����.U�����*R�����%:�r-\	��?��
B���U)�R[��|:�������Y��=��^_���.��Mb$�iqu����I���+Rv(���*��I���.<�q���^�b�s	zQ;Ql��G^%���`mPR���e>`N�rQ�?M������H���V�$wp��8/4t��m�:]P�@��+���/�����S4h�H��~��t`'��2p|�����M�rG��@�5(SW����(��*
G��Y��PN��`D��Cf+�L�V5����h�;��.��c�TYhd;F]��X?��
������W����M +�x�pF����������-C7��6�V\S�o�7�""#1��:l�V�t�dmv��W����bA��$WT#=���|�u�3}[�a�\�
���h"9�"�	FM3w���q�G@�
p(�-��q���v��;I;G�4�2LZ3��a�-Z��6�=�Wi'����S�9�am<E�,kgY��N�C��
N��&6�J�1|��VI*Y�)
��PR���[���x'q���";���k��.t���W�%���4�����
}$^`H`>���;K��HU+�����V5���q�����d�'+��|el�,:.�h��d��N��e����R���f
��J��mc�T6�P�qQ$'b4�'����5�g�����������?�ad�4��q=��a�=��M��wr����CG�%Z�Uk:�`�'�^����xVE�~��3�uQ�_�^k:���:�|�8��#@	�L��w��n����od4�����]xsi��B(�T�Dk@)�S��3yt��&jM���fW���I���:��x�D��\�8������jS���LWP:�U�.8Px)U�~�T�+a��J |���>���o�����9���T�\�\u7aw��&Y�\���7"�~��gS�NC��6r��n=��m��i�Vq����t��4�fGn�������5(��������F��S\���1�#����f��f���Dm���4�����5�����/��4Z��ZzB��ow��ZZC)e��{�2�J�0hK������JO��������j���������S���Z��Z��M��>����������r>=�-�c��9������]U�v��6�� �>8ch�*u'W���3@S[��Zn�nU/��?���-+2���-�����
L���<�1��MQv��)Z�����e�]��ahV��d��V.�i�)0#yP�^G��|��1\����
GE
X��Ke�����V���7'��g��K�p����<��k/>6����!��f��\N�*��j���
=OO��>),����P��k����� B������w�c}~A0�r���������B |���-��rv��{7�<3�Y����g�W���gg�A���R������9s�o�s����U�o��b��I�}��{ ^@D�������������������������A�;��o�jc�o������j����g)x-�"bpz���4[�o��s	�iAz��W���gr��s�b����rjn����f)������4����	jo�'��	��kK��k@-]��9�;����R\
���������j��#b�=�j��Y�V4�SB^�%�=���\���R3������K�������y�p��
O'���h?���~��zP.�"��-����|!���������<("���0[*���S��|�y��@#7J,��%q:|kzrz&�w��%}�y�+��g����?��i-��={n���db:�����9TR�����}��ZL�����!R�f�����5�jijH�w�@�
9(N�����;���N�9>�Z��b\n:�?��]�xh���VX-l�o���	/���?=u��U0{O�T[��/��;e[��9�|���n��1w=�������f�K�\��\�Q������_B�E�:��L�}gI���k� ��	X����2#�4�]X��U����!����y?�w����[���*��F�{�
x���-�/�������z{Knov��4���*U���Z(��X�}c��������~����s��b�{�^.v��U�t������
x����j����s����u~�h�������l��z 
z����6�|����|����q�`�z�j���gf&�9�A�#�F��6`$�AB��T���bz����T���{V����Fz�����e9;��g�5���xW���b�����y��Cs�+����&�T�B��IvQ���F���
z�g�� �BJ�E][d_�M�O+5�����^��]�!��~���U�����N	[
������a�A�UwL^�.��j�OQ���D��2�	�-�f$7��j�$�'�~���������N����s����������Bn�@�Q����2�'f��4�r���9��m�dt!�S�e��`�k��-2!���~GO���k���T���(�g����;9�,M�����L���2[� T���ui�D�����"W����6��iij�����4�t����t+���t�O��^G�x(�������5��V��/���\bk�;�M�^6.1A�H��
���7��nW@k�TsG����K��SW��48]���ntg��ye��������?o�<��w&��G��R��(qu1�"-��B��}��w�S��\�A2�����x�s�N����b�zx#,2-s�����c�<k�X�h�&PQ�cij���X'������I���h����x���O����|�����zd�.�r�c���	��{=�gv����;�i�r=�#�����$��`X'��y'�qeF��a��&�X�-:f���h�qx���
���f�%1�$:Y����������O�X��:��-������wf�&`U�U���v�a������%�VF���,o���j�4�g��;}S�,�p2v����\v!�������\������W��J�\������ �*6!��2u#?������5�xy�M�U�gnmwJ��YN����7�W����+����^��?���z�Q�n�}&���p��y7�N��\�n�}7_�:�����P(�
� ��������|���z�m�L��$����|�-����X�h�;�*�Y��,�� ���7~<�K~7�b��3�\AJ�����.����"b�g6���%��8f!�|g����W�����_��fm��H������7�xM��23����S,&I3'M��L�|�so���1zx�2��s��\)��������h�'E�����A��`|GU��^�Y�4����9��>�o4gZ�PM\��V6����������OVB��:N���o�W�����N.
*@�N������=&&%���*�AB�������(�oN.M�O]1:�Fk]\��26�1�F���=}D���$qP^v�;D.����q7
���8�]�N	�������������^��*v��)�����V���?��������to��v�9���MS�J��(G(���{(-�*���F����&�:�T�S��=���2V;�������4@��4��aE�h��)����T�]�7�b���r��{���V��+Q�� .w���SU�q��jg ���>V�No��+�Y�A�Zf����6X<����+]�e�}�/:�C��X���E����h6��G�0�u��������)�1�;�.�������J_������P���h
[�{P�m�o;8�����T ������%�O������{^��b������,z_N�W(Y���g���~z<�����d}0�����L����~]�K_A���4�#A�<�w!_�Q���3��d��p��o��E����3�������_�ML�XE�U�oh{2R�����2c��u�*|(r����(|�j�����n���M��P�b���~������](����;5(T���;�]���h���5�B<C�^�!r���g1�;���f�x^a���f����
���3\o���Q���(N��yn�q�x�=(�>Xr��2v0����C��..�L����j6��(��w���t�vY*��!�*���\�������s�1�_��0k�Q����������/���4g���z�g�d������^^	�<�4�V�m��������5����VB���r��PC��{�B�����C����6��'�8��l_���$�%F_6���w$8����
�R��OJ�����cV(�������nM�J������+��J�p�k�����\u#U��.����]YU`\j	���pr|�mZ��V���2����">@��,��J���V[�1e�H�b8�Wr�9\�����^�a�q�j���O�+J,�~N�>!���'������� �������o�N��/j�_�*��Z����N�s��(_W����n!�x�0W��ID#��g�]b����G������?<,��ZB`�#��lN���j#��@;f��$�L��Rc�B^nm��
��I����H(��(�c����^��\,h{p��<y���2�;��w*��l?�87��0)8��pkf����]��]	���?<����Z��������P�Ap�S)��V��}Ggi��M�;��F��*���r.eN6}�����*�����������&�_�y��UL <MAyE�&�Nta�[�2������T2���Q�zjc�����M���y������}}9vi>q������yk���{�A'DC����Hg� _X�G�1�c���wno~�~���Gt��4V�{��a���6>S&�c�Q��a��[������YC�@�0�g�������^#P�w���O�73��1+�p�g�o;�3;�b���dD=&)<>�f�_ngg��&_�L��S�2��u���`��������e�;�4Dk�B��<1���p2�Io����X������y��P�(��,��������S�<N^\^I>���go�����~���Pzi<���;��Y����Wzd����D�j��������������4X�`��1�`�K�@ ���)�%xIw�����hFp��K�z��n�-�%�J�l��v��vl������Dr6�4�C��\JD@�VThw(��89QrH.p������;�-%������7��Z<���?g��6��T�UE����H�L'#~I�D��USF�}��(&"��db����Zkh@	N����H �R`u��#=�y	��~~	�)dh�@xifz�k��!��nK	7k�-
c�CmI/E$�8��u������t[�t4{��{,�P�����)�Kk��9O=�t?�k$viV���;{�v���Gl><��8}�4�?'#K3�A�_j���G�h��aL�g`����X'�-�HQ$���K�������:���x����is�G���lx���������H���[�=z���X��!c#�C~���
�Z�Rj�}�	v{R-S�j���i�I���Hh2��b3�GEq�����8<T���~X������vKa�� ���3h��^���o0�����$.��hp����$�O9����i����J��(����*C��a�G���j����yi�s���>rMl��������|z[����fJi�����y��S.���t��>Tzv�w�����K���������w�� ������9�\��U���`�j����2�����5���,�7^O.�G�7��c�+u����I
7�Lo�0��
�rs��
\�����G%��}�m�������"P�W�A9}���+����n�^����3	>���ri�B��{���H�����x.,z���|��y����/��������kS!�X����V��z��n�#�	��\�>���qP�c!�S�s�d��j���H��x>v�r��@����tx��9��t�7�}�+a#���f��Sw��yNi!|���b���\+-^����dn�A�jC������O��'��h'/�L������l��&@�����u���e�����P8�Cdn_4���s�'��{DHV��b�L���WM���e9�
�����/�Fw����A>��A!�����B���@ ��B �@�@ �A!�@ 
�@ ���@ �@ �+��'<�#��T�IEND�B`�

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Fabien COELHO (#3)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Attached v3 fixes the non-portable use of strcasecmp on Windows, as reported
by the PostgreSQL patch tester.

--
Fabien.

Attachments:

pgbench-init-partitioned-3.patchtext/x-diff; name=pgbench-init-partitioned-3.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 816f9cc4c7..3e8e292e39 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,32 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table using the
+        <replaceable>NAME</replaceable> partitioning method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option is only taken into account if
+        <option>--partitions</option> is non-zero.
+        Default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..6fa8ed7f81 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,11 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partitions = 0;
+enum { PART_RANGE, PART_HASH }
+			partition_method = PART_RANGE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +622,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition account table in NUM parts (defaults: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition account table with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3609,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3625,6 +3644,7 @@ initCreateTables(PGconn *con)
 		const char *bigcols;	/* column decls if accountIDs are 64 bits */
 		int			declare_fillfactor;
 	};
+
 	static const struct ddlinfo DDLs[] = {
 		{
 			"pgbench_history",
@@ -3651,11 +3671,10 @@ initCreateTables(PGconn *con)
 			1
 		}
 	};
-	int			i;
 
 	fprintf(stderr, "creating tables...\n");
 
-	for (i = 0; i < lengthof(DDLs); i++)
+	for (int i = 0; i < lengthof(DDLs); i++)
 	{
 		char		opts[256];
 		char		buffer[256];
@@ -3664,9 +3683,17 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_method == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3713,54 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions >= 1)
+	{
+		int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+		char		ff[64];
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				char		minvalue[32], maxvalue[32];
+
+				if (p == 1)
+					sprintf(minvalue, "MINVALUE");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "MAXVALUE");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -5126,6 +5201,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5563,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partition-number */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-type */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..90a815ff3a 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -98,30 +110,32 @@ pgbench(
 	],
 	'pgbench scale 1 initialization',);
 
-# Again, with all possible options
+# Again, with all possible options but tablespace
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..a097c18ee6 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,8 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
 
 	# logging sub-options
 	[

Asif Rehman

asifr.rehman@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#5)

Re: pgbench - allow to create partitioned tables

The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: not tested

Hi,

The patch looks good to me. Just one suggestion: the --partition-method option should be made dependent on --partitions, because it has no use unless used with --partitions. What do you think?

Regards,
Asif

The new status of this patch is: Waiting on Author

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Asif Rehman (#6)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Just one suggestion --partition-method option should be made dependent
on --partitions, because it has no use unless used with --partitions.
What do you think?

Why not. V4 attached.

--
Fabien.

Attachments:

pgbench-init-partitioned-4.patchtext/x-diff; name=pgbench-init-partitioned-4.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 816f9cc4c7..38f4ac1557 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..6d8476af5c 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,11 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partitions = 0;
+enum { PART_NONE, PART_RANGE, PART_HASH }
+			partition_method = PART_NONE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +622,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition account table in NUM parts (defaults: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition account table with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3609,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3625,6 +3644,7 @@ initCreateTables(PGconn *con)
 		const char *bigcols;	/* column decls if accountIDs are 64 bits */
 		int			declare_fillfactor;
 	};
+
 	static const struct ddlinfo DDLs[] = {
 		{
 			"pgbench_history",
@@ -3651,11 +3671,10 @@ initCreateTables(PGconn *con)
 			1
 		}
 	};
-	int			i;
 
 	fprintf(stderr, "creating tables...\n");
 
-	for (i = 0; i < lengthof(DDLs); i++)
+	for (int i = 0; i < lengthof(DDLs); i++)
 	{
 		char		opts[256];
 		char		buffer[256];
@@ -3664,9 +3683,17 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_method == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3713,54 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions >= 1)
+	{
+		int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+		char		ff[64];
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				char		minvalue[32], maxvalue[32];
+
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -5126,6 +5201,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5563,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5559,6 +5659,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires actual partitioning with --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..4028525118 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,28 +112,30 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..998d814232 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires actual partitioning} ]
+	],
 
 	# logging sub-options
 	[

Asif Rehman

asifr.rehman@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#7)

Re: pgbench - allow to create partitioned tables

Thanks. All looks good, making it ready for committer.

Regards,
Asif

The new status of this patch is: Ready for Committer

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#7)

Re: pgbench - allow to create partitioned tables

On Mon, Aug 26, 2019 at 11:04 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Just one suggestion --partition-method option should be made dependent
on --partitions, because it has no use unless used with --partitions.
What do you think?

Some comments:
*
+ case 11: /* partitions */
+ initialization_option_set = true;
+ partitions = atoi(optarg);
+ if (partitions < 0)
+ {
+ fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+ optarg);
+ exit(1);
+ }
+ break;

Is there a reason why we treat "partitions = 0" as a valid value?
Also, shouldn't we keep some max limit for this parameter as well?
For example, how realistic would it be for the user to give a number of
partitions equal to or greater than the number of rows in the
pgbench_accounts table? I understand it is not sensible to give such
a value, but I guess the API should behave sanely in such cases as
well. I am not sure what a good max value would be, but I
think there should be one. Does anyone else have a better suggestion
for this?

*
@@ -3625,6 +3644,7 @@ initCreateTables(PGconn *con)
const char *bigcols; /* column decls if accountIDs are 64 bits */
int declare_fillfactor;
};
+
static const struct ddlinfo DDLs[] = {

Spurious line change.

*
+    "  --partitions=NUM         partition account table in NUM parts
(defaults: 0)\n"
+    "  --partition-method=(range|hash)\n"
+    "                           partition account table with this
method (default: range)\n"

Refer to the complete table name, pgbench_accounts, instead of just
account. That will be clearer and in sync with what we display for some
other options like --skip-some-updates.

*
+ " --partitions=NUM partition account table in NUM parts
(defaults: 0)\n"

/defaults/default.

*
I think we should print the information about partitions in
printResults. It can help users while analyzing results.

*
+enum { PART_NONE, PART_RANGE, PART_HASH }
+ partition_method = PART_NONE;
+

I think it is better to follow the style of the QueryMode enum by using a
typedef here; that will make the code look in sync with nearby code.

*
- int i;

fprintf(stderr, "creating tables...\n");

- for (i = 0; i < lengthof(DDLs); i++)
+ for (int i = 0; i < lengthof(DDLs); i++)

This is an unnecessary change as far as this patch is concerned. I
understand there is no problem in writing it either way, but let's not
change the coding pattern here as part of this patch.

*
+ if (partitions >= 1)
+ {
+ int64 part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+ char ff[64];
+ ff[0] = '\0';
+ append_fillfactor(ff, sizeof(ff));
+
+ fprintf(stderr, "creating %d partitions...\n", partitions);
+
+ for (int p = 1; p <= partitions; p++)
+ {
+ char query[256];
+
+ if (partition_method == PART_RANGE)
+ {

part_size can be defined inside "if (partition_method == PART_RANGE)"
as it is used here. In general, this part of the code can use some
comments.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#10

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#9)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

Thanks for the feedback.

+ case 11: /* partitions */
+ initialization_option_set = true;
+ partitions = atoi(optarg);
+ if (partitions < 0)
+ {
+ fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+ optarg);
+ exit(1);
+ }
+ break;

Is there a reason why we treat "partitions = 0" as a valid value?

Yes. It is an explicit "do not create partitioned tables", which differs
from 1, which says "create a partitioned table with just one partition".

Also, shouldn't we keep some max limit for this parameter as well?

I do not think so. If someone wants to test how terrible it is to use
100000 partitions, we should not prevent it.

For example, how realistic would it be for the user to give a number of
partitions equal to or greater than the number of rows in the
pgbench_accounts table?

Although I agree that it does not make much sense, for testing purposes
why not, to test overheads in critical cases for instance.

I understand it is not sensible to give such a value, but I guess the
API should behave sanely in such cases as well.

Yep, it should work.

I am not sure what will be the good max value for it, but I
think there should be one.

I disagree. Pgbench is a tool for testing performance for given
parameters. If postgres accepts a parameter there is no reason why pgbench
should reject it.

@@ -3625,6 +3644,7 @@ initCreateTables(PGconn *con)
const char *bigcols; /* column decls if accountIDs are 64 bits */
int declare_fillfactor;
};
+
static const struct ddlinfo DDLs[] = {

Spurious line change.

Indeed.

*
+    "  --partitions=NUM         partition account table in NUM parts
(defaults: 0)\n"
+    "  --partition-method=(range|hash)\n"
+    "                           partition account table with this
method (default: range)\n"
Refer complete table name like pgbench_accounts instead of just account.
It will be clear and in sync with what we display in some other options
like --skip-some-updates.

Ok.

*
+ " --partitions=NUM partition account table in NUM parts
(defaults: 0)\n"

/defaults/default.

Ok.

I think we should print the information about partitions in
printResults. It can help users while analyzing results.

Hmmm. Why not, with some hocus-pocus to get the information out of
pg_catalog, and trying to fail gracefully if pgbench is run
against a version without partitioning support.

*
+enum { PART_NONE, PART_RANGE, PART_HASH }
+ partition_method = PART_NONE;
+
I think it is better to follow the style of QueryMode enum by using
typedef here, that will make look code in sync with nearby code.

Hmmm. Why not. This means inventing a used-once type name for
partition_method. My great creativity led to partition_method_t.

*
- int i;

fprintf(stderr, "creating tables...\n");
- for (i = 0; i < lengthof(DDLs); i++)
+ for (int i = 0; i < lengthof(DDLs); i++)
This is unnecessary change as far as this patch is concerned. I
understand there is no problem in writing either way, but let's not
change the coding pattern here as part of this patch.

The reason I did that is that I had a stupid bug in a development version
which was due to an accidental reuse of this index, which would have been
prevented by this declaration style. Removed anyway.

+ if (partitions >= 1)
+ {
+ int64 part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+ char ff[64];
+ ff[0] = '\0';
+ append_fillfactor(ff, sizeof(ff));
+
+ fprintf(stderr, "creating %d partitions...\n", partitions);
+
+ for (int p = 1; p <= partitions; p++)
+ {
+ char query[256];
+
+ if (partition_method == PART_RANGE)
+ {

part_size can be defined inside "if (partition_method == PART_RANGE)"
as it is used here.

I just wanted to avoid recomputing the value in the loop, but indeed it
may be computed needlessly. Moved.

In general, this part of the code can use some comments.

Ok.

Attached an updated version.

--
Fabien.

Attachments:

pgbench-init-partitioned-5.patchtext/x-diff; name=pgbench-init-partitioned-5.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..673b175522 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,14 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;
+
+partition_method_t partition_method = PART_NONE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +625,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3612,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3686,18 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_method == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3717,56 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +5000,13 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+		printf("partition method: %s\n"
+			   "partitions: %d\n",
+			   partition_method == PART_RANGE ? "range" :
+			   partition_method == PART_HASH ? "hash" : "unexpected",
+			   partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5214,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5576,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5680,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires actual partitioning with --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5879,48 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Partition information. Assume no partitioning on any failure, so as
+		 * to avoid failing on an older version. We hope that there is only
+		 * one pgbench_accounts table, otherwise which one is used would depend
+		 * on search_path settings.
+		 */
+		res = PQexec(con,
+					 "select p.partstrat, count(*) "
+					 "from pg_catalog.pg_class as c "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' "
+					 "group by 1, c.oid");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) != 1)
+		{
+			/* unsure because multiple pgbench_accounts found */
+			partition_method = PART_UNKNOWN;
+			partitions = 0;
+		}
+		else
+		{
+			char *ps = PQgetvalue(res, 0, 0);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* whatever */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 1));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..4028525118 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,28 +112,30 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..998d814232 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires actual partitioning} ]
+	],
 
 	# logging sub-options
 	[

#11

Dilip Kumar

dilipbalaut@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#10)

Re: pgbench - allow to create partitioned tables

On Wed, Sep 11, 2019 at 6:08 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Attached an updated version.

I have reviewed the patch and done some basic testing. It works as
expected.

I have a few cosmetic comments

1.
+ if (partitions >= 1)
+ {
+ char ff[64];
+ ff[0] = '\0';
+ append_fillfactor(ff, sizeof(ff));

Generally, we give one blank line between the variable declaration and
the first statement of the block.

2.
+ if (p == 1)
+ sprintf(minvalue, "minvalue");
+ else
+ sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);

(p-1) -> (p - 1)

I am just wondering whether it would be a good idea to expand it to
support multi-level partitioning?

--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com

#12

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#10)

Re: pgbench - allow to create partitioned tables

On Wed, Sep 11, 2019 at 6:08 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

I would like to take inputs from others as well for the display part
of this patch. After this patch, for a simple-update pgbench test,
the changed output will be as follows (note: partition method and
partitions):
pgbench.exe -c 4 -j 4 -T 10 -N postgres
starting vacuum...end.
transaction type: <builtin: simple update>
scaling factor: 1
partition method: hash
partitions: 3
query mode: simple
number of clients: 4
number of threads: 4
duration: 10 s
number of transactions actually processed: 14563
latency average = 2.749 ms
tps = 1454.899150 (including connections establishing)
tps = 1466.689412 (excluding connections establishing)

What do others think about this? This will be the case when the user
has used --partitions option in pgbench, otherwise, it won't change.

+ case 11: /* partitions */
+ initialization_option_set = true;
+ partitions = atoi(optarg);
+ if (partitions < 0)
+ {
+ fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+ optarg);
+ exit(1);
+ }
+ break;
Is there a reason why we treat "partitions = 0" as a valid value?
Yes. It is an explicit "do not create partitioned tables", which differs
from 1, which says "create a partitioned table with just one partition".

Why would anyone want to use --partitions option in the first case
("do not create partitioned tables")?

I think we should print the information about partitions in
printResults. It can help users while analyzing results.

Hmmm. Why not, with some hocus-pocus to get the information out of
pg_catalog, and trying to fail gracefully if pgbench is run
against a version without partitioning support.

+ res = PQexec(con,
+ "select p.partstrat, count(*) "
+ "from pg_catalog.pg_class as c "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ "where c.relname = 'pgbench_accounts' "
+ "group by 1, c.oid");

Can't we write this query with inner join instead of left join? What
additional purpose you are trying to serve by using left join?

*
+enum { PART_NONE, PART_RANGE, PART_HASH }
+ partition_method = PART_NONE;
+
I think it is better to follow the style of the QueryMode enum by using
a typedef here; that will make the code look in sync with nearby code.
Hmmm. Why not. This means inventing a used-once type name for
partition_method. My great creativity led to partition_method_t.

+partition_method_t partition_method = PART_NONE;

It is better to make this static.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#13

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Dilip Kumar (#11)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Dilip,

Generally, we give one blank line between the variable declaration and
the first statement of the block.

Ok.

(p-1) -> (p - 1)

Ok.

I am just wondering whether it would be a good idea to expand it to
support multi-level partitioning?

ISTM that how the user could specify multi-level parameters is pretty
unclear, so I would leave that as a possible extension if someone wants it
enough.

Attached v6 implements the two cosmetic changes outlined above.

--
Fabien.

Attachments:

pgbench-init-partitioned-6.patchtext/x-diff; name=pgbench-init-partitioned-6.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..2b9fd07561 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,14 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;
+
+partition_method_t partition_method = PART_NONE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +625,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3612,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3686,18 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_method == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3717,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +5001,13 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+		printf("partition method: %s\n"
+			   "partitions: %d\n",
+			   partition_method == PART_RANGE ? "range" :
+			   partition_method == PART_HASH ? "hash" : "unexpected",
+			   partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5215,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5577,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5681,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires actual partitioning with --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5880,48 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Partition information. Assume no partitioning on any failure, so as
+		 * to avoid failing on an older version. We hope that there is only
+		 * one pgbench_accounts table, otherwise which one is used would depend
+		 * on search_path settings.
+		 */
+		res = PQexec(con,
+					 "select p.partstrat, count(*) "
+					 "from pg_catalog.pg_class as c "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' "
+					 "group by 1, c.oid");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) != 1)
+		{
+			/* unsure because multiple pgbench_accounts found */
+			partition_method = PART_UNKNOWN;
+			partitions = 0;
+		}
+		else
+		{
+			char *ps = PQgetvalue(res, 0, 0);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* whatever */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 1));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..4028525118 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,28 +112,30 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..998d814232 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires actual partitioning} ]
+	],
 
 	# logging sub-options
 	[

#14

Dilip Kumar

dilipbalaut@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#13)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 13, 2019 at 1:35 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Thanks for the updated version of the patch.

Generally, we give one blank line between the variable declaration and
the first statement of the block.

Ok.

(p-1) -> (p - 1)

Ok.

I am just wondering whether it would be a good idea to expand it to
support multi-level partitioning?

ISTM that how the user could specify multi-level parameters is pretty
unclear, so I would leave that as a possible extension if someone wants it
enough.

Attached v6 implements the two cosmetic changes outlined above.

+ /* For RANGE, we use open-ended partitions at the beginning and end */
+ if (p == 1)
+ sprintf(minvalue, "minvalue");
+ else
+ sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+ if (p < partitions)
+ sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+ else
+ sprintf(maxvalue, "maxvalue");

I do not understand why the first partition needs to be
open-ended. We know that the minimum value of aid is
1 in pgbench_accounts, so if you directly use
sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1); it will also
give 1 as the minvalue for the first partition, and that will be the
right thing to do. Am I missing something here?

--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com

#15

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#12)

Re: pgbench - allow to create partitioned tables

Hello Amit,

Is there a reason why we treat "partitions = 0" as a valid value?

Yes. It is an explicit "do not create partitioned tables", which differs
from 1, which says "create a partitioned table with just one partition".

Why would anyone want to use --partitions option in the first case
("do not create partitioned tables")?

Having an explicit value for the default is generally a good idea, e.g. for
a script to test various partitioning settings:

for nparts in 0 1 2 3 4 5 6 7 8 9 ; do
pgbench -i --partitions=$nparts ... ;
...
done

Otherwise you would need significant kludging to add/remove the option.
Allowing 0 does not harm anyone.

Now if the consensus is to remove an explicit 0, it is simple enough to
change it, but my opinion is that it is better to have it.

I think we should print the information about partitions in
printResults. It can help users while analyzing results.
+ res = PQexec(con,
+ "select p.partstrat, count(*) "
+ "from pg_catalog.pg_class as c "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ "where c.relname = 'pgbench_accounts' "
+ "group by 1, c.oid");
Can't we write this query with inner join instead of left join? What
additional purpose you are trying to serve by using left join?

I'm ensuring that there is always a one line answer, whether it is
partitioned or not. Maybe the count(*) should be count(something in p) to
get 0 instead of 1 on non partitioned tables, though, but this is hidden
in the display anyway.

+partition_method_t partition_method = PART_NONE;

It is better to make this static.

I do agree, but this would depart from all other global variables around
which are currently not static. Maybe a separate patch could turn them all
as static, but ISTM that this patch should not change the current style.

--
Fabien.

#16

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Dilip Kumar (#14)

Re: pgbench - allow to create partitioned tables

Hello Dilip,

+ /* For RANGE, we use open-ended partitions at the beginning and end */
+ if (p == 1)
+ sprintf(minvalue, "minvalue");
+ else
+ sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+ if (p < partitions)
+ sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+ else
+ sprintf(maxvalue, "maxvalue");
I do not understand why the first partition needs to be
open-ended. We know that the minimum value of aid is 1
in pgbench_accounts, so if you directly use sprintf(minvalue,
INT64_FORMAT, (p-1) * part_size + 1); it will also give 1 as the
minvalue for the first partition, and that will be the right thing to do.
Am I missing something here?

This is simply for the principle that any value allowed for the primary
key type has a corresponding partition, and also so that it exercises these
special values.

It also probably reduces the cost of checking whether a value belongs to
the first partition because one test is removed, so there is a small
additional performance benefit beyond principle and coverage.

--
Fabien.

#17

Dilip Kumar

dilipbalaut@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#16)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 13, 2019 at 2:05 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Dilip,
+ /* For RANGE, we use open-ended partitions at the beginning and end */
+ if (p == 1)
+ sprintf(minvalue, "minvalue");
+ else
+ sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+ if (p < partitions)
+ sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+ else
+ sprintf(maxvalue, "maxvalue");
I do not understand why the first partition needs to be
open-ended. We know that the minimum value of aid is 1
in pgbench_accounts, so if you directly use sprintf(minvalue,
INT64_FORMAT, (p-1) * part_size + 1); it will also give 1 as the
minvalue for the first partition, and that will be the right thing to do.
Am I missing something here?

This is simply for the principle that any value allowed for the primary
key type has a corresponding partition, and also so that it exercises these
special values.

IMHO, the primary key values for the pgbench_accounts table are always
within the defined range minvalue=1 and maxvalue=scale*100000, right?

It also probably reduces the cost of checking whether a value belongs to
the first partition because one test is removed, so there is a small
additional performance benefit beyond principle and coverage.

Ok, I agree that it will slightly reduce the cost for the tuple
falling in the first and the last partition.

--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com

#18

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#15)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 13, 2019 at 1:50 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Is there a reason why we treat "partitions = 0" as a valid value?

Yes. It is an explicit "do not create partitioned tables", which differs
from 1, which says "create a partitioned table with just one partition".

Why would anyone want to use --partitions option in the first case
("do not create partitioned tables")?

Having an explicit value for the default is generally a good idea, e.g. for
a script to test various partitioning settings:

for nparts in 0 1 2 3 4 5 6 7 8 9 ; do
pgbench -i --partitions=$nparts ... ;
...
done

Otherwise you would need significant kludging to add/remove the option.
Allowing 0 does not harm anyone.

Now if the consensus is to remove an explicit 0, it is simple enough to
change it, but my opinion is that it is better to have it.

Fair enough, let us see if anyone else wants to weigh in.

I think we should print the information about partitions in
printResults. It can help users while analyzing results.
+ res = PQexec(con,
+ "select p.partstrat, count(*) "
+ "from pg_catalog.pg_class as c "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ "where c.relname = 'pgbench_accounts' "
+ "group by 1, c.oid");
Can't we write this query with inner join instead of left join? What
additional purpose you are trying to serve by using left join?
I'm ensuring that there is always a one line answer, whether it is
partitioned or not. Maybe the count(*) should be count(something in p) to
get 0 instead of 1 on non partitioned tables, though, but this is hidden
in the display anyway.

Sure, but I feel the code will be simplified. I see no reason for
using left join here.

+partition_method_t partition_method = PART_NONE;

It is better to make this static.

I do agree, but this would depart from all other global variables around
which are currently not static.

Check QueryMode.

Maybe a separate patch could turn them all
as static, but ISTM that this patch should not change the current style.

No need to change others, but we can do it for this one.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#19

Alvaro Herrera

alvherre@2ndquadrant.com

over 6 years ago

In reply to: Amit Kapila (#18)

Re: pgbench - allow to create partitioned tables

On 2019-Sep-13, Amit Kapila wrote:

On Fri, Sep 13, 2019 at 1:50 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Is there a reason why we treat "partitions = 0" as a valid value?

Yes. It is an explicit "do not create partitioned tables", which differs
from 1, which says "create a partitioned table with just one partition".

Why would anyone want to use --partitions option in the first case
("do not create partitioned tables")?

Having an explicit value for the default is generally a good idea, e.g. for
a script to test various partitioning settings:

for nparts in 0 1 2 3 4 5 6 7 8 9 ; do
pgbench -i --partitions=$nparts ... ;
...
done

Otherwise you would need significant kludging to add/remove the option.
Allowing 0 does not harm anyone.

Now if the consensus is to remove an explicit 0, it is simple enough to
change it, but my opinion is that it is better to have it.

Fair enough, let us see if anyone else wants to weigh in.

It seems convenient UI -- I vote to keep it.

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#20

Alvaro Herrera

alvherre@2ndquadrant.com

over 6 years ago

In reply to: Amit Kapila (#12)

Re: pgbench - allow to create partitioned tables

On 2019-Sep-13, Amit Kapila wrote:

I would like to take inputs from others as well for the display part
of this patch. After this patch, for a simple-update pgbench test,
the changed output will be as follows (note: partition method and
partitions):

pgbench.exe -c 4 -j 4 -T 10 -N postgres
starting vacuum...end.
transaction type: <builtin: simple update>
scaling factor: 1
partition method: hash
partitions: 3
query mode: simple
number of clients: 4
number of threads: 4
duration: 10 s
number of transactions actually processed: 14563
latency average = 2.749 ms
tps = 1454.899150 (including connections establishing)
tps = 1466.689412 (excluding connections establishing)

What do others think about this? This will be the case when the user
has used --partitions option in pgbench, otherwise, it won't change.

I wonder what's the intended usage of this output ... it seems to be
getting a bit too long. Is this intended for machine processing? I
would rather have more things per line in a more compact header.
But then I'm not the kind of person who automates multiple pgbench runs.
Maybe we can get some input from Tomas, who does -- how do you automate
extracting data from collected pgbench output, or do you instead just
redirect the output to a file whose path/name indicates the parameters
that were used? (I do the latter.)

I mean, if we changed it like this (and I'm not proposing to do it in
this patch, this is only an example), would it bother anyone?

$ pgbench -x -y -z ...
starting vacuum...end.
scaling factor: 1 partition method: hash partitions: 1
transaction type: <builtin: simple update> query mode: simple
number of clients: 4 number of threads: 4 duration: 10s
number of transactions actually processed: 14563
latency average = 2.749 ms
tps = 1454.899150 (including connections establishing)
tps = 1466.689412 (excluding connections establishing)

If this output doesn't bother people, then I suggest that this patch
should put the partition information in the line together with scaling
factor.

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#21

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#18)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

+ res = PQexec(con,
+ "select p.partstrat, count(*) "
+ "from pg_catalog.pg_class as c "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ "where c.relname = 'pgbench_accounts' "
+ "group by 1, c.oid");
Can't we write this query with inner join instead of left join? What
additional purpose you are trying to serve by using left join?
I'm ensuring that there is always a one line answer, whether it is
partitioned or not. Maybe the count(*) should be count(something in p) to
get 0 instead of 1 on non partitioned tables, though, but this is hidden
in the display anyway.
Sure, but I feel the code will be simplified. I see no reason for
using left join here.

Without a left join, the query result is empty if there are no partitions,
whereas there is one line with it. This fact simplifies managing the query
result afterwards because we are always expecting 1 row in the "normal"
case, whether partitioned or not.

+partition_method_t partition_method = PART_NONE;

It is better to make this static.

I do agree, but this would depart from all other global variables around
which are currently not static.

Check QueryMode.

Indeed, there is a mix of static (about 8) and non static (29 cases). I
think static is better anyway, so why not.

Attached a v7.

--
Fabien.

Attachments:

pgbench-init-partitioned-7.patchtext/x-diff; name=pgbench-init-partitioned-7.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..648a0c9865 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,15 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+static int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = { "none", "range", "hash", "unknown" };
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +626,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3613,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3687,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3715,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +4999,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+		printf("partition method: %s\npartitions: %d\n",
+				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5210,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5572,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5676,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires actual partitioning with --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5875,48 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Partition information. Assume no partitioning on any failure, so as
+		 * to avoid failing on an older version. We hope that there is only
+		 * one pgbench_accounts table, otherwise which one is used would depend
+		 * on search_path settings.
+		 */
+		res = PQexec(con,
+					 "select p.partstrat, count(*) "
+					 "from pg_catalog.pg_class as c "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' "
+					 "group by 1, c.oid");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) != 1)
+		{
+			/* unsure because multiple pgbench_accounts found */
+			partition_method = PART_UNKNOWN;
+			partitions = 0;
+		}
+		else
+		{
+			char *ps = PQgetvalue(res, 0, 0);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* whatever */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 1));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..4028525118 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,28 +112,30 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..998d814232 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires actual partitioning} ]
+	],
 
 	# logging sub-options
 	[

#22

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Alvaro Herrera (#20)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 13, 2019 at 6:36 PM Alvaro Herrera <alvherre@2ndquadrant.com> wrote:

On 2019-Sep-13, Amit Kapila wrote:

I would like to take inputs from others as well for the display part
of this patch. After this patch, for a simple-update pgbench test,
the changed output will be as follows (note: partition method and
partitions):

pgbench.exe -c 4 -j 4 -T 10 -N postgres
starting vacuum...end.
transaction type: <builtin: simple update>
scaling factor: 1
partition method: hash
partitions: 3
query mode: simple
number of clients: 4
number of threads: 4
duration: 10 s
number of transactions actually processed: 14563
latency average = 2.749 ms
tps = 1454.899150 (including connections establishing)
tps = 1466.689412 (excluding connections establishing)

What do others think about this? This will be the case when the user
has used --partitions option in pgbench, otherwise, it won't change.

I wonder what's the intended usage of this output ... it seems to be
getting a bit too long. Is this intended for machine processing? I
would rather have more things per line in a more compact header.
But then I'm not the kind of person who automates multiple pgbench runs.
Maybe we can get some input from Tomas, who does -- how do you automate
extracting data from collected pgbench output, or do you instead just
redirect the output to a file whose path/name indicates the parameters
that were used? (I do the latter.)

I mean, if we changed it like this (and I'm not proposing to do it in
this patch, this is only an example), would it bother anyone?

$ pgbench -x -y -z ...
starting vacuum...end.
scaling factor: 1 partition method: hash partitions: 1
transaction type: <builtin: simple update> query mode: simple
number of clients: 4 number of threads: 4 duration: 10s
number of transactions actually processed: 14563
latency average = 2.749 ms
tps = 1454.899150 (including connections establishing)
tps = 1466.689412 (excluding connections establishing)

If this output doesn't bother people, then I suggest that this patch
should put the partition information in the line together with scaling
factor.

IIUC, there are two things here (a) you seem to be fine displaying
'partitions' and 'partition method' information, (b) you would prefer
to put it along with 'scaling factor' line.

I personally prefer each parameter to be displayed in a separate line,
but I am fine if more people would like to see the 'multiple
parameters information in a single line'. I think it is better to
do that (point (b)) as a separate patch even if we agree on changing the
display format.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#23

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#21)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 13, 2019 at 11:06 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,
+ res = PQexec(con,
+ "select p.partstrat, count(*) "
+ "from pg_catalog.pg_class as c "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ "where c.relname = 'pgbench_accounts' "
+ "group by 1, c.oid");
Can't we write this query with inner join instead of left join? What
additional purpose you are trying to serve by using left join?
I'm ensuring that there is always a one line answer, whether it is
partitioned or not. Maybe the count(*) should be count(something in p) to
get 0 instead of 1 on non partitioned tables, though, but this is hidden
in the display anyway.
Sure, but I feel the code will be simplified. I see no reason for
using left join here.
Without a left join, the query result is empty if there are no partitions,
whereas there is one line with it. This fact simplifies managing the query
result afterwards because we are always expecting 1 row in the "normal"
case, whether partitioned or not.

Why can't we change it as attached? I find using left join to always
get one row as an ugly way to manipulate the results later. We
shouldn't go in that direction unless we can't handle this with some
simple code.

Some more comments:
*
- '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables
--fillfactor=98 --foreign-keys --quiet --tablespace=pg_default
--index-tablespace=pg_default',
+ '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables
--fillfactor=98 --foreign-keys --quiet
--tablespace=regress_pgbench_tap_1_ts
--index-tablespace=regress_pgbench_tap_1_ts --partitions=2
--partition-method=hash',

What is the need of using regress_pgbench_tap_1_ts in this test? I
think we don't need to change existing tests unless required for the
new functionality.

*
- 'pgbench scale 1 initialization');
+ 'pgbench scale 1 initialization with options');

Similar to the above, it is not clear to me why we need to change this?

*pgbench(
-
# given the expected rate and the 2 ms tx duration, at most one is executed
'-t 10 --rate=100000 --latency-limit=1 -n -r',
0,

The above appears to be a spurious line change.

* I think we need to change the docs [1] to indicate the new step for
partitioning. See section --init-steps=init_steps

[1]: https://www.postgresql.org/docs/devel/pgbench.html

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

delta-pgbench-init-partitioned-7.patchapplication/octet-stream; name=delta-pgbench-init-partitioned-7.patchDownload

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 648a0c9865..dd6bad7302 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -4999,8 +4999,8 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
-	/* only print partitioning information if some partitioning was detected */
-	if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+	/* print partitioning information only if there exists any partition */
+	if (partitions > 0)
 		printf("partition method: %s\npartitions: %d\n",
 				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
@@ -5883,38 +5883,41 @@ main(int argc, char **argv)
 		 * on search_path settings.
 		 */
 		res = PQexec(con,
-					 "select p.partstrat, count(*) "
+					 "select count(*), p.partstrat "
 					 "from pg_catalog.pg_class as c "
-					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
-					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
 					 "where c.relname = 'pgbench_accounts' "
-					 "group by 1, c.oid");
+					 "group by 2, c.oid");
 		if (PQresultStatus(res) != PGRES_TUPLES_OK)
 		{
 			/* probably an older version, coldly assume no partitioning */
 			partition_method = PART_NONE;
 			partitions = 0;
 		}
-		else if (PQntuples(res) != 1)
+		else if (PQntuples(res) > 1)
 		{
 			/* unsure because multiple pgbench_accounts found */
 			partition_method = PART_UNKNOWN;
 			partitions = 0;
 		}
-		else
+		else if (PQntuples(res) == 1)
 		{
-			char *ps = PQgetvalue(res, 0, 0);
-
-			if (ps == NULL)
-				partition_method = PART_NONE;
-			else if (strcmp(ps, "r") == 0)
-				partition_method = PART_RANGE;
-			else if (strcmp(ps, "h") == 0)
-				partition_method = PART_HASH;
-			else /* whatever */
-				partition_method = PART_NONE;
-
-			partitions = atoi(PQgetvalue(res, 0, 1));
+			partitions = atoi(PQgetvalue(res, 0, 0));
+
+			if (partitions > 0)
+			{
+				char *ps = PQgetvalue(res, 0, 1);
+
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else /* whatever */
+					partition_method = PART_NONE;
+			}
 		}
 		PQclear(res);
 	}

#24

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#23)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

I'm ensuring that there is always a one line answer, whether it is
partitioned or not. Maybe the count(*) should be count(something in p) to
get 0 instead of 1 on non partitioned tables, though, but this is hidden
in the display anyway.

Sure, but I feel the code will be simplified. I see no reason for
using left join here.

Without a left join, the query result is empty if there are no partitions,
whereas there is one line with it. This fact simplifies managing the query
result afterwards because we are always expecting 1 row in the "normal"
case, whether partitioned or not.

Why can't we change it as attached?

I think that your version works, but I do not much like the condition for
the normal case, which is implicitly assumed. The solution I took has 3
clear-cut cases: 1 error against a server without partition support,
detect multiple pgbench_accounts table -- argh, and then the normal
expected case, whether partitioned or not. Your solution has 4 cases
because of the last implicit zero-row select that relies on default, which
would need some explanations.

I find using left join to always get one row as an ugly way to
manipulate the results later.

Hmmm. It is really a matter of taste. I do not share your distaste for left
join on principle. In the case at hand, I find that getting one row in all
cases pretty elegant because there is just one code for handling them all.

We shouldn't go in that direction unless we can't handle this with some
simple code.

Hmmm. Left join does not strike me as over complex code. I wish my student
would remember that this thing exists:-)

What is the need of using regress_pgbench_tap_1_ts in this test?

I wanted to check that tablespace options work appropriately with
partition tables, as I changed the create table stuff significantly, and
just using "pg_default" is kind of cheating.

I think we don't need to change existing tests unless required for the
new functionality.

I do agree, but there was a motivation behind the addition.

*
- 'pgbench scale 1 initialization');
+ 'pgbench scale 1 initialization with options');
Similar to the above, it is not clear to me why we need to change this?

Because I noticed that it had the same description as the previous one, so
I made the test name distinct and more precise, while I was adding options
on it.

*pgbench(
-
# given the expected rate and the 2 ms tx duration, at most one is executed
'-t 10 --rate=100000 --latency-limit=1 -n -r',
0,

The above appears to be a spurious line change.

Indeed. I think that this empty line is a typo, but I can leave it as it is.

* I think we need to change the docs [1] to indicate the new step for
partitioning. See section --init-steps=init_steps

[1] - https://www.postgresql.org/docs/devel/pgbench.html

The partitioned table generation is integrated into the existing create
table step; it is not a separate step because I see no benefit in doing
anything in between the table creations.

Patch v8 attached adds some comments around partition detection, ensures
that 0 is returned for the no partition case and leaves the spurious empty
line where it is.

--
Fabien.

Attachments:

pgbench-init-partitioned-8.patchtext/x-diff; name=pgbench-init-partitioned-8.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..5a9e27ec95 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,15 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+static int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = { "none", "range", "hash", "unknown" };
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +626,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3613,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3687,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3715,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +4999,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+		printf("partition method: %s\npartitions: %d\n",
+				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5210,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5572,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5676,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires actual partitioning with --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5875,49 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Partition information. Assume no partitioning on any failure, so as
+		 * to avoid failing on an older version. We hope that there is only
+		 * one pgbench_accounts table, otherwise which one is used would depend
+		 * on search_path settings.
+		 */
+		res = PQexec(con,
+					 "select p.partstrat, count(p.partrelid) "
+					 "from pg_catalog.pg_class as c "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' "
+					 "group by 1, c.oid");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) != 1)
+		{
+			/* unsure because multiple (or no) pgbench_accounts found */
+			partition_method = PART_UNKNOWN;
+			partitions = 0;
+		}
+		else
+		{
+			/* PQntuples(res) == 1: normal case, extract the partition status */
+			char *ps = PQgetvalue(res, 0, 0);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* whatever */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 1));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..5dc5d1f191 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,28 +112,30 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +923,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..998d814232 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires actual partitioning} ]
+	],
 
 	# logging sub-options
 	[

#25

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#24)

Re: pgbench - allow to create partitioned tables

On Sat, Sep 14, 2019 at 6:35 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

I'm ensuring that there is always a one line answer, whether it is
partitioned or not. Maybe the count(*) should be count(something in p) to
get 0 instead of 1 on non partitioned tables, though, but this is hidden
in the display anyway.

Sure, but I feel the code will be simplified. I see no reason for
using left join here.

Without a left join, the query result is empty if there are no partitions,
whereas there is one line with it. This fact simplifies managing the query
result afterwards because we are always expecting 1 row in the "normal"
case, whether partitioned or not.

Why can't we change it as attached?

I think that your version works, but I do not much like the condition for
the normal case, which is implicitly assumed. The solution I took has 3
clear-cut cases: 1 error against a server without partition support,
detect multiple pgbench_accounts table -- argh, and then the normal
expected case, whether partitioned or not. Your solution has 4 cases
because of the last implicit zero-row select that relies on default, which
would need some explanations.

Why? Here, we are fetching the partitioning information. If it
exists, then we remember that to display for later, otherwise, the
default should apply.

I find using left join to always get one row as an ugly way to
manipulate the results later.

Hmmm. It is really a matter of taste. I do not share your distaste for left
join on principle.

Oh no, I am not generally against using left join, but here it appears
like using it without much need. If nothing else, it will consume
more cycles to fetch one extra row when we can avoid it.

Irrespective of whether we use left join or not, I think the below
change from my patch is important.
- /* only print partitioning information if some partitioning was detected */
- if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+ /* print partitioning information only if there exists any partition */
+ if (partitions > 0)

Basically, it would be good if we just rely on 'partitions' to decide
whether we have partitions or not.

In the case at hand, I find that getting one row in all
cases pretty elegant because there is just one code for handling them all.

Hmm, I would be fine if you can show some other place in code where
such a method is used or if someone else also shares your viewpoint.

What is the need of using regress_pgbench_tap_1_ts in this test?

I wanted to check that tablespace options work appropriately with
partition tables, as I changed the create table stuff significantly, and
just using "pg_default" is kind of cheating.

I think your change will be tested if there is a '--tablespace'
option. Even if you want to test with a non-default tablespace,
adding an additional test would make more sense than
changing the existing one, which is testing a valid thing. Also, there is
an existing way to create tablespace location in
"src/bin/pg_checksums/t/002_actions". I think we can use the same. I
don't find any problem with your way, but why having multiple ways of
doing same thing in code. We need to test this on windows also once
as this involves some path creation which might vary, although I don't
think there should be any problem in that especially if we use
existing way.

I think we don't need to change existing tests unless required for the
new functionality.

I do agree, but there was a motivation behind the addition.
*
- 'pgbench scale 1 initialization');
+ 'pgbench scale 1 initialization with options');
Similar to the above, it is not clear to me why we need to change this?
Because I noticed that it had the same description as the previous one, so
I made the test name distinct and more precise, while I was adding options
on it.

Good observation, but better be done separately. I think in general
the more unrelated changes are present in patch, the more time it
takes to review.

One more comment:
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;

See, if we can eliminate PART_UNKNOWN. I don't see much use of same.
It is used at one place where we can set PART_NONE without much loss.
Having lesser invalid values makes code easier to follow.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#26

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Amit Kapila (#25)

Re: pgbench - allow to create partitioned tables

On Tue, Sep 17, 2019 at 4:24 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Sat, Sep 14, 2019 at 6:35 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:
One more comment:
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;
See, if we can eliminate PART_UNKNOWN. I don't see much use of same.
It is used at one place where we can set PART_NONE without much loss.
Having lesser invalid values makes code easier to follow.

Looking more closely at this case:
+ else if (PQntuples(res) != 1)
+ {
+ /* unsure because multiple (or no) pgbench_accounts found */
+ partition_method = PART_UNKNOWN;
+ partitions = 0;
+ }

Is it ever possible to have multiple pgbench_accounts considering we
have unique index on (relname, relnamespace) for pg_class?

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#27

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#26)

Re: pgbench - allow to create partitioned tables

Hello Amit,

One more comment:
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;

See, if we can eliminate PART_UNKNOWN.

I'm not very happy with this one, but I wanted to differentiate "we do
know that it is not partitioned" from "we do not know if it is
partitioned", and I did not have a better idea.

I don't see much use of same.

Although it is not used afterwards, we could display the partitioning
information differently between the two cases. This is not done because I
did not want to add more lines on the "normal" case.

It is used at one place where we can set PART_NONE without much loss.
Having lesser invalid values makes code easier to follow.
Looking more closely at this case:
+ else if (PQntuples(res) != 1)
+ {
+ /* unsure because multiple (or no) pgbench_accounts found */
+ partition_method = PART_UNKNOWN;
+ partitions = 0;
+ }
Is it ever possible to have multiple pgbench_accounts considering we
have unique index on (relname, relnamespace) for pg_class?

The issue is that it is not directly obvious which relnamespace will be
used by the queries which rely on non schema qualified "pgbench_accounts".
Each schema could theoretically hold a pgbench_accounts table. As this is
pretty unlikely, I did not attempt to add complexity to resolve taking
into account the search_path, but just skipped to unknown in this case,
which I expect nobody would hit in normal circumstances.

Another possible and unlikely issue is that pgbench_accounts could have
been deleted but not pgbench_branches which is used earlier to get the
current "scale". If so, the queries will fail later on anyway.

--
Fabien.

#28

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#25)

Re: pgbench - allow to create partitioned tables

Hello Amit,

Why can't we change it as attached?

I think that your version works, but I do not much like the condition for
the normal case, which is implicitly assumed. The solution I took has 3
clear-cut cases: 1 error against a server without partition support,
detect multiple pgbench_accounts table -- argh, and then the normal
expected case, whether partitioned or not. Your solution has 4 cases
because of the last implicit zero-row select that relies on default, which
would need some explanations.

Why?

Hmmm. This is a coding-philosophy question:-)

To be nice to the code reader?

You have several if cases, but the last one is to keep the default *which
means something*. ISTM that the default is kept in two cases: when there
is a pgbench_accounts without partitioning, and when no pgbench_accounts
was found, in which case the defaults are plain false. I could be okay if
the default said "we do not know", but for me having all cases explicitly
covered in one place helps in understanding the behavior of the code.

Here, we are fetching the partitioning information. If it exists, then
we remember that to display for later, otherwise, the default should
apply.

Yep, but the default is also kept if nothing is found, whereas the left
join solution would give one row when found and empty when not found,
which for me are quite distinct cases.

Oh no, I am not generally against using left join, but here it appears
like using it without much need. If nothing else, it will consume
more cycles to fetch one extra row when we can avoid it.

As pointed out, the left join makes it possible to distinguish "not found"
from "not partitioned" logically, even if no explicit use of that is made
afterwards.

Irrespective of whether we use left join or not, I think the below
change from my patch is important.
- /* only print partitioning information if some partitioning was detected */
- if (partition_method != PART_NONE && partition_method != PART_UNKNOWN)
+ /* print partitioning information only if there exists any partition */
+ if (partitions > 0)

Basically, it would be good if we just rely on 'partitions' to decide
whether we have partitions or not.

Could be, although I was thinking of telling the user that we do not know
on unknown. I'll think about this one.

In the case at hand, I find that getting one row in all cases pretty
elegant because there is just one code for handling them all.

Hmm, I would be fine if you can show some other place in code where
such a method is used

No problem:-) Although there are no other catalog queries in "pgbench",
there are plenty in "psql" and "pg_dump", and also in some other commands,
and they often rely on "LEFT" joins:

sh> grep LEFT src/bin/psql/*.c | wc -l # 58
sh> grep LEFT src/bin/pg_dump/*.c | wc -l # 54

Note that there are no "RIGHT" nor "FULL" joins…

What is the need of using regress_pgbench_tap_1_ts in this test?

I wanted to check that tablespace options work appropriately with
partition tables, as I changed the create table stuff significantly, and
just using "pg_default" is kind of cheating.

I think your change will be tested if there is a '--tablespace'
option.

Yes. There is just one, really.

Even if you want to test with a non-default tablespace, adding an
additional test would make more sense than changing the existing one,
which is testing a valid thing.

Tom tends to think that there are already too many tests, so I try to keep
them as compact/combined as possible. Moreover, the spirit of this test is
to cover "all possible options", so it made also sense to add the new
options there, and it achieves both coverage and testing my changes with
an explicit tablespace.

Also, there is an existing way to create tablespace location in
"src/bin/pg_checksums/t/002_actions". I think we can use the same. I
don't find any problem with your way, but why having multiple ways of
doing same thing in code. We need to test this on windows also once as
this involves some path creation which might vary, although I don't
think there should be any problem in that especially if we use existing
way.

Ok, I'll look at the pg_checksums way to do that.

- 'pgbench scale 1 initialization');
+ 'pgbench scale 1 initialization with options');
Similar to the above, it is not clear to me why we need to change this?
Because I noticed that it had the same description as the previous one,
so I made the test name distinct and more precise, while I was adding
options on it.

Hmmm. Keeping the same name is really a copy-paste error, and I wanted to
avoid a distinct commit for what is no more than a very minor thing.

Good observation, but better be done separately. I think in general
the more unrelated changes are present in patch, the more time it
takes to review.

Then let's keep the same name.

One more comment:
+typedef enum { PART_NONE, PART_RANGE, PART_HASH, PART_UNKNOWN }
+  partition_method_t;
See, if we can eliminate PART_UNKNOWN. I don't see much use of same.
It is used at one place where we can set PART_NONE without much loss.
Having lesser invalid values makes code easier to follow.

Discussed in other mail.

--
Fabien.

#29

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#27)

Re: pgbench - allow to create partitioned tables

On Tue, Sep 17, 2019 at 6:38 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

It is used at one place where we can set PART_NONE without much loss.
Having lesser invalid values makes code easier to follow.
Looking more closely at this case:
+ else if (PQntuples(res) != 1)
+ {
+ /* unsure because multiple (or no) pgbench_accounts found */
+ partition_method = PART_UNKNOWN;
+ partitions = 0;
+ }
Is it ever possible to have multiple pgbench_accounts considering we
have unique index on (relname, relnamespace) for pg_class?
The issue is that it is not directly obvious which relnamespace will be
used by the queries which rely on non schema qualified "pgbench_accounts".

It seems to me the patch already uses namespace in the query, so this
should not be a problem here.  The part of query is as below:
+ res = PQexec(con,
+ "select p.partstrat, count(p.partrelid) "
+ "from pg_catalog.pg_class as c "

This uses pg_catalog, so it should not have multiple entries for
"pgbench_accounts".

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#30

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#29)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Attached v9:

- remove the PART_UNKNOWN and use partitions = -1 to tell
that there is an error, and partitions >= 1 to print info
- use search_path to find at most one pgbench_accounts
It still uses left join because I still think that it is appropriate.
I added a lateral to avoid repeating the array_position call
to manage the search_path, and use explicit pg_catalog everywhere.
- leave the wrongly repeated test name as is
- somehow use pg_checksums tablespace creation method, however:
- I kept testing that mkdir succeeds
- I kept escaping single quotes, if the path contains a "'"
so the only difference is that on some msys platform it may
avoid some unclear issue.

--
Fabien.

Attachments:

pgbench-init-partitioned-9.patchtext/x-diff; name=pgbench-init-partitioned-9.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..0385932208 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,15 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning, -1 for bad */
+static int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH }
+  partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = { "none", "range", "hash" };
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +626,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3613,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3687,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3715,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +4999,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partitions >= 1)
+		printf("partition method: %s\npartitions: %d\n",
+				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5210,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5572,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5676,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires actual partitioning with --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5875,53 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Partition information. Assume no partitioning on any failure, so as
+		 * to avoid failing on an older version.
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(p.partrelid) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 /* right name and schema in search_path */
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 /* we keep the first encountered */
+					 "limit 1");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/* no pgbench_accounts found, builtin script should fail later */
+			partition_method = PART_NONE;
+			partitions = -1;
+		}
+		else
+		{
+			/* PQntuples(res) == 1: normal case, extract the partition status */
+			char *ps = PQgetvalue(res, 0, 1);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* whatever */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..998d814232 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires actual partitioning} ]
+	],
 
 	# logging sub-options
 	[

#31

Erikjan Rijkers

er@xs4all.nl

over 6 years ago

In reply to: Fabien COELHO (#30)

Re: pgbench - allow to create partitioned tables

On 2019-09-17 20:49, Fabien COELHO wrote:

Attached v9:

[pgbench-init-partitioned-9.patch]

Turns out this patch needed a dos2unix treatment.

It's easy to do but it takes time to figure it out (I'm dumb). I for
one would be happy to receive patches not so encumbered :)

thanks,

Erik Rijkers

#32

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Erikjan Rijkers (#31)

Re: pgbench - allow to create partitioned tables

Hello Erikjan,

[pgbench-init-partitioned-9.patch]

Turns out this patch needed a dos2unix treatment.

It's easy to do but it takes time to figure it out (I'm dumb). I for one
would be happy to receive patches not so encumbered :)

AFAICR this is usually because your mailer does not conform to MIME spec,
which *requires* that text files be sent over with \r\n terminations, so
my mailer does it for text/x-diff, and your mailer should translate back
EOL for your platform, but it does not, so you have to do it manually.

I could edit my /etc/mime.types file to switch patch files to some binary
mime type, but it may have side effects on my system, so I refrained.

Hoping that mailer writers read and conform to MIME seems desperate.

Last time this discussion occurred there was no obvious solution besides me
switching to another bug-compatible mailer, but this is not really
convenient for me. ISTM that the "patch" command accepts these files with
warnings.

--
Fabien.

#33

Amit Langote

amitlangote09@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#30)

Re: pgbench - allow to create partitioned tables

Hi Fabien,

On Wed, Sep 18, 2019 at 3:49 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Attached v9:

Thanks. This seems to work well.

Couple of nitpicks on parameter error messages.

+ fprintf(stderr, "invalid partition type,
expecting \"range\" or \"hash\","

How about "partitioning method" instead of "partition type"?

+ fprintf(stderr, "--partition-method requires actual
partitioning with --partitions\n");

Assuming that this error message is to direct the user to fix a
mistake they might have inadvertently made in specifying --partitions,
I don't think the message is very clear. How about:

"--partition-method requires --partitions to be greater than zero"

but this wording might suggest to some users that some partitioning
methods do allow zero partitions. So, maybe:

"specifying --partition-method requires --partitions to be greater than zero"

Thanks,
Amit

#34

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Langote (#33)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

+ fprintf(stderr, "invalid partition type,
expecting \"range\" or \"hash\","

How about "partitioning method" instead of "partition type"?

Indeed, this is a left over from a previous version.

+ fprintf(stderr, "--partition-method requires actual
partitioning with --partitions\n");

[...] "--partition-method requires --partitions to be greater than zero"

I think the first suggestion is clear enough. I've put a shorter variant
in the same spirit:

"--partition-method requires greater than zero --partitions"

Attached v10 fixes both messages.

--
Fabien.

Attachments:

pgbench-init-partitioned-10.patchtext/x-diff; name=pgbench-init-partitioned-10.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..dd5bb5c215 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,15 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning, -1 for bad */
+static int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH }
+  partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = { "none", "range", "hash" };
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +626,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3613,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3687,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3715,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +4999,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partitions >= 1)
+		printf("partition method: %s\npartitions: %d\n",
+				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5210,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5572,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5676,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5875,53 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Partition information. Assume no partitioning on any failure, so as
+		 * to avoid failing on an older version.
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(p.partrelid) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 /* right name and schema in search_path */
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 /* we keep the first encountered */
+					 "limit 1");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/* no pgbench_accounts found, builtin script should fail later */
+			partition_method = PART_NONE;
+			partitions = -1;
+		}
+		else
+		{
+			/* PQntupes(res) == 1: normal case, extract the partition status */
+			char *ps = PQgetvalue(res, 0, 1);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* whatever */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#35

Dilip Kumar

dilipbalaut@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#34)

Re: pgbench - allow to create partitioned tables

On Wed, Sep 18, 2019 at 1:02 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

*/
+ res = PQexec(con,
+ "select o.n, p.partstrat, pg_catalog.count(p.partrelid) "
+ "from pg_catalog.pg_class as c "
+ "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+ "cross join lateral (select
pg_catalog.array_position(pg_catalog.current_schemas(true),
n.nspname)) as o(n) "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ /* right name and schema in search_path */
+ "where c.relname = 'pgbench_accounts' and o.n is not null "
+ "group by 1, 2 "
+ "order by 1 asc "

I have a question, wouldn't it be sufficient to just group by 1? Are
you expecting multiple pgbench_accounts tables partitioned by different
strategies under the same schema?

--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com

#36

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#30)

Re: pgbench - allow to create partitioned tables

On Wed, Sep 18, 2019 at 12:19 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Attached v9:

- remove the PART_UNKNOWN and use partitions = -1 to tell
that there is an error, and partitions >= 1 to print info
- use search_path to find at most one pgbench_accounts
It still uses left join because I still think that it is appropriate.
I added a lateral to avoid repeating the array_position call
to manage the search_path, and use explicit pg_catalog everywhere.

It would be good if you can add some more comments to explain the
intent of query.

Few more comments:
*
else
+ {
+ /* PQntupes(res) == 1: normal case, extract the partition status */
+ char *ps = PQgetvalue(res, 0, 1);
+
+ if (ps == NULL)
+ partition_method = PART_NONE;

When can we expect ps as NULL? If this is not a valid case, then
probably an Assert would be better.

*
+ else if (PQntuples(res) == 0)
+ {
+ /* no pgbench_accounts found, builtin script should fail later */
+ partition_method = PART_NONE;
+ partitions = -1;
+ }

If we don't find pgbench_accounts, let's give error here itself rather
than later unless you have a valid case in mind.

*
+
+ /*
+ * Partition information. Assume no partitioning on any failure, so as
+ * to avoid failing on an older version.
+ */
..
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ /* probably an older version, coldly assume no partitioning */
+ partition_method = PART_NONE;
+ partitions = 0;
+ }

So, here we are silently absorbing the error when pgbench is executed
against older server version which doesn't support partitioning. If
that is the case, then I think if user gives --partitions for the old
server version, it will also give an error? It is not clear in
documentation whether we support or not using pgbench with older
server versions. I guess it didn't matter, but with this feature, it
can matter. Do we need to document this?

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#37

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Dilip Kumar (#35)

Re: pgbench - allow to create partitioned tables

+ "group by 1, 2 "

I have a question, wouldn't it be sufficient to just group by 1?

Conceptually yes, it is what is happening in practice, but SQL requires
that non-aggregated columns must appear explicitly in the GROUP BY
clause, so I have to put it even if it will not change groups.

--
Fabien.

#38

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#36)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

- use search_path to find at most one pgbench_accounts
It still uses left join because I still think that it is appropriate.
I added a lateral to avoid repeating the array_position call
to manage the search_path, and use explicit pg_catalog everywhere.

It would be good if you can add some more comments to explain the
intent of query.

Indeed, I put too few comments on the query.

+ if (ps == NULL)
+ partition_method = PART_NONE;

When can we expect ps as NULL? If this is not a valid case, then
probably an Assert would be better.

No, ps is really NULL if there is no partitioning, because of the LEFT
JOIN and pg_partitioned_table is just empty in that case.

The last else where there is an unexpected entry is different, see
comments about v11 below.

+ else if (PQntuples(res) == 0)
+ {
+ /* no pgbench_accounts found, builtin script should fail later */
+ partition_method = PART_NONE;
+ partitions = -1;
If we don't find pgbench_accounts, let's give error here itself rather
than later unless you have a valid case in mind.

I thought of it, but decided not to: Someone could add a builtin script
which does not use pgbench_accounts, or a parallel running script could
create a table dynamically, whatever, so I prefer the error to be raised
by the script itself, rather than deciding that it will fail before even
trying.

+ /*
+ * Partition information. Assume no partitioning on any failure, so as
+ * to avoid failing on an older version.
+ */
..
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ /* probably an older version, coldly assume no partitioning */
+ partition_method = PART_NONE;
+ partitions = 0;
+ }
So, here we are silently absorbing the error when pgbench is executed
against older server version which doesn't support partitioning.

Yes, exactly.

If that is the case, then I think if user gives --partitions for the old
server version, it will also give an error?

Yes, on -i it will fail because the syntax will not be recognized.

It is not clear in documentation whether we support or not using pgbench
with older server versions.

Indeed. We more or less do in practice. Command "psql" works back to 8
AFAICR, and pgbench as well.

I guess it didn't matter, but with this feature, it can matter. Do we
need to document this?

This has been discussed in the past, and the conclusion was that it was
not worth the effort. We just try not to break things if it is avoidable.
In this regard, the patch slightly changes FILLFACTOR output, which is
removed if the value is 100 (%) as it is the default, which means that
table creation would work on very, very old versions which did not support
fillfactor, unless you specify a lower percentage.

Attached v11:

- add quite a few comments on the pg_catalog query

- reverts the partitions >= 1 test; If some new partition method is
added that pgbench does not know about, the failure mode will be that
nothing is printed rather than printing something strange like
"method none with 2 partitions".

--
Fabien.

Attachments:

pgbench-init-partitioned-11.patchtext/x-diff; name=pgbench-init-partitioned-11.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..c07ed42bbb 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,15 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning, -1 for bad */
+static int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH }
+  partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = { "none", "range", "hash" };
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +626,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3613,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3687,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3715,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +4999,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5210,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5572,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5676,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5875,61 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Gather partition information from pg_catalog.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing
+		 * on an older version.
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(p.partrelid) "
+					 /* for all tables */
+					 "from pg_catalog.pg_class as c "
+					 /* get the schema corresponding to the previous table */
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 /* get this schema order in search_path */
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 /* check whether it is partitionned */
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 /* fetch actual partitions which inherits the main table */
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 /* check table name and that schema was in search_path */
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 /* count partitions, possibly 0 */
+					 "group by 1, 2 "
+					 /* and only keep the first encountered */
+					 "order by 1 asc "
+					 "limit 1");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/* no pgbench_accounts found, builtin script should fail later */
+			partition_method = PART_NONE;
+			partitions = -1;
+		}
+		else
+		{
+			/* PQntupes(res) == 1: normal case, extract the partition status */
+			char *ps = PQgetvalue(res, 0, 1);
+
+			if (ps == NULL)
+				partition_method = PART_NONE;
+			else if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else /* unexpected partitioning method, ignore it... */
+				partition_method = PART_NONE;
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#39

Amit Langote

amitlangote09@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#38)

Re: pgbench - allow to create partitioned tables

Hi Fabien,

On Thu, Sep 19, 2019 at 2:03 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

If that is the case, then I think if user gives --partitions for the old
server version, it will also give an error?

Yes, on -i it will fail because the syntax will not be recognized.

Maybe we should be checking the server version, which would allow us to
produce more useful error messages when these options are used against
older servers, like

if (sversion < 10000)
fprintf(stderr, "cannot use --partitions/--partition-method
against servers older than 10");

We would also have to check that partition-method=hash is not used against v10.

Maybe overkill?

Thanks,
Amit

#40

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Langote (#39)

Re: pgbench - allow to create partitioned tables

Hello Amit,

Yes, on -i it will fail because the syntax will not be recognized.

Maybe we should be checking the server version, which would allow to
produce more useful error messages when these options are used against
older servers, like

if (sversion < 10000)
fprintf(stderr, "cannot use --partitions/--partitions-method
against servers older than 10");

We would also have to check that partition-method=hash is not used against v10.

Maybe overkill?

Yes, I think so: the error detection and messages would be more or less
replicated from the server and would vary from version to version.

I do not think that it is worth going this path because the use case is
virtually void as people in 99.9% of cases would use a pgbench matching
the server version. For those who do not, the error message should be
clear enough to let them guess what the issue is. Also, it would be
untestable.

One thing we could eventually do is just to check pgbench version against
the server version like psql does and output a generic warning if they
differ, but frankly I do not think it is worth the effort: ISTM that
nobody ever complained about such issues. Also, that would be matter for
another patch.

--
Fabien.

#41

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#38)

Re: pgbench - allow to create partitioned tables

On Wed, Sep 18, 2019 at 10:33 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

- use search_path to find at most one pgbench_accounts
It still uses left join because I still think that it is appropriate.
I added a lateral to avoid repeating the array_position call
to manage the search_path, and use explicit pg_catalog everywhere.

It would be good if you can add some more comments to explain the
intent of query.

Indeed, I put too few comments on the query.

+ if (ps == NULL)
+ partition_method = PART_NONE;

When can we expect ps as NULL? If this is not a valid case, then
probably an Assert would be better.

No, ps is really NULL if there is no partitioning, because of the LEFT
JOIN and pg_partitioned_table is just empty in that case.

'ps' itself won't be NULL in that case, the value it contains is NULL.
I have debugged this case as well. 'ps' itself can be NULL only when
you pass wrong column number or something like that to PQgetvalue.

The last else where there is an unexpected entry is different, see
comments about v11 below.
+ else if (PQntuples(res) == 0)
+ {
+ /* no pgbench_accounts found, builtin script should fail later */
+ partition_method = PART_NONE;
+ partitions = -1;
If we don't find pgbench_accounts, let's give error here itself rather
than later unless you have a valid case in mind.
I thought of it, but decided not to: Someone could add a builtin script
which does not use pgbench_accounts, or a parallel running script could
create a table dynamically, whatever, so I prefer the error to be raised
by the script itself, rather than deciding that it will fail before even
trying.

I think this is not a possibility today and I don't know of the
future. I don't think it is a good idea to add code which we can't
reach today. You can probably add Assert if required.

+ /*
+ * Partition information. Assume no partitioning on any failure, so as
+ * to avoid failing on an older version.
+ */
..
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ /* probably an older version, coldly assume no partitioning */
+ partition_method = PART_NONE;
+ partitions = 0;
+ }
So, here we are silently absorbing the error when pgbench is executed
against older server version which doesn't support partitioning.
Yes, exactly.

If that is the case, then I think if user gives --partitions for the old
server version, it will also give an error?

Yes, on -i it will fail because the syntax will not be recognized.

It is not clear in documentation whether we support or not using pgbench
with older server versions.

Indeed. We more or less do in practice. Command "psql" works back to 8
AFAICR, and pgbench as well.

I guess it didn't matter, but with this feature, it can matter. Do we
need to document this?

This has been discussed in the past, and the conclusion was that it was
not worth the effort. We just try not to break things if it is avoidable.
On this regard, the patch slightly changes FILLFACTOR output, which is
removed if the value is 100 (%) as it is the default, which means that
table creation would work on very very old version which did not support
fillfactor, unless you specify a lower percentage.

Hmm, why do you need to change the fill factor behavior? If it is not
specifically required for the functionality of this patch, then I
suggest keeping that behavior as it is.

Attached v11:

- add quite a few comments on the pg_catalog query

- reverts the partitions >= 1 test; If some new partition method is
added that pgbench does not know about, the failure mode will be that
nothing is printed rather than printing something strange like
"method none with 2 partitions".

but how will that new partition method be associated with a table
created via pgbench? I think the previous check was good because it
makes partition checking consistent throughout the patch.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#42

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#40)

Re: pgbench - allow to create partitioned tables

On Thu, Sep 19, 2019 at 10:25 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

Yes, on -i it will fail because the syntax will not be recognized.

Maybe we should be checking the server version, which would allow to
produce more useful error messages when these options are used against
older servers, like

if (sversion < 10000)
fprintf(stderr, "cannot use --partitions/--partitions-method
against servers older than 10");

We would also have to check that partition-method=hash is not used against v10.

Maybe overkill?

Yes, I think so: the error detection and messages would be more or less
replicated from the server and would vary from version to version.

Yeah, but I think Amit L's point is worth considering. I think it
would be good if a few other people can also share their suggestion on
this point. Alvaro, Dilip, anybody else following this thread, would
like to comment? It is important to know others' opinions on this
because this will change how pgbench behaves with prior versions.

I do not think that it is worth going this path because the use case is
virtually void as people in 99.9% of cases would use a pgbench matching
the server version.

Fair enough, but there is no restriction of using it with prior
versions. In fact some people might want to use this with v11 where
partitioning was present. So, we shouldn't ignore this point.

One thing we could eventually do is just to check pgbench version against
the server version like psql does and output a generic warning if they
differ, but frankly I do not think it is worth the effort:

Yeah and even if we want to do something like that, it should not be
part of this patch.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#43

Amit Langote

amitlangote09@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#40)

Re: pgbench - allow to create partitioned tables

Hi Fabien,

On Thu, Sep 19, 2019 at 1:55 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

Yes, on -i it will fail because the syntax will not be recognized.

Maybe we should be checking the server version, which would allow to
produce more useful error messages when these options are used against
older servers, like

if (sversion < 10000)
fprintf(stderr, "cannot use --partitions/--partitions-method
against servers older than 10");

We would also have to check that partition-method=hash is not used against v10.

Maybe overkill?

Yes, I think so: the error detection and messages would be more or less
replicated from the server and would vary from version to version.

I do not think that it is worth going this path because the use case is
virtually void as people in 99.9% of cases would use a pgbench matching
the server version. For those who do not, the error message should be
clear enough to let them guess what the issue is. Also, it would be
untestable.

Okay, I can understand the desire to not add code for rarely occurring
situations where the server's error is a good enough clue.

One thing we could eventually do is just to check pgbench version against
the server version like psql does and output a generic warning if they
differ, but frankly I do not think it is worth the effort: ISTM that
nobody ever complained about such issues.

Agree.

Thanks,
Amit

#44

Dilip Kumar

dilipbalaut@gmail.com

over 6 years ago

In reply to: Amit Kapila (#42)

Re: pgbench - allow to create partitioned tables

On Thu, Sep 19, 2019 at 11:47 AM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Sep 19, 2019 at 10:25 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

Yes, on -i it will fail because the syntax will not be recognized.

Maybe we should be checking the server version, which would allow to
produce more useful error messages when these options are used against
older servers, like

if (sversion < 10000)
fprintf(stderr, "cannot use --partitions/--partitions-method
against servers older than 10");

We would also have to check that partition-method=hash is not used against v10.

Maybe overkill?

Yes, I think so: the error detection and messages would be more or less
replicated from the server and would vary from version to version.

Yeah, but I think Amit L's point is worth considering. I think it
would be good if a few other people can also share their suggestion on
this point. Alvaro, Dilip, anybody else following this thread, would
like to comment? It is important to know others opinion on this
because this will change how pgbench behaves with prior versions.

IMHO, we don't need to invent error handling in pgbench;
instead we can rely on the server's error.

--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com

#45

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#41)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

[...] 'ps' itself won't be NULL in that case, the value it contains is
NULL. I have debugged this case as well. 'ps' itself can be NULL only
when you pass wrong column number or something like that to PQgetvalue.

Argh, you are right! I mixed up C NULL and SQL NULL:-(

If we don't find pgbench_accounts, let's give error here itself rather
than later unless you have a valid case in mind.

I thought of it, but decided not to: Someone could add a builtin script
which does not use pgbench_accounts, or a parallel running script could
create a table dynamically, whatever, so I prefer the error to be raised
by the script itself, rather than deciding that it will fail before even
trying.

I think this is not a possibility today and I don't know of the
future. I don't think it is a good idea to add code which we can't
reach today. You can probably add Assert if required.

I added a fail on an unexpected partition method, i.e. not 'r' or 'h',
and an Assert in case PQgetvalue returns NULL.

I fixed the query so that it counts actual partitions, otherwise I was
getting one for a partitioned table without partitions attached, which
does not generate an error by the way. I just figured out that pgbench
does not check that UPDATE updates anything. Hmmm.

Hmm, why do you need to change the fill factor behavior? If it is not
specifically required for the functionality of this patch, then I
suggest keeping that behavior as it is.

The behavior is not actually changed, but I had to move fillfactor away
because it cannot be declared on partitioned tables, it must be declared
on partitions only. Once there is a function to handle that it is pretty
easy to add the test.

I can remove it but frankly there are only benefits: the default is now
tested by pgbench, the create query is smaller, and it would work with
older versions of pg, which does not matter but is good on principle.

added that pgbench does not know about, the failure mode will be that
nothing is printed rather than printing something strange like
"method none with 2 partitions".

but how will that new partition method be associated with a table
created via pgbench?

The user could do a -i with a version of pgbench and bench with another
one. I do that often while developing…

I think the previous check was good because it makes partition checking
consistent throughout the patch.

This case now generates a fail.

v12:
- fixes the C NULL vs SQL NULL confusion
- works correctly with a partitioned table without partitions attached
- generates an error if the partition method is unknown
- adds an assert

--
Fabien.

Attachments:

pgbench-init-partitioned-2.patchtext/x-diff; name=pgbench-init-partitioned-2.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 816f9cc4c7..3e8e292e39 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,32 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option is only taken into account if
+        <option>--partitions</option> is non-zero.
+        Default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..6819b4e433 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,11 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+int 		partitions = 0;
+enum { PART_RANGE, PART_HASH }
+			partition_method = PART_RANGE;
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +622,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition account table in NUM parts (defaults: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition account table with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3609,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3625,6 +3644,7 @@ initCreateTables(PGconn *con)
 		const char *bigcols;	/* column decls if accountIDs are 64 bits */
 		int			declare_fillfactor;
 	};
+
 	static const struct ddlinfo DDLs[] = {
 		{
 			"pgbench_history",
@@ -3651,11 +3671,10 @@ initCreateTables(PGconn *con)
 			1
 		}
 	};
-	int			i;
 
 	fprintf(stderr, "creating tables...\n");
 
-	for (i = 0; i < lengthof(DDLs); i++)
+	for (int i = 0; i < lengthof(DDLs); i++)
 	{
 		char		opts[256];
 		char		buffer[256];
@@ -3664,9 +3683,17 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
+		{
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)",
+					 partition_method == PART_RANGE ? "range" : "hash");
+		}
+		else if (ddl->declare_fillfactor)
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3713,54 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions >= 1)
+	{
+		int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+		char		ff[64];
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				char		minvalue[32], maxvalue[32];
+
+				if (p == 1)
+					sprintf(minvalue, "MINVALUE");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p-1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "MAXVALUE");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -5126,6 +5201,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5563,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partition-number */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-type */
+				initialization_option_set = true;
+				if (strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition type, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 5a2fdb9acb..ef6aafb3f9 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,18 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+
+# escape
+my $ets = $ts;
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -98,30 +110,32 @@ pgbench(
 	],
 	'pgbench scale 1 initialization',);
 
-# Again, with all possible options
+# Again, with all possible options but tablespace
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
 		qr{(?!vacuuming)}, # no vacuum
 		qr{done in \d+\.\d\d s }
 	],
-	'pgbench scale 1 initialization');
+	'pgbench scale 1 initialization with options');
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -833,7 +847,6 @@ pgbench(
 	'pgbench throttling');
 
 pgbench(
-
 	# given the expected rate and the 2 ms tx duration, at most one is executed
 	'-t 10 --rate=100000 --latency-limit=1 -n -r',
 	0,
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..a097c18ee6 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,8 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
 
 	# logging sub-options
 	[

#46

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#45)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 20, 2019 at 12:41 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

This case now generates a fail.

v12:
- fixes NULL vs NULL
- works correctly with a partitioned table without partitions attached
- generates an error if the partition method is unknown
- adds an assert

You seem to have attached some previous version (v2) of this patch. I
could see old issues in the patch which we have sorted out in the
review.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#47

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#45)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 20, 2019 at 12:41 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

[...] 'ps' itself won't be NULL in that case, the value it contains is
NULL. I have debugged this case as well. 'ps' itself can be NULL only
when you pass wrong column number or something like that to PQgetvalue.

Argh, you are right! I mixed up C NULL and SQL NULL:-(

If we don't find pgbench_accounts, let's give error here itself rather
than later unless you have a valid case in mind.

I thought of it, but decided not to: Someone could add a builtin script
which does not use pgbench_accounts, or a parallel running script could
create a table dynamically, whatever, so I prefer the error to be raised
by the script itself, rather than deciding that it will fail before even
trying.

I think this is not a possibility today and I don't know of the
future. I don't think it is a good idea to add code which we can't
reach today. You can probably add Assert if required.

I added a fail on an unexpected partition method, i.e. not 'r' or 'h',
and an Assert of PQgetvalue returns NULL.

I fixed the query so that it counts actual partitions, otherwise I was
getting one for a partitioned table without partitions attached, which
does not generate an error by the way. I just figured out that pgbench
does not check that UPDATE updates anything. Hmmm.

Hmm, why do you need to change the fill factor behavior? If it is not
specifically required for the functionality of this patch, then I
suggest keeping that behavior as it is.

The behavior is not actually changed, but I had to move fillfactor away
because it cannot be declared on partitioned tables, it must be declared
on partitions only. Once there is a function to handle that it is pretty
easy to add the test.

I can remove it but frankly there are only benefits: the default is now
tested by pgbench, the create query is smaller, and it would work with
older versions of pg, which does not matter but is good on principle.

I am not saying that it is a bad check on its own, rather it might be
good, but let's not do any unrelated change as that will delay the
main patch. Once we are done with the main patch, you can propose
these as improvements.

added that pgbench does not know about, the failure mode will be that
nothing is printed rather than printing something strange like
"method none with 2 partitions".

but how will that new partition method be associated with a table
created via pgbench?

The user could do a -i with a version of pgbench and bench with another
one. I do that often while developing…

I am not following what you want to say here especially ("pgbench and
bench with another one"). Can you explain with some example?

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#48

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#46)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

v12:
- fixes NULL vs NULL
- works correctly with a partitioned table without partitions attached
- generates an error if the partition method is unknown
- adds an assert

You seem to have attached some previous version (v2) of this patch. I
could see old issues in the patch which we have sorted out in the
review.

Indeed. This is a change from forgetting the attachment.

Here is v12. Hopefully.

--
Fabien.

Attachments:

pgbench-init-partitioned-12.patchtext/x-diff; charset=us-ascii; name=pgbench-init-partitioned-12.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..c17df93dbe 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,15 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning, -1 for bad */
+static int 		partitions = 0;
+
+typedef enum { PART_NONE, PART_RANGE, PART_HASH }
+  partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = { "none", "range", "hash" };
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +626,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3613,17 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option if not 100.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	if (fillfactor < 100)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3687,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions >= 1 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3715,57 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, pgbench_accounts partitions must be created manually */
+	if (partitions >= 1)
+	{
+		char		ff[64];
+
+		ff[0] = '\0';
+		append_fillfactor(ff, sizeof(ff));
+
+		fprintf(stderr, "creating %d partitions...\n", partitions);
+
+		for (int p = 1; p <= partitions; p++)
+		{
+			char		query[256];
+
+			if (partition_method == PART_RANGE)
+			{
+				int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+				char		minvalue[32], maxvalue[32];
+
+				/* For RANGE, we use open-ended partitions at the beginning and end */
+				if (p == 1)
+					sprintf(minvalue, "minvalue");
+				else
+					sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+				if (p < partitions)
+					sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+				else
+					sprintf(maxvalue, "maxvalue");
+
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values from (%s) to (%s)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 minvalue, maxvalue, ff);
+			}
+			else if (partition_method == PART_HASH)
+				snprintf(query, sizeof(query),
+						 "create%s table pgbench_accounts_%d\n"
+						 "  partition of pgbench_accounts\n"
+						 "  for values with (modulus %d, remainder %d)%s\n",
+						 unlogged_tables ? " unlogged" : "", p,
+						 partitions, p-1, ff);
+			else /* cannot get there */
+				Assert(0);
+
+			executeStatement(con, query);
+		}
+	}
 }
 
 /*
@@ -4919,6 +4999,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+				PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5210,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5572,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5676,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5875,70 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Gather partition information from pg_catalog.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing
+		 * on an older version.
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+					 /* for all tables */
+					 "from pg_catalog.pg_class as c "
+					 /* get the schema corresponding to the previous table */
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 /* get this schema order in search_path */
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 /* check whether it is partitionned */
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 /* fetch actual partitions which inherits the main table */
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 /* check table name and that schema was in search_path */
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 /* count partitions, possibly 0 */
+					 "group by 1, 2 "
+					 /* and only keep the first encountered */
+					 "order by 1 asc "
+					 "limit 1");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/* no pgbench_accounts found, builtin script should fail later */
+			partition_method = PART_NONE;
+			partitions = -1;
+		}
+		else
+		{
+			/* PQntupes(res) == 1: normal case, extract the partition status */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	*ps	= PQgetvalue(res, 0, 1);
+
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#49

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#47)

Re: pgbench - allow to create partitioned tables

The behavior is not actually changed, but I had to move fillfactor away
because it cannot be declared on partitioned tables, it must be declared
on partitions only. Once there is a function to handle that it is pretty
easy to add the test.

I can remove it but frankly there are only benefits: the default is now
tested by pgbench, the create query is smaller, and it would work with
older versions of pg, which does not matter but is good on principle.

I am not saying that it is a bad check on its own, rather it might be
good, but let's not do any unrelated change as that will delay the
main patch. Once, we are done with the main patch, you can propose
these as improvements.

I would not bother to create a patch for so small an improvement. This
makes sense in passing because the created function makes it very easy,
but otherwise I'll just drop it.

The user could do a -i with a version of pgbench and bench with another
one. I do that often while developing…

I am not following what you want to say here especially ("pgbench and
bench with another one"). Can you explain with some example?

While developing, I often run pgbench under development client against an
already created set of tables on an already created cluster, and usually
the server side on my laptop is the last major release from pgdg (ie 11.5)
while the pgbench I'm testing is from sources (ie 12dev). If I type
"pgbench" I run 11.5, and in the sources "./pgbench" runs the dev version.

--
Fabien.

#50

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#49)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 20, 2019 at 10:29 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

The behavior is not actually changed, but I had to move fillfactor away
because it cannot be declared on partitioned tables, it must be declared
on partitions only. Once there is a function to handle that it is pretty
easy to add the test.

I can remove it but frankly there are only benefits: the default is now
tested by pgbench, the create query is smaller, and it would work with
older versions of pg, which does not matter but is good on principle.

I am not saying that it is a bad check on its own, rather it might be
good, but let's not do any unrelated change as that will delay the
main patch. Once, we are done with the main patch, you can propose
these as improvements.

I would not bother to create a patch for so small an improvement. This
makes sense in passing because the created function makes it very easy,
but otherwise I'll just drop it.

I would prefer to drop for now.

The user could do a -i with a version of pgbench and bench with another
one. I do that often while developing…

I am not following what you want to say here especially ("pgbench and
bench with another one"). Can you explain with some example?

While developing, I often run pgbench under development client against an
already created set of tables on an already created cluster, and usually
the server side on my laptop is the last major release from pgdg (ie 11.5)
while the pgbench I'm testing is from sources (ie 12dev). If I type
"pgbench" I run 11.5, and in the sources "./pgbench" runs the dev version.

Hmm, I think some such thing is possible when you are running pgbench
of lower version with tables initialized by some higher version of
pgbench. Because higher version pgbench must be a superset of lower
version unless we drop support for one of the partitioning methods. I
think even if there is some unknown partition method, it should be
detected much earlier rather than reaching the stage of printing the
results like after the query for partitions in below code.

+ else
+ {
+ fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+ exit(1);
+ }

If we can't catch that earlier, then it might be better to have some
version-specific checks rather than such obscure code which is
difficult to understand for others.

I have made a few modifications in the attached patch.
* move the create partitions related code into a separate function.
* make the check related to number of partitions consistent i.e check
partitions > 0 apart from where we print which I also want to change
but let us first discuss one of the above points
* when we don't find the pgbench_accounts table, error out instead of continuing
* ensure append_fillfactor doesn't assume that it has to append
fillfactor and removed fillfactor < 100 check from it.
* improve the comments around query to fetch partitions
* improve the comments in the patch and make the code look like nearby code
* pgindent the patch

I think we should try to add some note or comment explaining why we only
choose to partition the pgbench_accounts table when the user has given
the --partitions option.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgbench-init-partitioned-13.patchapplication/octet-stream; name=pgbench-init-partitioned-13.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3..e3a0abb 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -307,6 +307,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
      </varlistentry>
 
      <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
        <para>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652b..788e3c6 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,23 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * This indicates the number of partitions to be created for pgbench_accounts
+ * table.  0 is the default and means no partitions.
+ */
+static int	partitions = 0;
+
+/* This defines the strategy of partitioning the pgbench_accounts table. */
+typedef enum
+{
+	PART_NONE,					/* table is not partitioned */
+	PART_RANGE,					/* range partitioning method */
+	PART_HASH					/* hash partitioning method */
+}			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +634,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3602,6 +3622,84 @@ initDropTables(PGconn *con)
 }
 
 /*
+ * add fillfactor percent option if required.
+ */
+static void
+append_fillfactor(char *opts, int len, bool ff_required)
+{
+	if (ff_required)
+		snprintf(opts + strlen(opts), len - strlen(opts),
+				 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create partitions of pgbench_accounts table.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * The fillfactor is required for pgbench_accounts table.  See definition
+	 * of ddlinfo in initCreateTables.
+	 */
+	append_fillfactor(ff, sizeof(ff), true);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.  Strictly
+			 * speaking, we don't need special min/max value for
+			 * pgbench_accounts as we can derive the actual minimum and
+			 * maximum values from the scale.  But it is good to keep it
+			 * generic and it is somewhat better performance-wise.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get here */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
+/*
  * Create pgbench's standard tables
  */
 static void
@@ -3664,9 +3762,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions > 0 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts), ddl->declare_fillfactor);
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3790,10 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	/* if needed, partitions must be created manually */
+	if (partitions > 0)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5027,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5238,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5600,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5704,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5903,70 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Get the partition information for pgbench_accounts table.  Here, we
+		 * need to ensure that we get the information for the table according
+		 * to current search_path (and that too found in the first namespace
+		 * in the path) as there could be multiple pgbench_accounts tables.
+		 *
+		 * This query will always fetch at least one row even if the table is
+		 * not partitioned in which case the value of 'partitions' will be 0.
+		 *
+		 * We assume no partitioning on any failure, so as to avoid failing on
+		 * an older version.
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 "limit 1");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			fprintf(stderr,
+					"pgbench_accounts is missing, you need to do initialization (\"pgbench -i\") in database \"%s\"\n",
+					PQdb(con));
+			exit(1);
+		}
+		else
+		{
+			/* extract the partitioning information */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	   *ps = PQgetvalue(res, 0, 1);
+
+				/* The column must be present. */
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f6..fb0f6b6 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa184..1e9542a 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#51

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#50)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

I would not bother to create a patch for so small an improvement. This
makes sense in passing because the created function makes it very easy,
but otherwise I'll just drop it.

I would prefer to drop for now.

Attached v13 does that, I added a comment instead. I do not think that it
is an improvement.

+ else
+ {
+ fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+ exit(1);
+ }
If we can't catch that earlier, then it might be better to have some
version-specific checks rather than such obscure code which is
difficult to understand for others.

Hmmm. The code simply checks for the current partitioning and fails if the
result is unknown, which I understood was what you asked, the previous
version was just ignoring the result.

It seems unlikely that postgres would drop support for range or hash
partitions.

This issue would rather be raised if an older partition-enabled pgbench is run
against a newer postgres which adds a new partition method. But then I
cannot guess when a new partition method will be added, so I cannot put a
guard with a version about something in the future. Possibly, if no new
method is ever added, the code will never be triggered.

I have made a few modifications in the attached patch.
* move the create partitions related code into a separate function.

Why not. Not sure it is an improvement.

* make the check related to number of partitions consistent i.e check
partitions > 0 apart from where we print which I also want to change
but let us first discuss one of the above points

I switched two instances of >= 1 to > 0, which had 1 instance before.

* when we don't find the pgbench_accounts table, error out instead of continuing

I do not think that it is a good idea, but I did it anyway to move
things forward.

* ensure append_fillfactor doesn't assume that it has to append
fillfactor and removed fillfactor < 100 check from it.

Done, which is too bad.

* improve the comments around query to fetch partitions

What? How?

There are already quite a few comments compared to the length of the
query.

* improve the comments in the patch and make the code look like nearby
code

This requirement is too fuzzy. I re-read the changes, and both code and
comments look okay to me.

* pgindent the patch

Done.

I think we should try to add some note or comment explaining why we only
choose to partition the pgbench_accounts table when the user has given
the --partitions option.

Added as a comment on the initPartition function.

--
Fabien.

Attachments:

pgbench-init-partitioned-13.patchtext/x-diff; name=pgbench-init-partitioned-13.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..10eadd8e96 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,19 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* partitioning for pgbench_accounts table, 0 for no partitioning */
+static int	partitions = 0;
+
+typedef enum
+{
+	PART_NONE, PART_RANGE, PART_HASH
+}
+
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +630,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3617,80 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	/* as default is 100, it could be removed in this case */
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Initialize pgbench_accounts partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+initPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+	append_fillfactor(ff, sizeof(ff));
+
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3754,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions > 0 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3782,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions > 0)
+		initPartitions(con);
 }
 
 /*
@@ -4919,6 +5018,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5229,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5591,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5695,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5894,70 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Gather partition information from pg_catalog.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing on
+		 * an older version.
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+		/* for all tables */
+					 "from pg_catalog.pg_class as c "
+		/* get the schema corresponding to the previous table */
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+		/* get this schema order in search_path */
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+		/* check whether it is partitionned */
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+		/* fetch actual partitions which inherits the main table */
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+		/* check table name and that schema was in search_path */
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+		/* count partitions, possibly 0 */
+					 "group by 1, 2 "
+		/* and only keep the first encountered */
+					 "order by 1 asc "
+					 "limit 1");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/* builtin script would fail later anyway... */
+			fprintf(stderr, "No pgbench_accounts table found in search_path\n");
+			exit(1);
+		}
+		else
+		{
+			/* PQntupes(res) == 1: normal case, extract the partition status */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	   *ps = PQgetvalue(res, 0, 1);
+
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#52

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#51)

Re: pgbench - allow to create partitioned tables

On Sat, Sep 21, 2019 at 12:26 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

I would not bother to create a patch for so small an improvement. This
makes sense in passing because the created function makes it very easy,
but otherwise I'll just drop it.

I would prefer to drop for now.

Attached v13 does that, I added a comment instead. I do not think that it
is an improvement.
+ else
+ {
+ fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+ exit(1);
+ }
If we can't catch that earlier, then it might be better to have some
version-specific checks rather than such obscure code which is
difficult to understand for others.
Hmmm. The code simply checks for the current partitioning and fails if the
result is unknown, which I understood was what you asked, the previous
version was just ignoring the result.

Yes, this code is correct.  I am not sure if you understood the point,
so let me try again. I am bothered about below code in the patch:
+ /* only print partitioning information if some partitioning was detected */
+ if (partition_method != PART_NONE)

This is the only place now where we check 'whether there are any
partitions' differently. I am suggesting to make this similar to
other checks (if (partitions > 0)).

It seems unlikely that postgres would drop support for range or hash
partitions.

This issue would rather be raised if an older partition-enabled pgbench is run
against a newer postgres which adds a new partition method. But then I
cannot guess when a new partition method will be added, so I cannot put a
guard with a version about something in the future. Possibly, if no new
method is ever added, the code will never be triggered.

Sure, even in that case your older version of pgbench will be able to
detect it with the code below:
+ else
+ {
+ fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+ exit(1);
+ }

* improve the comments around query to fetch partitions

What? How?

There are already quite a few comments compared to the length of the
query.

Hmm, you have just written what each part of the query is doing which
I think one can identify if we write some general comment as I have in
the patch to explain the overall intent. Even if we write what each
part of the statement is doing, the comment explaining overall intent
is required. I personally don't like writing a comment for each
sub-part of the query as that makes reading the query difficult. See
the patch sent by me in my previous email.

* improve the comments in the patch and make the code look like nearby
code

This requirement is too fuzzy. I re-read the changes, and both code and
comments look okay to me.

I have done that in some of the cases in the patch attached by me in
my last email. Have you looked at those changes? Try to make those
changes in the next version unless you see something wrong is written
in comments.

* pgindent the patch

Done.

I think we should try to add some note or comment explaining why we only
choose to partition the pgbench_accounts table when the user has given
the --partitions option.

Added as a comment on the initPartition function.

I am not sure if something like that is required in the docs, but we
can leave it for now.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#53

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#52)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

Yes, this code is correct.  I am not sure if you understood the point,
so let me try again. I am bothered about below code in the patch:
+ /* only print partitioning information if some partitioning was detected */
+ if (partition_method != PART_NONE)
This is the only place now where we check 'whether there are any
partitions' differently. I am suggesting to make this similar to
other checks (if (partitions > 0)).

As I said somewhere up thread, you can have a partitioned table with zero
partitions, and it works fine (yep! the update just does not do anything…)
so partitions > 0 is not a good way to know whether there is a partitioned
table when running a bench. It is a good way for initialization, though,
because we are creating them.

sh> pgbench -i --partitions=1
sh> psql -c 'DROP TABLE pgbench_accounts_1'
sh> pgbench -T 10
...
transaction type: <builtin: TPC-B (sort of)>
scaling factor: 1
partition method: hash
partitions: 0
query mode: simple
number of clients: 1
number of threads: 1
duration: 10 s
number of transactions actually processed: 2314
latency average = 4.323 ms
tps = 231.297122 (including connections establishing)
tps = 231.549125 (excluding connections establishing)

As postgres does not break, there is no good reason to forbid it.

[...] Sure, even in that case your older version of pgbench will be able
to detect by below code [...] "unexpected partition method: " [...].

Yes, that is what I was saying.

Hmm, you have just written what each part of the query is doing which I
think one can identify if we write some general comment as I have in the
patch to explain the overall intent. Even if we write what each part of
the statement is doing, the comment explaining overall intent is
required.

There were some comments.

I personally don't like writing a comment for each sub-part of the query
as that makes reading the query difficult. See the patch sent by me in
my previous email.

I did not notice there was an attachment.

I have done that in some of the cases in the patch attached by me in
my last email. Have you looked at those changes?

Nope, as I was not expecting one.

Try to make those changes in the next version unless you see something
wrong is written in comments.

I incorporated most of them, although I made them terser, and fixed them
when inaccurate.

I did not buy moving the condition inside the fillfactor function.

See attached v14.

--
Fabien.

Attachments:

pgbench-init-partitioned-14.patchtext/x-diff; name=pgbench-init-partitioned-14.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..97b73d5a8a 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,82 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	/* as default is 100, it could be removed in this case */
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+	append_fillfactor(ff, sizeof(ff));
+
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3762,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions > 0 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3790,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions > 0)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5026,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5237,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5599,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5703,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5902,79 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Get the partition information for the first "pgbench_accounts" table
+		 * found in search_path.
+		 *
+		 * The result is empty if no "pgbench_accounts" is found.
+		 *
+		 * Otherwise, it always returns one row even if the table is not
+		 * partitioned (in which case the partition strategy is NULL).
+		 *
+		 * The number of partitions can be 0 even for partitioned tables, if no
+		 * partition are attached.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing on
+		 * an old version without "pg_partitioned_table".
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 "limit 1");
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/*
+			 * This case is unlikely as pgbench already found "pgbench_branches"
+			 * above to compute the scale.
+			 */
+			fprintf(stderr,
+					"No pgbench_accounts table found in search_path. "
+					"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+			exit(1);
+		}
+		else /* PQntupes(res) == 1 */
+		{
+			/* normal case, extract partition information */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	   *ps = PQgetvalue(res, 0, 1);
+
+				/* column must be there */
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					/* possibly a newer version with new partition method */
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#54

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#53)

Re: pgbench - allow to create partitioned tables

On Sat, Sep 21, 2019 at 1:18 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Yes, this code is correct.  I am not sure if you understood the point,
so let me try again. I am bothered about below code in the patch:
+ /* only print partitioning information if some partitioning was detected */
+ if (partition_method != PART_NONE)
This is the only place now where we check 'whether there are any
partitions' differently. I am suggesting to make this similar to
other checks (if (partitions > 0)).
As I said somewhere up thread, you can have a partitioned table with zero
partitions, and it works fine (yep! the update just does not do anything…)
so partitions > 0 is not a good way to know whether there is a partitioned
table when running a bench. It is a good way for initialization, though,
because we are creating them.

sh> pgbench -i --partitions=1
sh> psql -c 'DROP TABLE pgbench_accounts_1'
sh> pgbench -T 10
...
transaction type: <builtin: TPC-B (sort of)>
scaling factor: 1
partition method: hash
partitions: 0

I am not sure how many users would be able to make out that it is a
run where actual partitions are not present unless they beforehand
know and detect such a condition in their scripts. What is the use of
such a run which completes without actual updates? I think it is
better if we give an error for such a case rather than allowing it to
execute and then giving some information which doesn't make much
sense.

I incorporated most of them, although I made them terser, and fixed them
when inaccurate.

I did not buy moving the condition inside the fillfactor function.

I also don't agree with your position. My main concern here is that
we can't implicitly assume that fillfactor needs to be appended. At
the very least we should have a comment saying why we are always
appending the fillfactor for partitions, something like I had in my
patch.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#55

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#54)

Re: pgbench - allow to create partitioned tables

Hello Amit,

sh> pgbench -T 10
...
partitions: 0

I am not sure how many users would be able to make out that it is a
run where actual partitions are not present unless they beforehand
know and detect such a condition in their scripts.

What is the use of such a run which completes without actual updates?

Why should we decide that they cannot do that?

The user could be testing the overhead of no-op updates, which is
something interesting, and check what happens with partitioning in this
case. For that, they may delete pgbench_accounts contents or its
partitions for partitioned version, or only some partitions, or whatever.

A valid (future) case is that hopefully dynamic partitioning could be
implemented, thus no partitions would be a perfectly legal state even with
the standard benchmarking practice. Maybe the user just wrote a clever
extension to do that with a trigger and wants to test the performance
overhead with pgbench. Fine!

IMHO we should not babysit the user by preventing them from running a bench
which would not generate any error, and so is fundamentally legal. If running a
bench should fail, it should fail while running it, not before even
starting. I have already added, at your request, early failure modes to the
patch, about which I'm not very happy.

Note that I'm mostly okay with warnings, but I know that I do not know
what use may be done with pgbench, and I do not want to decide for users.

In this case, frankly, I would not bother to issue a warning which has a
very low probability of ever being raised.

I think it is better if we give an error for such a case rather than
allowing it to execute and then giving some information which doesn't make
much sense.

I strongly disagree, as explained above.

I incorporated most of them, although I made them terser, and fixed them
when inaccurate.

I did not buy moving the condition inside the fillfactor function.

I also don't agree with your position. My main concern here is that
we can't implicitly assume that fillfactor need to be appended.

Sure.

At the very least we should have a comment saying why we are always
appending the fillfactor for partitions

The patch does not do that, the condition is just before the call, not
inside it with a boolean passed as an argument. AFAICS the behavior of v14
is exactly the same as your version and as the initial code.

--
Fabien.

#56

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#55)

Re: pgbench - allow to create partitioned tables

On Sun, Sep 22, 2019 at 12:22 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

sh> pgbench -T 10
...
partitions: 0

I am not sure how many users would be able to make out that it is a
run where actual partitions are not present unless they beforehand
know and detect such a condition in their scripts.

What is the use of such a run which completes without actual updates?

Why should we decide that they cannot do that?

The user could be testing the overhead of no-op updates, which is
something interesting, and check what happens with partitioning in this
case. For that, they may delete pgbench_accounts contents or its
partitions for partitioned version, or only some partitions, or whatever.

A valid (future) case is that hopefully dynamic partitioning could be
implemented, thus no partitions would be a perfectly legal state even with
the standard benchmarking practice. Maybe the user just wrote a clever
extension to do that with a trigger and wants to test the performance
overhead with pgbench. Fine!

It is better for a user to write a custom script for such cases,
because after that the "select-only" or "simple-update" script doesn't
make any sense. In the "select-only" case, why would anyone like to test
fetching zero rows? Similarly, in the "simple-update" case, 2 out of 3
statements will be no-ops. In the "tpcb-like" script, 2 out of 5 queries
will be no-ops, and it won't be completely no-op updates as you are
saying. Having said that, I see your point and don't mind allowing
such cases as long as we don't have to write special checks in the code to
support such cases. Now, we can have a detailed comment in
printResults to explain why we have a different check there as compared
to other code paths, or change other code paths to have a similar check
as printResults, but I am not convinced by either of those options.

I did not buy moving the condition inside the fillfactor function.

I also don't agree with your position. My main concern here is that
we can't implicitly assume that fillfactor need to be appended.

Sure.

At the very least we should have a comment saying why we are always
appending the fillfactor for partitions

The patch does not do that, the condition is just before the call, not
inside it with a boolean passed as an argument. AFAICS the behavior of v14
is exactly the same as your version and as the initial code.

Here, I am talking about the call to append_fillfactor in
createPartitions() function. See, in my version, there are some
comments.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#57

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#56)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

It is better for a user to write a custom script for such cases.

I kind-of agree, but IMHO this is not for pgbench to decide what is better
for the user and to fail on a script that would not fail.

Because after that "select-only" or "simple-update" script doesn't
make any sense. [...].

What makes sense in a benchmarking context may not be what you think. For
instance, AFAICR, I already removed benevolent but misplaced guards which
were preventing running scripts without queries: if one wants to look at
pgbench overheads because they are worried that they may be too high, they
really need to be allowed to run such scripts.

This is not for us to decide, and as I already said, they do make sense if you
want to test no-op overheads. Moreover, the problem pre-exists: if the user deletes
the contents of pgbench_accounts, these scripts are no-ops, and we do not
complain. The case where no partition is attached is just a particular case.

Having said that, I see your point and don't mind allowing such cases
until we don't have to write special checks in the code to support such
cases.

Indeed, it is also simpler to not care about such issues in the code.

[...] Now, we can have a detailed comment in printResults to explain why
we have a different check there as compare to other code paths or change
other code paths to have a similar check as printResults, but I am not
convinced of any of those options.

Yep. ISTM that the current version is reasonable.

[...] I am talking about the call to append_fillfactor in
createPartitions() function. See, in my version, there are some
comments.

Ok, I understand that you want a comment. Patch v15 does that.

--
Fabien.

Attachments:

pgbench-init-partitioned-15.patchtext/x-diff; name=pgbench-init-partitioned-15.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..9e45f00fb5 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,87 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	/* as default is 100, it could be removed in this case */
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables below, fillfactor is needed on
+	 * table pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3767,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions > 0 && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3795,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partitions > 0)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5031,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5242,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5604,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5708,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5907,79 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Get the partition information for the first "pgbench_accounts" table
+		 * found in search_path.
+		 *
+		 * The result is empty if no "pgbench_accounts" is found.
+		 *
+		 * Otherwise, it always returns one row even if the table is not
+		 * partitioned (in which case the partition strategy is NULL).
+		 *
+		 * The number of partitions can be 0 even for partitioned tables, if no
+		 * partition are attached.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing on
+		 * an old version without "pg_partitioned_table".
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 "limit 1");
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/*
+			 * This case is unlikely as pgbench already found "pgbench_branches"
+			 * above to compute the scale.
+			 */
+			fprintf(stderr,
+					"No pgbench_accounts table found in search_path. "
+					"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+			exit(1);
+		}
+		else /* PQntupes(res) == 1 */
+		{
+			/* normal case, extract partition information */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	   *ps = PQgetvalue(res, 0, 1);
+
+				/* column must be there */
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					/* possibly a newer version with new partition method */
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#58

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#57)

Re: pgbench - allow to create partitioned tables

On Mon, Sep 23, 2019 at 11:58 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

It is better for a user to write a custom script for such cases.

I kind-of agree, but IMHO this is not for pgbench to decide what is better
for the user and to fail on a script that would not fail.

Because after that "select-only" or "simple-update" script doesn't
make any sense. [...].

What makes sense in a benchmarking context may not be what you think. For
instance, AFAICR, I already removed benevolent but misplaced guards which
were preventing running scripts without queries: if one wants to look at
pgbench overheads because they are worried that they may be too high, they
really need to be allowed to run such scripts.

This is not for us to decide, and as I already said, they do make sense if you
want to test no-op overheads. Moreover, the problem pre-exists: if the user deletes
the contents of pgbench_accounts, these scripts are no-ops, and we do not
complain. The case where no partition is attached is just a particular case.

Having said that, I see your point and don't mind allowing such cases
until we don't have to write special checks in the code to support such
cases.

Indeed, it is also simpler to not care about such issues in the code.

If you agree with this, then why haven't you changed the check below in the patch:
+ if (partition_method != PART_NONE)
+ printf("partition method: %s\npartitions: %d\n",
+    PARTITION_METHOD[partition_method], partitions);

This is exactly the thing bothering me. It won't be easy for others
to understand why this check for partitioning information is different
from other checks. For you or me, it might be okay as we have
discussed this case, but it won't be apparent to others. This doesn't
buy us much, so it is better to keep this code consistent with other
places that check for partitions.

[...] Now, we can have a detailed comment in printResults to explain why
we have a different check there as compare to other code paths or change
other code paths to have a similar check as printResults, but I am not
convinced of any of those options.

Yep. ISTM that the current version is reasonable.

[...] I am talking about the call to append_fillfactor in
createPartitions() function. See, in my version, there are some
comments.

Ok, I understand that you want a comment. Patch v15 does that.

Thanks!

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#59

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#58)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

[...]
If you agree with this, then why haven't you changed below check in patch:
+ if (partition_method != PART_NONE)
+ printf("partition method: %s\npartitions: %d\n",
+    PARTITION_METHOD[partition_method], partitions);
This is exactly the thing bothering me. It won't be easy for others
to understand why this check for partitioning information is different
from other checks.

As I tried to explain with an example, using "partitions > 0" does not
work in this case because you can have a partitioned table with zero
partitions attached while benchmarking, but this cannot happen while
creating.

For you or me, it might be okay as we have discussed this case, but it
won't be apparent to others. This doesn't buy us much, so it is better
to keep this code consistent with other places that check for
partitions.

Attached uses "partition_method != PART_NONE" consistently, plus an assert
on "partitions > 0" for checking and for triggering the default method at
the end of option processing.

--
Fabien.

Attachments:

pgbench-init-partitioned-16.patchtext/x-diff; name=pgbench-init-partitioned-16.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..2d93f6fbb2 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,88 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	/* as default is 100, it could be removed in this case */
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables below, fillfactor is needed on
+	 * table pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3768,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partitions_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3796,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5032,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5243,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5605,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5709,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5908,79 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Get the partition information for the first "pgbench_accounts" table
+		 * found in search_path.
+		 *
+		 * The result is empty if no "pgbench_accounts" is found.
+		 *
+		 * Otherwise, it always returns one row even if the table is not
+		 * partitioned (in which case the partition strategy is NULL).
+		 *
+		 * The number of partitions can be 0 even for partitioned tables, if no
+		 * partition are attached.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing on
+		 * an old version without "pg_partitioned_table".
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 "limit 1");
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/*
+			 * This case is unlikely as pgbench already found "pgbench_branches"
+			 * above to compute the scale.
+			 */
+			fprintf(stderr,
+					"No pgbench_accounts table found in search_path. "
+					"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+			exit(1);
+		}
+		else /* PQntupes(res) == 1 */
+		{
+			/* normal case, extract partition information */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	   *ps = PQgetvalue(res, 0, 1);
+
+				/* column must be there */
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					/* possibly a newer version with new partition method */
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#60

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#59)

Re: pgbench - allow to create partitioned tables

On Tue, Sep 24, 2019 at 6:59 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

For you or me, it might be okay as we have discussed this case, but it
won't be apparent to others. This doesn't buy us much, so it is better
to keep this code consistent with other places that check for
partitions.

Attached uses "partition_method != PART_NONE" consistently, plus an assert
on "partitions > 0" for checking and for triggering the default method at
the end of option processing.

Okay, I think making the check consistent is a step forward.  The
latest patch is not compiling for me.  You have used the wrong
variable name in below line:
+ /* Partition pgbench_accounts table */
+ if (partitions_method != PART_NONE && strcmp(ddl->table,
"pgbench_accounts") == 0)

Another point is:
+ else if (PQntuples(res) == 0)
+ {
+ /*
+ * This case is unlikely as pgbench already found "pgbench_branches"
+ * above to compute the scale.
+ */
+ fprintf(stderr,
+ "No pgbench_accounts table found in search_path. "
+ "Perhaps you need to do initialization (\"pgbench -i\") in database
\"%s\"\n", PQdb(con));

We don't recommend to start messages with a capital letter. See
"Upper Case vs. Lower Case" section in docs [1]. It is not that we
have not used it anywhere else, but I think we should try to avoid it.

[1]: https://www.postgresql.org/docs/devel/error-style-guide.html

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#61

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#60)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Okay, I think making the check consistent is a step forward. The
latest patch is not compiling for me.

Argh, shame on me!

[...] We don't recommend to start messages with a capital letter. See
"Upper Case vs. Lower Case" section in docs [1]. It is not that we have
not used it anywhere else, but I think we should try to avoid it.

Ok.

Patch v17 makes both above changes, compiles and passes pgbench TAP tests
on my laptop.

--
Fabien.

Attachments:

pgbench-init-partitioned-17.patchtext/x-diff; name=pgbench-init-partitioned-17.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..3f3b2b22ca 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,88 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	/* as default is 100, it could be removed in this case */
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables below, fillfactor is needed on
+	 * table pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3768,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3796,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5032,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5243,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5486,6 +5605,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5709,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5756,6 +5908,79 @@ main(int argc, char **argv)
 			fprintf(stderr,
 					"scale option ignored, using count from pgbench_branches table (%d)\n",
 					scale);
+
+		/*
+		 * Get the partition information for the first "pgbench_accounts" table
+		 * found in search_path.
+		 *
+		 * The result is empty if no "pgbench_accounts" is found.
+		 *
+		 * Otherwise, it always returns one row even if the table is not
+		 * partitioned (in which case the partition strategy is NULL).
+		 *
+		 * The number of partitions can be 0 even for partitioned tables, if no
+		 * partition are attached.
+		 *
+		 * We Assume no partitioning on any failure, so as to avoid failing on
+		 * an old version without "pg_partitioned_table".
+		 */
+		res = PQexec(con,
+					 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+					 "from pg_catalog.pg_class as c "
+					 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+					 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+					 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+					 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+					 "where c.relname = 'pgbench_accounts' and o.n is not null "
+					 "group by 1, 2 "
+					 "order by 1 asc "
+					 "limit 1");
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			/* probably an older version, coldly assume no partitioning */
+			partition_method = PART_NONE;
+			partitions = 0;
+		}
+		else if (PQntuples(res) == 0)
+		{
+			/*
+			 * This case is unlikely as pgbench already found "pgbench_branches"
+			 * above to compute the scale.
+			 */
+			fprintf(stderr,
+					"no pgbench_accounts table found in search_path\n"
+					"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+			exit(1);
+		}
+		else /* PQntupes(res) == 1 */
+		{
+			/* normal case, extract partition information */
+			if (PQgetisnull(res, 0, 1))
+				partition_method = PART_NONE;
+			else
+			{
+				char	   *ps = PQgetvalue(res, 0, 1);
+
+				/* column must be there */
+				Assert(ps != NULL);
+
+				if (strcmp(ps, "r") == 0)
+					partition_method = PART_RANGE;
+				else if (strcmp(ps, "h") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					/* possibly a newer version with new partition method */
+					fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+					exit(1);
+				}
+			}
+
+			partitions = atoi(PQgetvalue(res, 0, 2));
+		}
+
+		PQclear(res);
 	}
 
 	/*
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..fb0f6b677d 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,17 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+	"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +111,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +128,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -909,6 +922,8 @@ pgbench(
 check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
+
 # done
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#62

Alvaro Herrera

alvherre@2ndquadrant.com

over 6 years ago

In reply to: Fabien COELHO (#61)

Re: pgbench - allow to create partitioned tables

pgbench's main() is overly long already, and the new code being added
seems to pollute it even more. Can we split it out into a static
function that gets placed, say, just below disconnect_all() or maybe
after runInitSteps?

(Also, we seem to be afraid of function prototypes. Why not move the
append_fillfactor() to *below* the functions that use it?)

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#63

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Alvaro Herrera (#62)

Re: pgbench - allow to create partitioned tables

Hello Alvaro,

pgbench's main() is overly long already, and the new code being added
seems to pollute it even more. Can we split it out into a static
function that gets placed, say, just below disconnect_all() or maybe
after runInitSteps?

I agree that refactoring is a good idea, but I do not think it belongs to
this patch. The file is pretty long too, probably some functions could be
moved to distinct files (eg expression evaluation, variable management,
...).

(Also, we seem to be afraid of function prototypes. Why not move the
append_fillfactor() to *below* the functions that use it?)

Because we avoid one more line for the function prototype? I try to put
functions in def/use order if possible, especially for small functions
like this one.

--
Fabien.

#64

Alvaro Herrera

alvherre@2ndquadrant.com

over 6 years ago

In reply to: Fabien COELHO (#63)

Re: pgbench - allow to create partitioned tables

On 2019-Sep-26, Fabien COELHO wrote:

Hello Alvaro,

pgbench's main() is overly long already, and the new code being added
seems to pollute it even more. Can we split it out into a static
function that gets placed, say, just below disconnect_all() or maybe
after runInitSteps?

I agree that refactoring is a good idea, but I do not think it belongs to
this patch. The file is pretty long too, probably some functions could be
moved to distinct files (eg expression evaluation, variable management,
...).

I'm not suggesting to refactor anything as part of this patch -- just
that instead of adding that new code to main(), you create a new
function for it.

(Also, we seem to be afraid of function prototypes. Why not move the
append_fillfactor() to *below* the functions that use it?)

Because we avoid one more line for the function prototype? I try to put
functions in def/use order if possible, especially for small functions like
this one.

I can see that ... I used to do that too. But nowadays I think it's
less messy to put important stuff first, secondary uninteresting stuff
later. So I suggest to move that new function so that it appears below
the code that uses it. Not a big deal anyhow.

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#65

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Alvaro Herrera (#64)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 27, 2019 at 2:36 AM Alvaro Herrera <alvherre@2ndquadrant.com> wrote:

On 2019-Sep-26, Fabien COELHO wrote:

pgbench's main() is overly long already, and the new code being added
seems to pollute it even more. Can we split it out into a static
function that gets placed, say, just below disconnect_all() or maybe
after runInitSteps?

I agree that refactoring is a good idea, but I do not think it belongs to
this patch. The file is pretty long too, probably some functions could be
moved to distinct files (eg expression evaluation, variable management,
...).

I'm not suggesting to refactor anything as part of this patch -- just
that instead of adding that new code to main(), you create a new
function for it.

(Also, we seem to be afraid of function prototypes. Why not move the
append_fillfactor() to *below* the functions that use it?)

Because we avoid one more line for the function prototype? I try to put
functions in def/use order if possible, especially for small functions like
this one.

I can see that ... I used to do that too. But nowadays I think it's
less messy to put important stuff first, secondary uninteresting stuff
later. So I suggest to move that new function so that it appears below
the code that uses it. Not a big deal anyhow.

Thanks, Alvaro, both seem like good suggestions to me. However, there
are a few more things where your feedback can help:
a. With the new options, we will partition pgbench_accounts, and the
reason is that it is the largest table. Do we need to be
explicit about the reason in the docs?
b. I am not comfortable with test modification in
001_pgbench_with_server.pl. Basically, it doesn't seem like we should
modify the existing test to use non-default tablespaces as part of
this patch. It might be a good idea in general, but I am not sure
doing as part of this patch is a good idea as there is no big value
addition with that modification as far as this patch is concerned.
OTOH, as such there is no harm in testing with non-default
tablespaces.

The other thing is that the query used in patch to fetch partition
information seems correct to me, but maybe there is a better way to
get that information.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#66

Alvaro Herrera

alvherre@2ndquadrant.com

over 6 years ago

In reply to: Amit Kapila (#65)

Re: pgbench - allow to create partitioned tables

On 2019-Sep-27, Amit Kapila wrote:

Thanks, Alvaro, both seem like good suggestions to me. However, there
are a few more things where your feedback can help:
a. With the new options, we will partition pgbench_accounts, and the
reason is that it is the largest table. Do we need to be
explicit about the reason in the docs?

Hmm, I would document what it is that we do, and stop there without
explaining why. Unless you have concrete reasons to want the reason
documented?

b. I am not comfortable with test modification in
001_pgbench_with_server.pl. Basically, it doesn't seem like we should
modify the existing test to use non-default tablespaces as part of
this patch. It might be a good idea in general, but I am not sure
doing as part of this patch is a good idea as there is no big value
addition with that modification as far as this patch is concerned.
OTOH, as such there is no harm in testing with non-default
tablespaces.

Yeah, this change certainly is out of place in this patch.

The other thing is that the query used in patch to fetch partition
information seems correct to me, but maybe there is a better way to
get that information.

I hadn't looked at that, but yeah it seems that it should be using
pg_partition_tree().

--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#67

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Alvaro Herrera (#66)

Re: pgbench - allow to create partitioned tables

On Fri, Sep 27, 2019 at 7:05 PM Alvaro Herrera <alvherre@2ndquadrant.com> wrote:

On 2019-Sep-27, Amit Kapila wrote:

The other thing is that the query used in patch to fetch partition
information seems correct to me, but maybe there is a better way to
get that information.

I hadn't looked at that, but yeah it seems that it should be using
pg_partition_tree().

I think we might also need to use pg_get_partkeydef along with
pg_partition_tree to fetch the partition method information. However,
I think to find reloid of pgbench_accounts in the current search path,
we might need to use some part of query constructed by Fabien.

Fabien, what do you think about Alvaro's suggestion?

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#68

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#67)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

I think we might also need to use pg_get_partkeydef along with
pg_partition_tree to fetch the partition method information. However,
I think to find reloid of pgbench_accounts in the current search path,
we might need to use some part of query constructed by Fabien.

Fabien, what do you think about Alvaro's suggestion?

I think that the current straightforward SQL query is fine and works well, and
I find it pretty elegant. No doubt other solutions could be implemented to
the same effect, with SQL or possibly through introspection functions.

Incidentally, ISTM that "pg_partition_tree" appears in v12, while
partitions exist in v11, so using it would needlessly break backward
compatibility of the feature, which currently works with v11; I do not
find that desirable.

Attached v18:
- remove the test tablespace
I had to work around a strange issue around partitioned tables and
the default tablespace.
- creates a separate function for setting scale, partitions and
partition_method

--
Fabien.

Attachments:

pgbench-init-partitioned-18.patchtext/x-diff; name=pgbench-init-partitioned-18.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..c2a6bb9f15 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,89 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ *
+ * As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables below, fillfactor is needed on
+	 * table pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,10 +3769,16 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
-		if (tablespace != NULL)
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
+		if (tablespace != NULL && strcmp(tablespace, "pg_default") != 0)
 		{
 			char	   *escape_tablespace;
 
@@ -3686,6 +3797,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
 }
 
 /*
@@ -3859,7 +3973,7 @@ initCreatePKeys(PGconn *con)
 
 		strlcpy(buffer, DDLINDEXes[i], sizeof(buffer));
 
-		if (index_tablespace != NULL)
+		if (index_tablespace != NULL && strcmp(index_tablespace, "pg_default") != 0)
 		{
 			char	   *escape_tablespace;
 
@@ -4919,6 +5033,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5083,6 +5201,122 @@ set_random_seed(const char *seed)
 	return true;
 }
 
+/*
+ * Extract pgbench table informations into global variables scale,
+ * partition_method and partitions.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+	PGresult	*res;
+
+	/*
+	 * get the scaling factor that should be same as count(*) from
+	 * pgbench_branches if this is not a custom query
+	 */
+	res = PQexec(con, "select count(*) from pgbench_branches");
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+		fprintf(stderr, "%s", PQerrorMessage(con));
+		if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+		{
+			fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		}
+
+		exit(1);
+	}
+	scale = atoi(PQgetvalue(res, 0, 0));
+	if (scale < 0)
+	{
+		fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+				PQgetvalue(res, 0, 0));
+		exit(1);
+	}
+	PQclear(res);
+
+	/* warn if we override user-given -s switch */
+	if (scale_given)
+		fprintf(stderr,
+				"scale option ignored, using count from pgbench_branches table (%d)\n",
+				scale);
+
+	/*
+	 * Get the partition information for the first "pgbench_accounts" table
+	 * found in search_path.
+	 *
+	 * The result is empty if no "pgbench_accounts" is found.
+	 *
+	 * Otherwise, it always returns one row even if the table is not
+	 * partitioned (in which case the partition strategy is NULL).
+	 *
+	 * The number of partitions can be 0 even for partitioned tables, if no
+	 * partition are attached.
+	 *
+	 * We Assume no partitioning on any failure, so as to avoid failing on
+	 * an old version without "pg_partitioned_table".
+	 */
+	res = PQexec(con,
+				 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+				 "from pg_catalog.pg_class as c "
+				 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+				 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+				 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+				 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+				 "where c.relname = 'pgbench_accounts' and o.n is not null "
+				 "group by 1, 2 "
+				 "order by 1 asc "
+				 "limit 1");
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		/* probably an older version, coldly assume no partitioning */
+		partition_method = PART_NONE;
+		partitions = 0;
+	}
+	else if (PQntuples(res) == 0)
+	{
+		/*
+		 * This case is unlikely as pgbench already found "pgbench_branches"
+		 * above to compute the scale.
+		 */
+		fprintf(stderr,
+				"no pgbench_accounts table found in search_path\n"
+				"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		exit(1);
+	}
+	else /* PQntupes(res) == 1 */
+	{
+		/* normal case, extract partition information */
+		if (PQgetisnull(res, 0, 1))
+			partition_method = PART_NONE;
+		else
+		{
+			char	   *ps = PQgetvalue(res, 0, 1);
+
+			/* column must be there */
+			Assert(ps != NULL);
+
+			if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else
+			{
+				/* possibly a newer version with new partition method */
+				fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+				exit(1);
+			}
+		}
+
+		partitions = atoi(PQgetvalue(res, 0, 2));
+	}
+
+	PQclear(res);
+}
+
+
 int
 main(int argc, char **argv)
 {
@@ -5126,6 +5360,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5160,7 +5396,6 @@ main(int argc, char **argv)
 #endif
 
 	PGconn	   *con;
-	PGresult   *res;
 	char	   *env;
 
 	int			exit_code = 0;
@@ -5486,6 +5721,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5825,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5724,39 +5992,7 @@ main(int argc, char **argv)
 	}
 
 	if (internal_script_used)
-	{
-		/*
-		 * get the scaling factor that should be same as count(*) from
-		 * pgbench_branches if this is not a custom query
-		 */
-		res = PQexec(con, "select count(*) from pgbench_branches");
-		if (PQresultStatus(res) != PGRES_TUPLES_OK)
-		{
-			char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
-			fprintf(stderr, "%s", PQerrorMessage(con));
-			if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
-			{
-				fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
-			}
-
-			exit(1);
-		}
-		scale = atoi(PQgetvalue(res, 0, 0));
-		if (scale < 0)
-		{
-			fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
-					PQgetvalue(res, 0, 0));
-			exit(1);
-		}
-		PQclear(res);
-
-		/* warn if we override user-given -s switch */
-		if (scale_given)
-			fprintf(stderr,
-					"scale option ignored, using count from pgbench_branches table (%d)\n",
-					scale);
-	}
+		GetTableInfo(con, scale_given);
 
 	/*
 	 * :scale variables normally get -s or database scale, but don't override
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..473f312962 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -100,12 +100,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +117,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#69

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#68)

Re: pgbench - allow to create partitioned tables

On Sat, Sep 28, 2019 at 11:41 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

I think we might also need to use pg_get_partkeydef along with
pg_partition_tree to fetch the partition method information. However,
I think to find reloid of pgbench_accounts in the current search path,
we might need to use some part of query constructed by Fabien.

Fabien, what do you think about Alvaro's suggestion?

I think that the current straightforward SQL query is fine and works well, and
I find it pretty elegant. No doubt other solutions could be implemented to
the same effect, with SQL or possibly through introspection functions.

Incidentally, ISTM that "pg_partition_tree" appears in v12, while
partitions exist in v11, so using it would needlessly break backward
compatibility of the feature, which currently works with v11; I do not
find that desirable.

Fair enough. Alvaro, do let us know if you think this can be
simplified. I think even if we find some better way to get that
information as compared to what Fabien has done here, we can change it
later without any impact.

Attached v18:
- remove the test tablespace
I had to work around a strange issue around partitioned tables and
the default tablespace.

- if (tablespace != NULL)
+
+ if (tablespace != NULL && strcmp(tablespace, "pg_default") != 0)
  {

- if (index_tablespace != NULL)
+ if (index_tablespace != NULL && strcmp(index_tablespace, "pg_default") != 0)

I don't think such a workaround is a good idea, for two reasons: (a)
having a check on the name ("pg_default") is not advisable; we should
get the tablespace oid and then check if it is the same as
DEFAULTTABLESPACE_OID, and (b) this will change something which was
previously allowed, i.e. appending the default tablespace name for
non-partitioned tables.

I don't think we need any such check, rather if the user gives
default_tablespace with 'partitions' option, then let it fail with an
error "cannot specify default tablespace for partitioned relations".
If we do that then one of the modified pgbench tests will start
failing. I think we have two options here:

(a) Don't test partitions with "all possible options" test and add a
comment on why we are not testing it there.
(b) Create a non-default tablespace to test partitions with "all
possible options" test as you have in your previous version. Also,
add a comment explaining why in that test we are using non-default
tablespace.

I am leaning towards approach (b) unless you and/or Alvaro feel (a)
is good for now or if you have some other idea.

If we want to go with option (b), I have small comment in your previous test:
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;

I am not sure if we really need this quote skipping stuff. Why can't
we write the test as below:

# tablespace for testing
my $basedir = $node->basedir;
my $ts = "$basedir/regress_pgbench_tap_1_ts_dir";
mkdir $ts or die "cannot create directory $ts";
$ts = TestLib::perl2host($ts);
$node->safe_psql('postgres',
"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#70

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#69)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

Attached v18:
- remove the test tablespace
I had to work around a strange issue around partitioned tables and
the default tablespace.
- if (tablespace != NULL)
+ if (tablespace != NULL && strcmp(tablespace, "pg_default") != 0)
[...]

I don't think we need any such check, rather if the user gives
default_tablespace with 'partitions' option, then let it fail with an
error "cannot specify default tablespace for partitioned relations".

That is the one I wanted to avoid, which is triggered by TAP tests, but
I'm fine with putting back a tablespace. Given the strange constraints on
partitioned tables, ISTM desirable to check that it works with options such as
tablespace and fillfactor.

(b) Create a non-default tablespace to test partitions with "all
possible options" test as you have in your previous version.

Also, add a comment explaining why in that test we are using non-default
tablespace.

I am leaning towards approach (b) unless you and/or Alvaro feel (a)
is good for now or if you have some other idea.

No other idea. I put back the tablespace creation which I just removed,
with comments about why it is there.

If we want to go with option (b), I have small comment in your previous test:
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
I am not sure if we really need this quote skipping stuff. Why can't
we write the test as below:

# tablespace for testing
my $basedir = $node->basedir;
my $ts = "$basedir/regress_pgbench_tap_1_ts_dir";
mkdir $ts or die "cannot create directory $ts";
$ts = TestLib::perl2host($ts);
$node->safe_psql('postgres',
"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"

I think that this last command fails if the path contains a "'", so the
'-escaping is necessary. I had to make changes in TAP tests before because
it was not working when the path was a little bit strange, so now I'm
careful.

Attached v19:
- put back a local tablespace plus comments
- remove the pg_default doubtful workaround.

--
Fabien.

Attachments:

pgbench-init-partitioned-19.patchtext/x-diff; name=pgbench-init-partitioned-19.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..d71e38b8a8 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,89 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ *
+ * As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables below, fillfactor is needed on
+	 * table pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3769,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3797,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5033,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5083,6 +5201,122 @@ set_random_seed(const char *seed)
 	return true;
 }
 
+/*
+ * Extract pgbench table information into global variables scale,
+ * partition_method and partitions.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+	PGresult	*res;
+
+	/*
+	 * get the scaling factor that should be same as count(*) from
+	 * pgbench_branches if this is not a custom query
+	 */
+	res = PQexec(con, "select count(*) from pgbench_branches");
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+		fprintf(stderr, "%s", PQerrorMessage(con));
+		if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+		{
+			fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		}
+
+		exit(1);
+	}
+	scale = atoi(PQgetvalue(res, 0, 0));
+	if (scale < 0)
+	{
+		fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+				PQgetvalue(res, 0, 0));
+		exit(1);
+	}
+	PQclear(res);
+
+	/* warn if we override user-given -s switch */
+	if (scale_given)
+		fprintf(stderr,
+				"scale option ignored, using count from pgbench_branches table (%d)\n",
+				scale);
+
+	/*
+	 * Get the partition information for the first "pgbench_accounts" table
+	 * found in search_path.
+	 *
+	 * The result is empty if no "pgbench_accounts" is found.
+	 *
+	 * Otherwise, it always returns one row even if the table is not
+	 * partitioned (in which case the partition strategy is NULL).
+	 *
+	 * The number of partitions can be 0 even for partitioned tables, if no
+	 * partitions are attached.
+	 *
+	 * We assume no partitioning on any failure, so as to avoid failing on
+	 * an old version without "pg_partitioned_table".
+	 */
+	res = PQexec(con,
+				 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+				 "from pg_catalog.pg_class as c "
+				 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+				 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+				 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+				 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+				 "where c.relname = 'pgbench_accounts' and o.n is not null "
+				 "group by 1, 2 "
+				 "order by 1 asc "
+				 "limit 1");
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		/* probably an older version, coldly assume no partitioning */
+		partition_method = PART_NONE;
+		partitions = 0;
+	}
+	else if (PQntuples(res) == 0)
+	{
+		/*
+		 * This case is unlikely as pgbench already found "pgbench_branches"
+		 * above to compute the scale.
+		 */
+		fprintf(stderr,
+				"no pgbench_accounts table found in search_path\n"
+				"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		exit(1);
+	}
+	else /* PQntuples(res) == 1 */
+	{
+		/* normal case, extract partition information */
+		if (PQgetisnull(res, 0, 1))
+			partition_method = PART_NONE;
+		else
+		{
+			char	   *ps = PQgetvalue(res, 0, 1);
+
+			/* column must be there */
+			Assert(ps != NULL);
+
+			if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else
+			{
+				/* possibly a newer version with new partition method */
+				fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+				exit(1);
+			}
+		}
+
+		partitions = atoi(PQgetvalue(res, 0, 2));
+	}
+
+	PQclear(res);
+}
+
+
 int
 main(int argc, char **argv)
 {
@@ -5126,6 +5360,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5160,7 +5396,6 @@ main(int argc, char **argv)
 #endif
 
 	PGconn	   *con;
-	PGresult   *res;
 	char	   *env;
 
 	int			exit_code = 0;
@@ -5486,6 +5721,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5825,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5724,39 +5992,7 @@ main(int argc, char **argv)
 	}
 
 	if (internal_script_used)
-	{
-		/*
-		 * get the scaling factor that should be same as count(*) from
-		 * pgbench_branches if this is not a custom query
-		 */
-		res = PQexec(con, "select count(*) from pgbench_branches");
-		if (PQresultStatus(res) != PGRES_TUPLES_OK)
-		{
-			char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
-			fprintf(stderr, "%s", PQerrorMessage(con));
-			if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
-			{
-				fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
-			}
-
-			exit(1);
-		}
-		scale = atoi(PQgetvalue(res, 0, 0));
-		if (scale < 0)
-		{
-			fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
-					PQgetvalue(res, 0, 0));
-			exit(1);
-		}
-		PQclear(res);
-
-		/* warn if we override user-given -s switch */
-		if (scale_given)
-			fprintf(stderr,
-					"scale option ignored, using count from pgbench_branches table (%d)\n",
-					scale);
-	}
+		GetTableInfo(con, scale_given);
 
 	/*
 	 * :scale variables normally get -s or database scale, but don't override
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..98e170e0c9 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,20 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing, because partitioned tables cannot use pg_default
+# explicitly and we want to test that table creation with tablespace works
+# for partitioned tables.
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+# this takes care of WIN-specific path issues
+my $ets = TestLib::perl2host($ts);
+# add SQL single-quote string escaping, in case the path contains a quote
+$ets =~ s/'/''/;
+
+$node->safe_psql('postgres',
+       "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +114,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +131,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -910,5 +926,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
 # done
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#71

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#70)

Re: pgbench - allow to create partitioned tables

On Mon, Sep 30, 2019 at 2:26 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

I am leaning towards approach (b) unless you and/or Alvaro feel (a)
is good for now or if you have some other idea.

No other idea. I put back the tablespace creation which I just removed,
with comments about why it is there.
If we want to go with option (b), I have small comment in your previous test:
+# tablespace for testing
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+my $ets = TestLib::perl2host($ts);
+# add needed escaping!
+$ets =~ s/'/''/;
I am not sure if we really need this quote skipping stuff. Why can't
we write the test as below:

# tablespace for testing
my $basedir = $node->basedir;
my $ts = "$basedir/regress_pgbench_tap_1_ts_dir";
mkdir $ts or die "cannot create directory $ts";
$ts = TestLib::perl2host($ts);
$node->safe_psql('postgres',
"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
I think that this last command fails if the path contains a "'", so the
'-escaping is necessary. I had to make changes in TAP tests before because
it was not working when the path was a little bit strange, so now I'm
careful.

Hmm, I don't know what kind of issues you have earlier faced, but
tablespace creation doesn't allow quotes. See the message "tablespace
location cannot contain single quotes" in CreateTableSpace. Also,
there are other places in tests like
src/bin/pg_checksums/t/002_actions.pl which uses the way I have
mentioned. I don't think there is any need for escaping single-quotes
here and I am not seeing the use of same. I don't want to introduce a
new pattern in tests which people can then tomorrow copy at other
places even though such code is not required. OTOH, if there is a
genuine need for the same, then I am fine.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#72

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#71)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Amit,

$node->safe_psql('postgres',
"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"

I think that this last command fails if the path contains a "'", so the
'-escaping is necessary. I had to make changes in TAP tests before because
it was not working when the path was a little bit strange, so now I'm
careful.

Hmm, I don't know what kind of issues you have earlier faced,

AFAICR, paths with shell-sensitive characters ($ ? * ...), which were
breaking something somewhere.

but tablespace creation doesn't allow quotes. See the message
"tablespace location cannot contain single quotes" in CreateTableSpace.

Hmmm. That is the problem of CreateTableSpace. From an SQL perspective,
escaping is required. If the command fails later, that is the problem of
the command implementation, but otherwise this is just a plain syntax
error at the SQL level.

Also, there are other places in tests like
src/bin/pg_checksums/t/002_actions.pl which uses the way I have
mentioned.

Yes, I looked at it and imported the window-specific function to handle
the path. It does not do anything about escaping.

I don't think there is any need for escaping single-quotes
here

As said, this is required for SQL, or you must know that there are no
single quotes in the string.

and I am not seeing the use of same.

Sure. It is probably buggy there too.

I don't want to introduce a new pattern in tests which people can then
tomorrow copy at other places even though such code is not required.
OTOH, if there is a genuine need for the same, then I am fine.

Hmmm. The committer is right by definition. Here is a version without
escaping but with a comment instead.

--
Fabien.

Attachments:

pgbench-init-partitioned-20.patchtext/x-diff; name=pgbench-init-partitioned-20.patchDownload

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cba..e3a0abb4c7 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf..d71e38b8a8 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,25 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions, found or to create.
+ * When creating, 0 is the default and means no partitioning.
+ * When running, this is the actual number of partitions.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,		/* no partitioning */
+	PART_RANGE,	/* range partitioning */
+	PART_HASH		/* hash partitioning */
+}
+			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -617,6 +636,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3601,6 +3623,89 @@ initDropTables(PGconn *con)
 					 "pgbench_tellers");
 }
 
+/*
+ * add fillfactor percent option.
+ *
+ * As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
+}
+
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so that it can be partitioned by range.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables below, fillfactor is needed on
+	 * table pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.
+			 * Although the actual minimum and maximum values can be derived
+			 * from the scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
 /*
  * Create pgbench's standard tables
  */
@@ -3664,9 +3769,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3797,9 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
 }
 
 /*
@@ -4919,6 +5033,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5083,6 +5201,122 @@ set_random_seed(const char *seed)
 	return true;
 }
 
+/*
+ * Extract pgbench table information into global variables scale,
+ * partition_method and partitions.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+	PGresult	*res;
+
+	/*
+	 * get the scaling factor that should be same as count(*) from
+	 * pgbench_branches if this is not a custom query
+	 */
+	res = PQexec(con, "select count(*) from pgbench_branches");
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+		fprintf(stderr, "%s", PQerrorMessage(con));
+		if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+		{
+			fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		}
+
+		exit(1);
+	}
+	scale = atoi(PQgetvalue(res, 0, 0));
+	if (scale < 0)
+	{
+		fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+				PQgetvalue(res, 0, 0));
+		exit(1);
+	}
+	PQclear(res);
+
+	/* warn if we override user-given -s switch */
+	if (scale_given)
+		fprintf(stderr,
+				"scale option ignored, using count from pgbench_branches table (%d)\n",
+				scale);
+
+	/*
+	 * Get the partition information for the first "pgbench_accounts" table
+	 * found in search_path.
+	 *
+	 * The result is empty if no "pgbench_accounts" is found.
+	 *
+	 * Otherwise, it always returns one row even if the table is not
+	 * partitioned (in which case the partition strategy is NULL).
+	 *
+	 * The number of partitions can be 0 even for partitioned tables, if no
+	 * partitions are attached.
+	 *
+	 * We assume no partitioning on any failure, so as to avoid failing on
+	 * an old version without "pg_partitioned_table".
+	 */
+	res = PQexec(con,
+				 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+				 "from pg_catalog.pg_class as c "
+				 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+				 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+				 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+				 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+				 "where c.relname = 'pgbench_accounts' and o.n is not null "
+				 "group by 1, 2 "
+				 "order by 1 asc "
+				 "limit 1");
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		/* probably an older version, coldly assume no partitioning */
+		partition_method = PART_NONE;
+		partitions = 0;
+	}
+	else if (PQntuples(res) == 0)
+	{
+		/*
+		 * This case is unlikely as pgbench already found "pgbench_branches"
+		 * above to compute the scale.
+		 */
+		fprintf(stderr,
+				"no pgbench_accounts table found in search_path\n"
+				"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		exit(1);
+	}
+	else /* PQntuples(res) == 1 */
+	{
+		/* normal case, extract partition information */
+		if (PQgetisnull(res, 0, 1))
+			partition_method = PART_NONE;
+		else
+		{
+			char	   *ps = PQgetvalue(res, 0, 1);
+
+			/* column must be there */
+			Assert(ps != NULL);
+
+			if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else
+			{
+				/* possibly a newer version with new partition method */
+				fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+				exit(1);
+			}
+		}
+
+		partitions = atoi(PQgetvalue(res, 0, 2));
+	}
+
+	PQclear(res);
+}
+
+
 int
 main(int argc, char **argv)
 {
@@ -5126,6 +5360,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5160,7 +5396,6 @@ main(int argc, char **argv)
 #endif
 
 	PGconn	   *con;
-	PGresult   *res;
 	char	   *env;
 
 	int			exit_code = 0;
@@ -5486,6 +5721,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5825,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5724,39 +5992,7 @@ main(int argc, char **argv)
 	}
 
 	if (internal_script_used)
-	{
-		/*
-		 * get the scaling factor that should be same as count(*) from
-		 * pgbench_branches if this is not a custom query
-		 */
-		res = PQexec(con, "select count(*) from pgbench_branches");
-		if (PQresultStatus(res) != PGRES_TUPLES_OK)
-		{
-			char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
-			fprintf(stderr, "%s", PQerrorMessage(con));
-			if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
-			{
-				fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
-			}
-
-			exit(1);
-		}
-		scale = atoi(PQgetvalue(res, 0, 0));
-		if (scale < 0)
-		{
-			fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
-					PQgetvalue(res, 0, 0));
-			exit(1);
-		}
-		PQclear(res);
-
-		/* warn if we override user-given -s switch */
-		if (scale_given)
-			fprintf(stderr,
-					"scale option ignored, using count from pgbench_branches table (%d)\n",
-					scale);
-	}
+		GetTableInfo(con, scale_given);
 
 	/*
 	 * :scale variables normally get -s or database scale, but don't override
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..46ce986129 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,19 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing, because partitioned tables cannot use pg_default
+# explicitly and we want to test that table creation with tablespace works
+# for partitioned tables.
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+# this takes care of WIN-specific path issues
+my $ets = TestLib::perl2host($ts);
+
+# the next commands will issue a syntax error if the path contains a "'"
+$node->safe_psql('postgres',
+       "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +113,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +130,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
 # done
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b..1e9542af3f 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[

#73

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#72)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

On Mon, Sep 30, 2019 at 5:17 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

I don't want to introduce a new pattern in tests which people can then
tomorrow copy at other places even though such code is not required.
OTOH, if there is a genuine need for the same, then I am fine.

Hmmm. The committer is right by definition. Here is a version without
escaping but with a comment instead.

Thanks, attached is a patch with minor modifications which I am
planning to push after one more round of review on Thursday morning
IST unless there are more comments by anyone else.

The changes include:
1. ran pgindent
2. As per Alvaro's suggestions, moved a few function definitions.
3. Changed one or two comments and fixed spelling at one place.

The one place where some suggestion might help:
+ else if (PQntuples(res) == 0)
+ {
+ /*
+ * This case is unlikely as pgbench already found "pgbench_branches"
+ * above to compute the scale.
+ */
+ fprintf(stderr,
+ "no pgbench_accounts table found in search_path\n"
+ "Perhaps you need to do initialization (\"pgbench -i\") in database
\"%s\"\n", PQdb(con));
+ exit(1);
+ }

Can anyone else think of a better error message either in wording or
style for above case?

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

Attachments:

pgbench-init-partitioned-21.patchapplication/octet-stream; name=pgbench-init-partitioned-21.patchDownload

From 62e8fec09bd07184f351f386b81bfaef34e10bd9 Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Tue, 1 Oct 2019 09:50:26 +0530
Subject: [PATCH] pgbench: add --partitions and --partition-method options.

These new options allow users to partition the pgbench_accounts table by
specifying the number of partitions and partitioning method.  The values
allowed for partitioning method are range and hash.

This feature allows users to measure the overhead of partitioning if any.

Author: Fabien COELHO
Reviewed-by: Amit Kapila, Amit Langote, Dilip Kumar, Asif Rehman, and
Alvaro Herrera
Discussion: https://postgr.es/m/alpine.DEB.2.21.1907230826190.7008@lancre
---
 doc/src/sgml/ref/pgbench.sgml                |  25 +++
 src/bin/pgbench/pgbench.c                    | 306 +++++++++++++++++++++++----
 src/bin/pgbench/t/001_pgbench_with_server.pl |  20 +-
 src/bin/pgbench/t/002_pgbench_no_server.pl   |   7 +
 4 files changed, 320 insertions(+), 38 deletions(-)

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3..e3a0abb 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -307,6 +307,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
      </varlistentry>
 
      <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
        <para>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652b..52ea50b 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,23 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions.  0 is the default and means no
+ * partitioning.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,					/* no partitioning */
+	PART_RANGE,					/* range partitioning */
+	PART_HASH					/* hash partitioning */
+}			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -582,6 +599,7 @@ static void doLog(TState *thread, CState *st,
 				  StatsData *agg, bool skipped, double latency, double lag);
 static void processXactStats(TState *thread, CState *st, instr_time *now,
 							 bool skipped, StatsData *agg);
+static void append_fillfactor(char *opts, int len);
 static void addScript(ParsedScript script);
 static void *threadRun(void *arg);
 static void finishCon(CState *st);
@@ -617,6 +635,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3602,6 +3623,77 @@ initDropTables(PGconn *con)
 }
 
 /*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so we choose to partition it.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables, fillfactor is needed on table
+	 * pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.  Although the
+			 * actual minimum and maximum values can be derived from the
+			 * scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
+/*
  * Create pgbench's standard tables
  */
 static void
@@ -3664,9 +3756,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3784,21 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
+}
+
+/*
+ * add fillfactor percent option.
+ *
+ * XXX - As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
 }
 
 /*
@@ -4011,6 +4124,121 @@ runInitSteps(const char *initialize_steps)
 }
 
 /*
+ * Extract pgbench table informations into global variables scale,
+ * partition_method and partitions.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+	PGresult   *res;
+
+	/*
+	 * get the scaling factor that should be same as count(*) from
+	 * pgbench_branches if this is not a custom query
+	 */
+	res = PQexec(con, "select count(*) from pgbench_branches");
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+		fprintf(stderr, "%s", PQerrorMessage(con));
+		if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+		{
+			fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		}
+
+		exit(1);
+	}
+	scale = atoi(PQgetvalue(res, 0, 0));
+	if (scale < 0)
+	{
+		fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+				PQgetvalue(res, 0, 0));
+		exit(1);
+	}
+	PQclear(res);
+
+	/* warn if we override user-given -s switch */
+	if (scale_given)
+		fprintf(stderr,
+				"scale option ignored, using count from pgbench_branches table (%d)\n",
+				scale);
+
+	/*
+	 * Get the partition information for the first "pgbench_accounts" table
+	 * found in search_path.
+	 *
+	 * The result is empty if no "pgbench_accounts" is found.
+	 *
+	 * Otherwise, it always returns one row even if the table is not
+	 * partitioned (in which case the partition strategy is NULL).
+	 *
+	 * The number of partitions can be 0 even for partitioned tables, if no
+	 * partition is attached.
+	 *
+	 * We assume no partitioning on any failure, so as to avoid failing on an
+	 * old version without "pg_partitioned_table".
+	 */
+	res = PQexec(con,
+				 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+				 "from pg_catalog.pg_class as c "
+				 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+				 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+				 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+				 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+				 "where c.relname = 'pgbench_accounts' and o.n is not null "
+				 "group by 1, 2 "
+				 "order by 1 asc "
+				 "limit 1");
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		/* probably an older version, coldly assume no partitioning */
+		partition_method = PART_NONE;
+		partitions = 0;
+	}
+	else if (PQntuples(res) == 0)
+	{
+		/*
+		 * This case is unlikely as pgbench already found "pgbench_branches"
+		 * above to compute the scale.
+		 */
+		fprintf(stderr,
+				"no pgbench_accounts table found in search_path\n"
+				"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		exit(1);
+	}
+	else						/* PQntupes(res) == 1 */
+	{
+		/* normal case, extract partition information */
+		if (PQgetisnull(res, 0, 1))
+			partition_method = PART_NONE;
+		else
+		{
+			char	   *ps = PQgetvalue(res, 0, 1);
+
+			/* column must be there */
+			Assert(ps != NULL);
+
+			if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else
+			{
+				/* possibly a newer version with new partition method */
+				fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+				exit(1);
+			}
+		}
+
+		partitions = atoi(PQgetvalue(res, 0, 2));
+	}
+
+	PQclear(res);
+}
+
+/*
  * Replace :param with $n throughout the command's SQL text, which
  * is a modifiable string in cmd->lines.
  */
@@ -4919,6 +5147,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5358,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5160,7 +5394,6 @@ main(int argc, char **argv)
 #endif
 
 	PGconn	   *con;
-	PGresult   *res;
 	char	   *env;
 
 	int			exit_code = 0;
@@ -5486,6 +5719,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5823,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5724,39 +5990,7 @@ main(int argc, char **argv)
 	}
 
 	if (internal_script_used)
-	{
-		/*
-		 * get the scaling factor that should be same as count(*) from
-		 * pgbench_branches if this is not a custom query
-		 */
-		res = PQexec(con, "select count(*) from pgbench_branches");
-		if (PQresultStatus(res) != PGRES_TUPLES_OK)
-		{
-			char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
-			fprintf(stderr, "%s", PQerrorMessage(con));
-			if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
-			{
-				fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
-			}
-
-			exit(1);
-		}
-		scale = atoi(PQgetvalue(res, 0, 0));
-		if (scale < 0)
-		{
-			fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
-					PQgetvalue(res, 0, 0));
-			exit(1);
-		}
-		PQclear(res);
-
-		/* warn if we override user-given -s switch */
-		if (scale_given)
-			fprintf(stderr,
-					"scale option ignored, using count from pgbench_branches table (%d)\n",
-					scale);
-	}
+		GetTableInfo(con, scale_given);
 
 	/*
 	 * :scale variables normally get -s or database scale, but don't override
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f6..c441626 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,19 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing, because partitioned tables cannot use pg_default
+# explicitly and we want to test that table creation with tablespace works
+# for partitioned tables.
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+# this takes care of WIN-specific path issues
+my $ets = TestLib::perl2host($ts);
+
+# the next commands will issue a syntax error if the path contains a "'"
+$node->safe_psql('postgres',
+       "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +113,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +130,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
 # done
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa184..1e9542a 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[
-- 
1.8.3.1

#74

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#73)

Re: pgbench - allow to create partitioned tables

Hello Amit,

1. ran pgindent
2. As per Alvaro's suggestions move few function definitions.
3. Changed one or two comments and fixed spelling at one place.

Thanks for the improvements.

Not sure why you put "XXX - " in front of "append_fillfactor" comment,
though.

+ fprintf(stderr,
+ "no pgbench_accounts table found in search_path\n"
+ "Perhaps you need to do initialization (\"pgbench -i\") in database
\"%s\"\n", PQdb(con));

Can anyone else think of a better error message either in wording or
style for above case?

No better idea from me. The second part is a duplicate of an earlier
message, used when getting the scale fails.

--
Fabien.

#75

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#74)

Re: pgbench - allow to create partitioned tables

On Tue, Oct 1, 2019 at 11:51 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

1. ran pgindent
2. As per Alvaro's suggestions move few function definitions.
3. Changed one or two comments and fixed spelling at one place.

Thanks for the improvements.

Not sure why you put "XXX - " in front of "append_fillfactor" comment,
though.

It is to indicate that we can do this after further consideration.

+ fprintf(stderr,
+ "no pgbench_accounts table found in search_path\n"
+ "Perhaps you need to do initialization (\"pgbench -i\") in database
\"%s\"\n", PQdb(con));
Can anyone else think of a better error message either in wording or
style for above case?

No better idea from me. The second part is a duplicate from a earlier
comment, when getting the scale fails.

Yeah, I know that, but this doesn't look quite right. I mean to say that
what we want to convey via this message is correct, but I am not
completely happy with how it is displayed. How about something like:
"pgbench_accounts is missing, you need to do initialization (\"pgbench
-i\") in database \"%s\"\n"? Feel free to propose something else along
similar lines. If possible, I want to convey this information in a single
sentence.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#76

Rafia Sabih

rafia.pghackers@gmail.com

over 6 years ago

In reply to: Amit Kapila (#75)

Re: pgbench - allow to create partitioned tables

On Tue, 1 Oct 2019 at 15:39, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Oct 1, 2019 at 11:51 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello Amit,

1. ran pgindent
2. As per Alvaro's suggestions move few function definitions.
3. Changed one or two comments and fixed spelling at one place.

Thanks for the improvements.

Not sure why you put "XXX - " in front of "append_fillfactor" comment,
though.

It is to indicate that we can do this after further consideration.
+ fprintf(stderr,
+ "no pgbench_accounts table found in search_path\n"
+ "Perhaps you need to do initialization (\"pgbench -i\") in database
\"%s\"\n", PQdb(con));
Can anyone else think of a better error message either in wording or
style for above case?

No better idea from me. The second part is a duplicate from a earlier
comment, when getting the scale fails.
Yeah, I know that, but this doesn't look quite right. I mean to say
whatever we want to say via this message is correct, but I am not
completely happy with the display part. How about something like:
"pgbench_accounts is missing, you need to do initialization (\"pgbench
-i\") in database \"%s\"\n"? Feel free to propose something else on
similar lines? If possible, I want to convey this information in a single
sentence.

How about, "pgbench_accounts is missing, initialize (\"pgbench -i\") in

database \"%s\"\n"?

--
Regards,
Rafia Sabih

#77

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Rafia Sabih (#76)

Re: pgbench - allow to create partitioned tables

Yeah, I know that, but this doesn't look quite right. I mean to say
whatever we want to say via this message is correct, but I am not
completely happy with the display part. How about something like:
"pgbench_accounts is missing, you need to do initialization (\"pgbench
-i\") in database \"%s\"\n"? Feel free to propose something else on
similar lines? If possible, I want to convey this information in a single
sentence.

How about, "pgbench_accounts is missing, initialize (\"pgbench -i\") in

database \"%s\"\n"?

I think that we should not presume too much about the solution: perhaps
the user did not specify the right database or host and it has nothing to
do with initialization.

Maybe something like:

"pgbench_accounts is missing, perhaps you need to initialize (\"pgbench
-i\") in database \"%s\"\n"

The two sentences approach has the logic of "error" and a separate "hint"
which is often used.

--
Fabien.

#78

Rafia Sabih

rafia.pghackers@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#77)

Re: pgbench - allow to create partitioned tables

On Tue, 1 Oct 2019 at 16:48, Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Yeah, I know that, but this doesn't look quite right. I mean to say
whatever we want to say via this message is correct, but I am not
completely happy with the display part. How about something like:
"pgbench_accounts is missing, you need to do initialization (\"pgbench
-i\") in database \"%s\"\n"? Feel free to propose something else on
similar lines? If possible, I want to convey this information in a

single

sentence.

How about, "pgbench_accounts is missing, initialize (\"pgbench -i\") in

database \"%s\"\n"?

I think that we should not presume too much about the solution: perhaps
the user did not specify the right database or host and it has nothing to
do with initialization.

Maybe something like:

"pgbench_accounts is missing, perhaps you need to initialize (\"pgbench
-i\") in database \"%s\"\n"

The two sentences approach has the logic of "error" and a separate "hint"
which is often used.

+1 for error and hint separation.

--
Regards,
Rafia Sabih

#79

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Rafia Sabih (#78)

Re: pgbench - allow to create partitioned tables

On Tue, Oct 1, 2019 at 8:45 PM Rafia Sabih <rafia.pghackers@gmail.com>
wrote:

On Tue, 1 Oct 2019 at 16:48, Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Yeah, I know that, but this doesn't look quite right. I mean to say
whatever we want to say via this message is correct, but I am not
completely happy with the display part. How about something like:
"pgbench_accounts is missing, you need to do initialization (\"pgbench
-i\") in database \"%s\"\n"? Feel free to propose something else on
similar lines? If possible, I want to convey this information in a

single

sentence.

How about, "pgbench_accounts is missing, initialize (\"pgbench -i\") in

database \"%s\"\n"?

I think that we should not presume too much about the solution: perhaps
the user did not specify the right database or host and it has nothing to
do with initialization.

Maybe something like:

"pgbench_accounts is missing, perhaps you need to initialize (\"pgbench
-i\") in database \"%s\"\n"

The two sentences approach has the logic of "error" and a separate "hint"
which is often used.

+1 for error and hint separation.

Okay, if you people like the approach of two sentences for the separation
of "hint" and "error", then I think the second line should end with a
period. See the note below from the docs [1]:
"Grammar and Punctuation

The rules are different for primary error messages and for detail/hint
messages:

Primary error messages: Do not capitalize the first letter. Do not end a
message with a period. Do not even think about ending a message with an
exclamation point.

Detail and hint messages: Use complete sentences, and end each with a
period. Capitalize the first word of sentences. Put two spaces after the
period if another sentence follows (for English text; might be
inappropriate in other languages)."

Also, the similar style is used in other places in code, see
contrib/oid2name/oid2name.c, contrib/pg_standby/pg_standby.c for similar
usage.

I shall modify this before commit unless you disagree.

[1]: https://www.postgresql.org/docs/devel/error-style-guide.html

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#80

Amit Kapila

amit.kapila16@gmail.com

over 6 years ago

In reply to: Amit Kapila (#73)

Re: pgbench - allow to create partitioned tables

On Tue, Oct 1, 2019 at 10:20 AM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Mon, Sep 30, 2019 at 5:17 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

I don't want to introduce a new pattern in tests which people can then
tomorrow copy at other places even though such code is not required.
OTOH, if there is a genuine need for the same, then I am fine.

Hmmm. The committer is right by definition. Here is a version without
escaping but with a comment instead.

Thanks, attached is a patch with minor modifications which I am
planning to push after one more round of review on Thursday morning
IST unless there are more comments by anyone else.

Pushed.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#81

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Amit Kapila (#80)

Re: pgbench - allow to create partitioned tables

Thanks, attached is a patch with minor modifications which I am
planning to push after one more round of review on Thursday morning
IST unless there are more comments by anyone else.

Pushed.

Thanks!

--
Fabien.

#82

Ashutosh Sharma

ashu.coek88@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#81)

Re: pgbench - allow to create partitioned tables

Hi Fabien, Amit,

I can see that when an invalid number of partitions is specified,
pgbench sometimes fails with the error "invalid number of partitions:
...", whereas at other times it does not; instead, it creates a number
of partitions that was not specified by the user.

As partitions is an integer variable, the maximum value it can
hold is "2147483647". But if I specify partitions as "3147483647",
the atoi function returns a value less than zero and pgbench terminates
with an error. However, if the number of partitions specified is
something like "5147483647", atoi returns a non-negative number and
pgbench creates as many partitions as the value returned by the atoi
function.

Have a look at the below examples,

[ashu@localhost bin]$ ./pgbench -i -s 10 --partitions=2147483647 postgres
dropping old tables...
creating tables...
creating 2147483647 partitions...
^C
[ashu@localhost bin]$ ./pgbench -i -s 10 --partitions=3147483647 postgres
invalid number of partitions: "3147483647"

[ashu@localhost bin]$ ./pgbench -i -s 10 --partitions=5147483647 postgres
dropping old tables...
creating tables...
creating 852516351 partitions...
^C

This seems to be a problem with the atoi function, doesn't it?

The atoi function is used in several places in pgbench, and I can see
similar behaviour in all of them. For example, it is used for the
scale factor, and the above observation holds for that as well. So, is
this a bug, or do you feel that it isn't and can be ignored? Please
let me know your thoughts on this. Thank you.

--
With Regards,
Ashutosh Sharma
EnterpriseDB:http://www.enterprisedb.com

Show quoted text

On Thu, Oct 3, 2019 at 10:30 AM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Thanks, attached is a patch with minor modifications which I am
planning to push after one more round of review on Thursday morning
IST unless there are more comments by anyone else.

Pushed.

Thanks!

--
Fabien.

#83

Fabien COELHO

coelho@cri.ensmp.fr

over 6 years ago

In reply to: Ashutosh Sharma (#82)

Re: pgbench - allow to create partitioned tables

Hello,

As partitions is an integer type variable, the maximum value it can
hold is "2147483647". But if I specify partitions as "3147483647",
atoi function returns a value lesser than zero and pgbench terminates
with an error. However, if the value for number of partitions
specified is something like "5147483647", atoi returns a non-negative
number and pgbench creates as many number of partitions as the value
returned by atoi function.

This seems like a problem with atoi function, isn't it?

Yes.

atoi functions has been used at several places in pgbench script and I
can see similar behaviour for all. For e.g. it has been used with
scale factor and above observation is true for that as well. So, is
this a bug or you guys feel that it isn't and can be ignored? Please
let me know your thoughts on this. Thank you.

I think that it is a known bug (as you noted atoi is used more or less
everywhere in pgbench and other commands) which should be addressed
separately: all integer user inputs should be validated for syntax and
overflow, everywhere, really. This is not currently the case, so I simply
replicated the current bad practice when developing this feature.

There is/was a current patch/discussion to improve integer parsing, which
could address this.

--
Fabien.

#84

Ashutosh Sharma

ashu.coek88@gmail.com

over 6 years ago

In reply to: Fabien COELHO (#83)

Re: pgbench - allow to create partitioned tables

On Thu, Oct 3, 2019 at 1:53 PM Fabien COELHO <coelho@cri.ensmp.fr> wrote:

Hello,

As partitions is an integer type variable, the maximum value it can
hold is "2147483647". But if I specify partitions as "3147483647",
the atoi function returns a value less than zero and pgbench terminates
with an error. However, if the value for the number of partitions
specified is something like "5147483647", atoi returns a non-negative
number and pgbench creates as many partitions as the value
returned by the atoi function.

This seems like a problem with atoi function, isn't it?

Yes.

The atoi function has been used at several places in the pgbench script and I
can see similar behaviour for all. E.g., it has been used with
scale factor and above observation is true for that as well. So, is
this a bug or you guys feel that it isn't and can be ignored? Please
let me know your thoughts on this. Thank you.

I think that it is a known bug (as you noted atoi is used more or less
everywhere in pgbench and other commands) which should be addressed
separately: all integer user inputs should be validated for syntax and
overflow, everywhere, really. This is not currently the case, so I simply
replicated the current bad practice when developing this feature.

Okay, I think we should possibly replace atoi with strtol function
call for better error handling. It handles the erroneous inputs better
than atoi.

There is/was a current patch/discussion to improve integer parsing, which
could address this.

It seems like you are referring to the following discussion on hackers:

/messages/by-id/20190724040237.GB64205@begriffs.com

--
With Regards,
Ashutosh Sharma
EnterpriseDB:http://www.enterprisedb.com

#85

Peter Eisentraut

peter.eisentraut@2ndquadrant.com

about 6 years ago

In reply to: Amit Kapila (#73)

Re: pgbench - allow to create partitioned tables

The documentation and pgbench --help output that accompanied this patch
claims that the argument to pgbench --partition-method is optional and
defaults to "range", but that is not actually the case, as the
implementation requires an argument. Could you please sort this out?

Personally, I think making the argument optional is unnecessary and
confusing, so I'd just change the documentation.

--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#86

Amit Kapila

amit.kapila16@gmail.com

about 6 years ago

In reply to: Peter Eisentraut (#85)

Re: pgbench - allow to create partitioned tables

On Fri, Jan 3, 2020 at 3:24 PM Peter Eisentraut
<peter.eisentraut@2ndquadrant.com> wrote:

The documentation and pgbench --help output that accompanied this patch
claims that the argument to pgbench --partition-method is optional and
defaults to "range", but that is not actually the case, as the
implementation requires an argument. Could you please sort this out?

AFAICS, if the user omits this argument, then the default is range as
specified in docs. I tried by using something like 'pgbench.exe -i -s
1 --partitions=2 postgres' and then run 'pgbench -S postgres'.

--
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com

#87

Peter Eisentraut

peter.eisentraut@2ndquadrant.com

about 6 years ago

In reply to: Amit Kapila (#86)

Re: pgbench - allow to create partitioned tables

On 2020-01-03 11:04, Amit Kapila wrote:

On Fri, Jan 3, 2020 at 3:24 PM Peter Eisentraut
<peter.eisentraut@2ndquadrant.com> wrote:

The documentation and pgbench --help output that accompanied this patch
claims that the argument to pgbench --partition-method is optional and
defaults to "range", but that is not actually the case, as the
implementation requires an argument. Could you please sort this out?

AFAICS, if the user omits this argument, then the default is range as
specified in docs. I tried by using something like 'pgbench.exe -i -s
1 --partitions=2 postgres' and then run 'pgbench -S postgres'.

Ah, the way I interpreted this is that the argument to
--partition-method itself is optional.

--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#88

Fabien COELHO

coelho@cri.ensmp.fr

about 6 years ago

In reply to: Peter Eisentraut (#87)

1 attachment(s)

Re: pgbench - allow to create partitioned tables

Hello Peter,

The documentation and pgbench --help output that accompanied this patch
claims that the argument to pgbench --partition-method is optional and
defaults to "range", but that is not actually the case, as the
implementation requires an argument. Could you please sort this out?

AFAICS, if the user omits this argument, then the default is range as
specified in docs. I tried by using something like 'pgbench.exe -i -s
1 --partitions=2 postgres' and then run 'pgbench -S postgres'.

Ah, the way I interpreted this is that the argument to --partition-method
itself is optional.

Yep. Optional stuff would be in [], where () is used for choices.

Would the attached have improved your understanding? It is somehow more
consistent with other help lines.

--
Fabien.

Attachments:

pgbench-help-part-1.patchtext/x-diff; name=pgbench-help-part-1.patchDownload

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index a1e0663c8b..8d4f5f0866 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -639,7 +639,7 @@ usage(void)
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
 		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
-		   "  --partition-method=(range|hash)\n"
+		   "  --partition-method=range|hash\n"
 		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"