*** a/contrib/pgbench/pgbench.c
--- b/contrib/pgbench/pgbench.c
***************
*** 106,111 **** extern int optind;
--- 106,114 ----
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default nxacts */
+ #define MIN_GAUSSIAN_THRESHOLD 2.0 /* smallest threshold accepted for the gaussian random generator */
+ #define MIN_EXPONENTIAL_THRESHOLD 2.0 /* smallest threshold accepted for the exponential random generator */
+
int nxacts = 0; /* number of transactions per client */
int duration = 0; /* duration in seconds */
***************
*** 176,181 **** int progress_nthreads = 0; /* number of threads for progress report */
--- 179,188 ----
bool is_connect; /* establish connection for each transaction */
bool is_latencies; /* report per-command latencies */
int main_pid; /* main process id used in log filename */
+ double stdev_threshold = 5; /* gaussian standard deviation threshold; overwritten by --gaussian=NUM */
+ double exp_threshold = 5; /* exponential threshold; overwritten by --exponential=NUM */
+ bool gaussian_option = false; /* set when --gaussian is given: use gaussian distribution random generator */
+ bool exponential_option = false; /* set when --exponential is given: use exponential distribution random generator */
char *pghost = "";
char *pgport = "";
***************
*** 338,346 **** static char *select_only = {
--- 345,436 ----
"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
};
+ /*
+  * --exponential case: built-in script used for transaction type 0 when
+  * --exponential is given without --gaussian.  :aid is drawn with
+  * \setexponential using :exp_threshold; :bid, :tid and :delta are drawn
+  * with \setrandom.
+  */
+ static char *exponential_tpc_b = {
+ "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setexponential aid 1 :naccounts :exp_threshold\n"
+ "\\setrandom bid 1 :nbranches\n"
+ "\\setrandom tid 1 :ntellers\n"
+ "\\setrandom delta -5000 5000\n"
+ "BEGIN;\n"
+ "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n"
+ "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n"
+ "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ "END;\n"
+ };
+
+ /*
+  * --exponential with -N case: built-in script used for transaction type 2
+  * when --exponential is given without --gaussian.  :aid is drawn with
+  * \setexponential using :exp_threshold.  The script touches only
+  * pgbench_accounts and pgbench_history; :bid and :tid are still drawn with
+  * \setrandom because the history row records them.
+  */
+ static char *exponential_simple_update = {
+ "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setexponential aid 1 :naccounts :exp_threshold\n"
+ "\\setrandom bid 1 :nbranches\n"
+ "\\setrandom tid 1 :ntellers\n"
+ "\\setrandom delta -5000 5000\n"
+ "BEGIN;\n"
+ "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ "END;\n"
+ };
+
+ /*
+  * --exponential with -S case: built-in script used for transaction type 1
+  * when --exponential is given without --gaussian.  A single SELECT on
+  * pgbench_accounts whose :aid is drawn with \setexponential using
+  * :exp_threshold.
+  */
+ static char *exponential_select_only = {
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setexponential aid 1 :naccounts :exp_threshold\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ };
+
+ /*
+  * --gaussian case: built-in script used for transaction type 0 when
+  * --gaussian is given.  :aid is drawn with \setgaussian using
+  * :stdev_threshold; :bid, :tid and :delta are drawn with \setrandom.
+  */
+ static char *gaussian_tpc_b = {
+ "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ "\\setrandom bid 1 :nbranches\n"
+ "\\setrandom tid 1 :ntellers\n"
+ "\\setrandom delta -5000 5000\n"
+ "BEGIN;\n"
+ "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n"
+ "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n"
+ "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ "END;\n"
+ };
+
+ /*
+  * --gaussian with -N case: built-in script used for transaction type 2
+  * when --gaussian is given.  :aid is drawn with \setgaussian using
+  * :stdev_threshold.  The script touches only pgbench_accounts and
+  * pgbench_history; :bid and :tid are still drawn with \setrandom because
+  * the history row records them.
+  */
+ static char *gaussian_simple_update = {
+ "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ "\\setrandom bid 1 :nbranches\n"
+ "\\setrandom tid 1 :ntellers\n"
+ "\\setrandom delta -5000 5000\n"
+ "BEGIN;\n"
+ "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ "END;\n"
+ };
+
+ /*
+  * --gaussian with -S case: built-in script used for transaction type 1
+  * when --gaussian is given.  A single SELECT on pgbench_accounts whose
+  * :aid is drawn with \setgaussian using :stdev_threshold.
+  */
+ static char *gaussian_select_only = {
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ };
+
/* Function prototypes */
static void setalarm(int seconds);
static void *threadRun(void *arg);
+ static inline double sqrtd(const double x);
static void
usage(void)
***************
*** 381,386 **** usage(void)
--- 471,478 ----
" -v, --vacuum-all vacuum all four standard tables before tests\n"
" --aggregate-interval=NUM aggregate data over NUM seconds\n"
" --sampling-rate=NUM fraction of transactions to log (e.g. 0.01 for 1%%)\n"
+ " --exponential=NUM exponential distribution with NUM threshold parameter\n "
+ " --gaussian=NUM gaussian distribution with NUM standard deviation threshold\n"
"\nCommon options:\n"
" -d, --debug print debugging output\n"
" -h, --host=HOSTNAME database server host or socket directory\n"
***************
*** 477,482 **** getrand(TState *thread, int64 min, int64 max)
--- 569,670 ----
return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
}
+ /*
+  * Random number generator: exponential distribution from min to max inclusive.
+  *
+  * thread supplies the pg_erand48() state.  exp_threshold is the cut-off
+  * applied to the exponential deviate: draws at or beyond it are rejected and
+  * redrawn, and accepted draws are divided by it to land in [0,1).  Small
+  * deviates are the most likely, so results cluster towards min.
+  */
+ static int64
+ getExponentialrand(TState *thread, int64 min, int64 max, double exp_threshold)
+ {
+     double rand;
+
+     /*
+      * Draw until the deviate falls inside [0, exp_threshold).  A draw is
+      * rejected with probability exp(-exp_threshold), which is about 13.5
+      * percent at a threshold of 2 (MIN_EXPONENTIAL_THRESHOLD) and shrinks
+      * quickly for larger thresholds, so the loop rarely repeats.
+      */
+     do
+     {
+         /* pg_erand48 yields [0,1); flip to (0,1] so log() never sees 0 */
+         rand = 1.0 - pg_erand48(thread->random_state);
+         rand = -log(rand);
+     } while (rand >= exp_threshold);
+
+     /*
+      * Normalization to [0,1).  The upper bound has to be exclusive: a value
+      * of exactly 1.0 would make the expression below yield max + 1.
+      */
+     rand = rand / exp_threshold;
+
+     /* return int64 random number between min and max inclusive */
+     return min + (int64) ((max - min + 1) * rand);
+ }
+
+ /*
+  * Random number generator: gaussian distribution from min to max inclusive.
+  *
+  * thread supplies the pg_erand48() state.  stdev_threshold is the cut-off, in
+  * standard deviations, applied to the normal deviate: draws outside
+  * [-stdev_threshold, stdev_threshold) are rejected and redrawn, and accepted
+  * draws are mapped linearly onto [0,1).  Results therefore cluster around the
+  * middle of the min..max range.
+  */
+ static int64
+ getGaussianrand(TState *thread, int64 min, int64 max, double stdev_threshold)
+ {
+     double stdev;
+     double rand;
+
+     /*
+      * Draw until the deviate falls inside the accepted interval.  With a
+      * threshold of 2 (MIN_GAUSSIAN_THRESHOLD) a draw is rejected less than
+      * 5 percent of the time, so the loop rarely repeats.
+      *
+      * All working values are locals on purpose.  Caching the second half of
+      * a Box-Muller pair in function-level statics would share that state
+      * between every thread and client calling this function, which is a
+      * data race and also mixes the per-thread random streams.
+      */
+     do
+     {
+         double rand1;
+         double rand2;
+
+         /*
+          * pg_erand48 yields [0,1).  rand1 is flipped to (0,1] so that
+          * log() never sees 0; rand2 is only used as an angle, so [0,1)
+          * is fine as it is.
+          */
+         rand1 = 1.0 - pg_erand48(thread->random_state);
+         rand2 = pg_erand48(thread->random_state);
+
+         /* Box-Muller transform */
+         stdev = sqrtd(-2.0 * log(rand1)) * sin(2.0 * M_PI * rand2);
+     } while (stdev < -stdev_threshold || stdev >= stdev_threshold);
+
+     /* normalization to [0,1) */
+     rand = (stdev + stdev_threshold) / (stdev_threshold * 2.0);
+
+     /* return int64 random number between min and max inclusive */
+     return min + (int64) ((max - min + 1) * rand);
+ }
+
+ /*
+  * Square root of x.
+  *
+  * Delegates to the C library's sqrt().  A hand-rolled "fast inverse square
+  * root" is deliberately not used here: reinterpreting the bits of a double
+  * through a long long pointer violates the strict aliasing rules and assumes
+  * a 64-bit IEEE layout, and a couple of Newton steps only give an
+  * approximate root, which would skew the generated distribution.
+  */
+ static inline double
+ sqrtd(const double x)
+ {
+     return sqrt(x);
+ }
+
/* call PQexec() and exit() on failure */
static void
executeStatement(PGconn *con, const char *sql)
***************
*** 1315,1325 **** top:
fprintf(stderr, "\n");
}
! if (pg_strcasecmp(argv[0], "setrandom") == 0)
{
char *var;
int64 min,
max;
char res[64];
if (*argv[2] == ':')
--- 1503,1516 ----
fprintf(stderr, "\n");
}
! if ((pg_strcasecmp(argv[0], "setrandom") == 0) ||
! (pg_strcasecmp(argv[0], "setgaussian") == 0) ||
! (pg_strcasecmp(argv[0], "setexponential") == 0))
{
char *var;
int64 min,
max;
+ double threshold = 0;
char res[64];
if (*argv[2] == ':')
***************
*** 1365,1375 **** top:
}
/*
! * getrand() needs to be able to subtract max from min and add one
! * to the result without overflowing. Since we know max > min, we
! * can detect overflow just by checking for a negative result. But
! * we must check both that the subtraction doesn't overflow, and
! * that adding one to the result doesn't overflow either.
*/
if (max - min < 0 || (max - min) + 1 < 0)
{
--- 1556,1566 ----
}
/*
! * The random number generators need to be able to subtract min
! * from max and add one to the result without overflowing.
! * Since we know max > min, we can detect overflow just by checking
! * for a negative result. But we must check both that the subtraction
! * doesn't overflow, and that adding one to the result doesn't overflow either.
*/
if (max - min < 0 || (max - min) + 1 < 0)
{
***************
*** 1378,1387 **** top:
return true;
}
#ifdef DEBUG
! printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
#endif
! snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
if (!putVariable(st, argv[0], argv[1], res))
{
--- 1569,1623 ----
return true;
}
+ if (pg_strcasecmp(argv[0], "setrandom") == 0)
+ {
+ #ifdef DEBUG
+ printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
+ #endif
+ snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+ }
+ else if ((pg_strcasecmp(argv[0], "setgaussian") == 0) || (pg_strcasecmp(argv[0], "setexponential") == 0))
+ {
+ if(*argv[4] == ':')
+ {
+ if((var = getVariable(st, argv[4] + 1)) == NULL)
+ {
+ fprintf(stderr, "%s: invalid threshold number %s\n", argv[0], argv[4]);
+ st->ecnt++;
+ return true;
+ }
+ threshold = strtod(var, NULL);
+ }
+ else
+ threshold = strtod(argv[4], NULL);
+
+ if (pg_strcasecmp(argv[0], "setgaussian") == 0)
+ {
+ if (threshold < MIN_GAUSSIAN_THRESHOLD)
+ {
+ fprintf(stderr, "%s: gaussian threshold must be more than 2\n,", argv[4]);
+ st->ecnt++;
+ return true;
+ }
+ #ifdef DEBUG
+ printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianrand(thread, min, max, threshold));
+ #endif
+ snprintf(res, sizeof(res), INT64_FORMAT, getGaussianrand(thread, min, max, threshold));
+ }
+ else if (pg_strcasecmp(argv[0], "setexponential") == 0)
+ {
+ if (threshold < MIN_EXPONENTIAL_THRESHOLD)
+ {
+ fprintf(stderr, "%s: exponential threshold must be more than 2\n,", argv[4]);
+ st->ecnt++;
+ return true;
+ }
#ifdef DEBUG
! printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialrand(thread, min, max, threshold));
#endif
! snprintf(res, sizeof(res), INT64_FORMAT, getExponentialrand(thread, min, max, threshold));
! }
! }
if (!putVariable(st, argv[0], argv[1], res))
{
***************
*** 1915,1920 **** process_commands(char *buf)
--- 2151,2169 ----
fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
my_commands->argv[0], my_commands->argv[j]);
}
+ else if ((pg_strcasecmp(my_commands->argv[0], "setgaussian") == 0) ||
+ (pg_strcasecmp(my_commands->argv[0], "setexponential") == 0))
+ {
+ if (my_commands->argc < 5)
+ {
+ fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
+ exit(1);
+ }
+
+ for (j = 5; j < my_commands->argc; j++)
+ fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
+ my_commands->argv[0], my_commands->argv[j]);
+ }
else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
{
if (my_commands->argc < 3)
***************
*** 2188,2203 **** printResults(int ttype, int normal_xacts, int nclients,
(INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
if (ttype == 0)
! s = "TPC-B (sort of)";
else if (ttype == 2)
! s = "Update only pgbench_accounts";
else if (ttype == 1)
! s = "SELECT only";
else
s = "Custom query";
printf("transaction type: %s\n", s);
printf("scaling factor: %d\n", scale);
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
--- 2437,2483 ----
(INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
if (ttype == 0)
! {
! if (gaussian_option)
! s = "Gaussian distributed TPC-B (sort of)";
! else if (exponential_option)
! s = "Exponential distributed TPC-B (sort of)";
! else
! s = "TPC-B (sort of)";
! }
else if (ttype == 2)
! {
! if (gaussian_option)
! s = "Gaussian distributed update only pgbench_accounts";
! else if (exponential_option)
! s = "Exponential distributed update only pgbench_accounts";
! else
! s = "Update only pgbench_accounts";
! }
else if (ttype == 1)
! {
! if (gaussian_option)
! s = "Gaussian distributed SELECT only";
! else if (exponential_option)
! s = "Exponential distributed SELECT only";
! else
! s = "SELECT only";
! }
else
s = "Custom query";
printf("transaction type: %s\n", s);
printf("scaling factor: %d\n", scale);
+
+ /* output in only gaussian distributed benchmark */
+ if (gaussian_option)
+ {
+ printf("standard deviation threshold: %.5f\n", stdev_threshold);
+ printf("access probability of top 20%%, 10%% and 5%% records: %.5f %.5f %.5f\n",
+ (double) ((erf (stdev_threshold * 0.2 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ (double) ((erf (stdev_threshold * 0.1 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ (double) ((erf (stdev_threshold * 0.05 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))));
+ }
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
***************
*** 2327,2332 **** main(int argc, char **argv)
--- 2607,2614 ----
{"unlogged-tables", no_argument, &unlogged_tables, 1},
{"sampling-rate", required_argument, NULL, 4},
{"aggregate-interval", required_argument, NULL, 5},
+ {"gaussian", required_argument, NULL, 6},
+ {"exponential", required_argument, NULL, 7},
{"rate", required_argument, NULL, 'R'},
{NULL, 0, NULL, 0}
};
***************
*** 2606,2611 **** main(int argc, char **argv)
--- 2888,2911 ----
}
#endif
break;
+ case 6:
+ gaussian_option = true;
+ stdev_threshold = atof(optarg);
+ if(stdev_threshold < MIN_GAUSSIAN_THRESHOLD)
+ {
+ fprintf(stderr, "--gaussian=NUM must be more than %f: %f\n", MIN_GAUSSIAN_THRESHOLD, stdev_threshold);
+ exit(1);
+ }
+ break;
+ case 7:
+ exponential_option = true;
+ exp_threshold = atof(optarg);
+ if(exp_threshold < MIN_EXPONENTIAL_THRESHOLD)
+ {
+ fprintf(stderr, "--exponential=NUM must be more than %f: %f\n", MIN_EXPONENTIAL_THRESHOLD, exp_threshold);
+ exit(1);
+ }
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
***************
*** 2803,2808 **** main(int argc, char **argv)
--- 3103,3130 ----
}
}
+ /* set :stdev_threshold variable */
+ if(getVariable(&state[0], "stdev_threshold") == NULL)
+ {
+ snprintf(val, sizeof(val), "%lf", stdev_threshold);
+ for (i = 0; i < nclients; i++)
+ {
+ if (!putVariable(&state[i], "startup", "stdev_threshold", val))
+ exit(1);
+ }
+ }
+
+ /* set :exp_threshold variable */
+ if(getVariable(&state[0], "exp_threshold") == NULL)
+ {
+ snprintf(val, sizeof(val), "%lf", exp_threshold);
+ for (i = 0; i < nclients; i++)
+ {
+ if (!putVariable(&state[i], "startup", "exp_threshold", val))
+ exit(1);
+ }
+ }
+
if (!is_no_vacuum)
{
fprintf(stderr, "starting vacuum...");
***************
*** 2828,2844 **** main(int argc, char **argv)
switch (ttype)
{
case 0:
! sql_files[0] = process_builtin(tpc_b);
num_files = 1;
break;
case 1:
! sql_files[0] = process_builtin(select_only);
num_files = 1;
break;
case 2:
! sql_files[0] = process_builtin(simple_update);
num_files = 1;
break;
--- 3150,3181 ----
switch (ttype)
{
case 0:
! if (gaussian_option)
! sql_files[0] = process_builtin(gaussian_tpc_b);
! else if (exponential_option)
! sql_files[0] = process_builtin(exponential_tpc_b);
! else
! sql_files[0] = process_builtin(tpc_b);
num_files = 1;
break;
case 1:
! if (gaussian_option)
! sql_files[0] = process_builtin(gaussian_select_only);
! else if (exponential_option)
! sql_files[0] = process_builtin(exponential_select_only);
! else
! sql_files[0] = process_builtin(select_only);
num_files = 1;
break;
case 2:
! if (gaussian_option)
! sql_files[0] = process_builtin(gaussian_simple_update);
! else if (exponential_option)
! sql_files[0] = process_builtin(exponential_simple_update);
! else
! sql_files[0] = process_builtin(simple_update);
num_files = 1;
break;
*** a/doc/src/sgml/pgbench.sgml
--- b/doc/src/sgml/pgbench.sgml
***************
*** 320,325 **** pgbench options> dbname>
--- 320,359 ----
+ deviation threshold>
+
+
+ Exponential distribution pgbench option. A deviation threshold is required.
+ The deviation threshold controls the distribution of the access pattern
+ used for aid in the pgbench_accounts table. With a larger deviation threshold,
+ pgbench's access pattern is concentrated on a narrower set of records. On the
+ other hand, with a smaller deviation threshold, the access pattern is
+ spread more evenly. The deviation threshold must be at least 2.
+ This limit keeps the calculation cost of generating values realistic. If you add
+ the '-N' or '-S' option, the exponential distribution is applied to those
+ benchmarks.
+
+
+
+
+
+ standard deviation threshold>
+
+
+ Gaussian distribution pgbench option. A standard deviation threshold is required.
+ The standard deviation threshold controls the distribution of the access pattern
+ used for aid in the pgbench_accounts table. With a larger standard deviation
+ threshold, pgbench's access pattern is concentrated on a narrower set of records.
+ On the other hand, with a smaller standard deviation threshold, the access pattern
+ is spread more evenly. The standard deviation threshold must be at least
+ 2. This limit keeps the calculation cost of generating values realistic. If you
+ add the '-N' or '-S' option, the gaussian distribution is applied to those
+ benchmarks.
+
+
+
+
+
threads>
threads>
***************
*** 770,775 **** pgbench options> dbname>
--- 804,863 ----
+ \setgaussian varname> min> max>
+ standard deviation threshold
+
+
+
+
+ Sets variable varname> to a gaussian random integer value
+ between the limits min> and max> inclusive.
+ Each limit can be either an integer constant or a
+ :>variablename> reference to a variable
+ having an integer value. The standard deviation threshold controls
+ the distribution of the access pattern. With a larger standard
+ deviation threshold, the frequently accessed values are confined to
+ a narrower range. The minimum standard deviation threshold is 2. This
+ limit keeps the calculation cost of generating values realistic.
+
+
+
+ Example:
+
+ \setgaussian aid 1 :naccounts 5
+
+
+
+
+
+
+ \setexponential varname> min> max>
+ deviation threshold
+
+
+
+
+ Sets variable varname> to an exponential random integer value
+ between the limits min> and max> inclusive.
+ Each limit can be either an integer constant or a
+ :>variablename> reference to a variable
+ having an integer value. The deviation threshold controls the distribution
+ of the access pattern. With a larger deviation threshold,
+ the frequently accessed values are confined to a narrower range. The minimum
+ deviation threshold is 2. This limit keeps the calculation cost of
+ generating values realistic.
+
+
+
+ Example:
+
+ \setexponential aid 1 :naccounts 5
+
+
+
+
+
+
\sleep number> [ us | ms | s ]