*** a/contrib/pgbench/pgbench.c --- b/contrib/pgbench/pgbench.c *************** *** 106,111 **** extern int optind; --- 106,114 ---- #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ + #define MIN_GAUSSIAN_THRESHOLD 2.0 /* smallest threshold accepted by --gaussian and \setgaussian */ + #define MIN_EXPONENTIAL_THRESHOLD 2.0 /* smallest threshold accepted by --exponential and \setexponential */ + int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ *************** *** 176,181 **** int progress_nthreads = 0; /* number of threads for progress report */ --- 179,188 ---- bool is_connect; /* establish connection for each transaction */ bool is_latencies; /* report per-command latencies */ int main_pid; /* main process id used in log filename */ + double stdev_threshold = 5; /* --gaussian threshold; also the default value of :stdev_threshold */ + double exp_threshold = 5; /* --exponential threshold; also the default value of :exp_threshold */ + bool gaussian_option = false; /* set when --gaussian is given */ + bool exponential_option = false; /* set when --exponential is given */ char *pghost = ""; char *pgport = ""; *************** *** 338,346 **** static char *select_only = { --- 345,436 ---- "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" }; + /* --exponential case: TPC-B-like script, aid chosen by \setexponential */ + static char *exponential_tpc_b = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setexponential aid 1 :naccounts :exp_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n" + "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n" + 
"INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --exponential with -N case: no updates of pgbench_tellers or pgbench_branches */ + static char *exponential_simple_update = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setexponential aid 1 :naccounts :exp_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --exponential with -S case: a single SELECT on pgbench_accounts */ + static char *exponential_select_only = { + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setexponential aid 1 :naccounts :exp_threshold\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + }; + + /* --gaussian case: TPC-B-like script, aid chosen by \setgaussian */ + static char *gaussian_tpc_b = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setgaussian aid 1 :naccounts :stdev_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n" + "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --gaussian with -N case: no updates of pgbench_tellers or pgbench_branches */ + static char 
*gaussian_simple_update = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setgaussian aid 1 :naccounts :stdev_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --gaussian with -S case: a single SELECT on pgbench_accounts */ + static char *gaussian_select_only = { + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setgaussian aid 1 :naccounts :stdev_threshold\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + }; + /* Function prototypes */ static void setalarm(int seconds); static void *threadRun(void *arg); + static inline double sqrtd(const double x); static void usage(void) *************** *** 381,386 **** usage(void) --- 471,478 ---- " -v, --vacuum-all vacuum all four standard tables before tests\n" " --aggregate-interval=NUM aggregate data over NUM seconds\n" " --sampling-rate=NUM fraction of transactions to log (e.g. 
0.01 for 1%%)\n"
+ 		   " --exponential=NUM exponential distribution with NUM threshold parameter\n"
+ 		   " --gaussian=NUM gaussian distribution with NUM standard deviation threshold\n"
  		   "\nCommon options:\n"
  		   " -d, --debug print debugging output\n"
  		   " -h, --host=HOSTNAME database server host or socket directory\n"
***************
*** 477,482 **** getrand(TState *thread, int64 min, int64 max)
--- 569,660 ----
  	return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
  }
  
+ /*
+  * Random number generator: truncated exponential distribution from min to
+  * max inclusive.
+  *
+  * The raw deviate -log(u) is redrawn until it is below exp_threshold and is
+  * then scaled onto the range, so a larger exp_threshold concentrates the
+  * results closer to min.  Callers validate exp_threshold against
+  * MIN_EXPONENTIAL_THRESHOLD before calling.
+  */
+ static int64
+ getExponentialrand(TState *thread, int64 min, int64 max, double exp_threshold)
+ {
+ 	double rand;
+
+ 	/*
+ 	 * Rejection loop: a draw is rejected with probability exp(-exp_threshold),
+ 	 * which is about 13.5 percent at the minimum threshold of 2, so repeated
+ 	 * retries are rare.
+ 	 */
+ 	do
+ 	{
+ 		/* pg_erand48 yields [0,1); flip it to (0,1] so that log() is finite */
+ 		rand = 1.0 - pg_erand48(thread->random_state);
+ 		rand = -log(rand);
+ 	} while (rand >= exp_threshold);
+
+ 	/* normalization to [0,1); 1.0 must be excluded or the result could be max + 1 */
+ 	rand = rand / exp_threshold;
+
+ 	/* return int64 random number within between min and max */
+ 	return min + (int64) ((max - min + 1) * rand);
+ }
+
+ /*
+  * Random number generator: truncated gaussian distribution from min to max
+  * inclusive.
+  *
+  * A standard normal deviate is redrawn until it lies in
+  * [-stdev_threshold, stdev_threshold) and is then scaled onto the range, so a
+  * larger stdev_threshold concentrates the results around the middle.
+  *
+  * No state is kept between calls other than thread->random_state.
+  */
+ static int64
+ getGaussianrand(TState *thread, int64 min, int64 max, double stdev_threshold)
+ {
+ 	double stdev;
+ 	double rand;
+ 	double rand1;
+ 	double rand2;
+
+ 	/*
+ 	 * Rejection loop: with the minimum threshold of 2 a draw is rejected a
+ 	 * little under 5 percent of the time, so retries are infrequent.
+ 	 */
+ 	do
+ 	{
+ 		/*
+ 		 * pg_erand48 generates [0,1) random number.  rand1 is flipped to
+ 		 * (0,1] because log(0) cannot be calculated; rand2 only feeds sin(),
+ 		 * so [0,1) is fine as is.
+ 		 */
+ 		rand1 = 1.0 - pg_erand48(thread->random_state);
+ 		rand2 = pg_erand48(thread->random_state);
+
+ 		/* Box-Muller transform */
+ 		stdev = sqrtd(-2.0 * log(rand1)) * sin(2.0 * M_PI * rand2);
+ 	} while (stdev < -stdev_threshold || stdev >= stdev_threshold);
+
+ 	/* normalization to [0,1) */
+ 	rand = (stdev + stdev_threshold) / (stdev_threshold * 2.0);
+
+ 	/* return int64 random number within between min and max */
+ 	return min + (int64) ((max - min + 1) * rand);
+ }
+
+ /*
+  * Square root helper used by getGaussianrand(); delegates to sqrt() from
+  * the C library.
+  */
+ static inline double
+ sqrtd(const double x)
+ {
+ 	return sqrt(x);
+ }
+
  /* call PQexec() and exit() on failure */
  static void
  executeStatement(PGconn *con, const char *sql)
***************
*** 1315,1325 **** top:
  				fprintf(stderr, "\n");
  			}
  
! 			if (pg_strcasecmp(argv[0], "setrandom") == 0)
  			{
  				char *var;
  				int64 min,
  				max;
  				char res[64];
  
  				if (*argv[2] == ':')
--- 1503,1516 ----
  				fprintf(stderr, "\n");
  			}
  
! 			if ((pg_strcasecmp(argv[0], "setrandom") == 0) ||
! 
(pg_strcasecmp(argv[0], "setgaussian") == 0) ||
! 				(pg_strcasecmp(argv[0], "setexponential") == 0))
  			{
  				char *var;
  				int64 min,
  				max;
+ 				double threshold = 0;
  				char res[64];
  
  				if (*argv[2] == ':')
***************
*** 1365,1375 **** top:
  				}
  
  				/*
! 				 * getrand() needs to be able to subtract max from min and add one
! 				 * to the result without overflowing. Since we know max > min, we
! 				 * can detect overflow just by checking for a negative result. But
! 				 * we must check both that the subtraction doesn't overflow, and
! 				 * that adding one to the result doesn't overflow either.
  				 */
  				if (max - min < 0 || (max - min) + 1 < 0)
  				{
--- 1556,1566 ----
  				}
  
  				/*
! 				 * The random number generators need to be able to subtract
! 				 * min from max and add one to the result without overflowing.
! 				 * Since we know max > min, we can detect overflow just by checking
! 				 * for a negative result. But we must check both that the subtraction
! 				 * doesn't overflow, and that adding one to the result doesn't overflow either.
  				 */
  				if (max - min < 0 || (max - min) + 1 < 0)
  				{
***************
*** 1378,1387 **** top:
  					return true;
  				}
  
  #ifdef DEBUG
! 				printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
  #endif
! 				snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
  
  				if (!putVariable(st, argv[0], argv[1], res))
  				{
--- 1569,1624 ----
  					return true;
  				}
  
+ 				if (pg_strcasecmp(argv[0], "setrandom") == 0)
+ 				{
+ #ifdef DEBUG
+ 					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
+ #endif
+ 					snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+ 				}
+ 				else if ((pg_strcasecmp(argv[0], "setgaussian") == 0) || (pg_strcasecmp(argv[0], "setexponential") == 0))
+ 				{
+ 					/* argv[4] is the threshold: a number or a :variable reference */
+ 					if(*argv[4] == ':')
+ 					{
+ 						if((var = getVariable(st, argv[4] + 1)) == NULL)
+ 						{
+ 							fprintf(stderr, "%s: invalid threshold number %s\n", argv[0], argv[4]);
+ 							st->ecnt++;
+ 							return true;
+ 						}
+ 						threshold = strtod(var, NULL);
+ 					}
+ 					else
+ 						threshold = strtod(argv[4], NULL);
+
+ 					if (pg_strcasecmp(argv[0], "setgaussian") == 0)
+ 					{
+ 						if (!(threshold >= MIN_GAUSSIAN_THRESHOLD))	/* negated >= so that NaN is rejected too */
+ 						{
+ 							fprintf(stderr, "%s: gaussian threshold must be at least %.1f: %s\n", argv[0], MIN_GAUSSIAN_THRESHOLD, argv[4]);
+ 							st->ecnt++;
+ 							return true;
+ 						}
+ #ifdef DEBUG
+ 						printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianrand(thread, min, max, threshold));
+ #endif
+ 						snprintf(res, sizeof(res), INT64_FORMAT, getGaussianrand(thread, min, max, threshold));
+ 					}
+ 					else if (pg_strcasecmp(argv[0], "setexponential") == 0)
+ 					{
+ 						if (!(threshold >= MIN_EXPONENTIAL_THRESHOLD))	/* negated >= so that NaN is rejected too */
+ 						{
+ 							fprintf(stderr, "%s: exponential threshold must be at least %.1f: %s\n", argv[0], MIN_EXPONENTIAL_THRESHOLD, argv[4]);
+ 							st->ecnt++;
+ 							return true;
+ 						}
  #ifdef DEBUG
! 						printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialrand(thread, min, max, threshold));
  #endif
! 						snprintf(res, sizeof(res), INT64_FORMAT, getExponentialrand(thread, min, max, threshold));
! 					}
! 
} if (!putVariable(st, argv[0], argv[1], res)) { *************** *** 1915,1920 **** process_commands(char *buf) --- 2151,2169 ---- fprintf(stderr, "%s: extra argument \"%s\" ignored\n", my_commands->argv[0], my_commands->argv[j]); } + else if ((pg_strcasecmp(my_commands->argv[0], "setgaussian") == 0) || + (pg_strcasecmp(my_commands->argv[0], "setexponential") == 0)) + { + if (my_commands->argc < 5) /* command, variable, min, max and threshold are all required */ + { + fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]); + exit(1); + } + + for (j = 5; j < my_commands->argc; j++) /* anything after the threshold is reported and ignored */ + fprintf(stderr, "%s: extra argument \"%s\" ignored\n", + my_commands->argv[0], my_commands->argv[j]); + } else if (pg_strcasecmp(my_commands->argv[0], "set") == 0) { if (my_commands->argc < 3) *************** *** 2188,2203 **** printResults(int ttype, int normal_xacts, int nclients, (INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads)); if (ttype == 0) ! s = "TPC-B (sort of)"; else if (ttype == 2) ! s = "Update only pgbench_accounts"; else if (ttype == 1) ! s = "SELECT only"; else s = "Custom query"; printf("transaction type: %s\n", s); printf("scaling factor: %d\n", scale); printf("query mode: %s\n", QUERYMODE[querymode]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); --- 2437,2483 ---- (INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads)); if (ttype == 0) ! { ! if (gaussian_option) /* tested first, so gaussian wins when both options are set */ ! s = "Gaussian distributed TPC-B (sort of)"; ! else if (exponential_option) ! s = "Exponential distributed TPC-B (sort of)"; ! else ! s = "TPC-B (sort of)"; ! } else if (ttype == 2) ! { ! if (gaussian_option) ! s = "Gaussian distributed update only pgbench_accounts"; ! else if (exponential_option) ! s = "Exponential distributed update only pgbench_accounts"; ! else ! s = "Update only pgbench_accounts"; ! } else if (ttype == 1) ! { ! if (gaussian_option) ! s = "Gaussian distributed SELECT only"; ! else if (exponential_option) ! s = "Exponential distributed SELECT only"; ! else ! s = "SELECT only"; ! 
}
  	else
  		s = "Custom query";
  
  	printf("transaction type: %s\n", s);
  	printf("scaling factor: %d\n", scale);
+
+ 	/* gaussian runs only: report the threshold and the share of accesses falling in the central 20%, 10% and 5% of the range */
+ 	if (gaussian_option)
+ 	{
+ 		printf("standard deviation threshold: %.5f\n", stdev_threshold);
+ 		printf("access probability of top 20%%, 10%% and 5%% records: %.5f %.5f %.5f\n",
+ 			   (double) ((erf (stdev_threshold * 0.2 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ 			   (double) ((erf (stdev_threshold * 0.1 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ 			   (double) ((erf (stdev_threshold * 0.05 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))));
+ 	}
  	printf("query mode: %s\n", QUERYMODE[querymode]);
  	printf("number of clients: %d\n", nclients);
  	printf("number of threads: %d\n", nthreads);
***************
*** 2327,2332 **** main(int argc, char **argv)
--- 2607,2614 ----
  		{"unlogged-tables", no_argument, &unlogged_tables, 1},
  		{"sampling-rate", required_argument, NULL, 4},
  		{"aggregate-interval", required_argument, NULL, 5},
+ 		{"gaussian", required_argument, NULL, 6},
+ 		{"exponential", required_argument, NULL, 7},
  		{"rate", required_argument, NULL, 'R'},
  		{NULL, 0, NULL, 0}
  	};
***************
*** 2606,2611 **** main(int argc, char **argv)
--- 2888,2911 ----
  				}
  #endif
  				break;
+ 			case 6:	/* --gaussian=NUM */
+ 				gaussian_option = true;
+ 				stdev_threshold = atof(optarg);
+ 				if (!(stdev_threshold >= MIN_GAUSSIAN_THRESHOLD))	/* negated >= so that NaN is rejected too */
+ 				{
+ 					fprintf(stderr, "--gaussian=NUM must be at least %f: %f\n", MIN_GAUSSIAN_THRESHOLD, stdev_threshold);
+ 					exit(1);
+ 				}
+ 				break;
+ 			case 7:	/* --exponential=NUM */
+ 				exponential_option = true;
+ 				exp_threshold = atof(optarg);
+ 				if (!(exp_threshold >= MIN_EXPONENTIAL_THRESHOLD))	/* negated >= so that NaN is rejected too */
+ 				{
+ 					fprintf(stderr, "--exponential=NUM must be at least %f: %f\n", MIN_EXPONENTIAL_THRESHOLD, exp_threshold);
+ 					exit(1);
+ 				}
+ 				break;
  			default:
  				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
  				exit(1);
***************
*** 2803,2808 **** main(int argc, char **argv)
--- 3103,3130 ----
  		}
  	}
  
+ 	/* set :stdev_threshold variable */
+ 
if(getVariable(&state[0], "stdev_threshold") == NULL) + { + snprintf(val, sizeof(val), "%lf", stdev_threshold); + for (i = 0; i < nclients; i++) + { + if (!putVariable(&state[i], "startup", "stdev_threshold", val)) + exit(1); + } + } + + /* set :exp_threshold for every client, unless state[0] already has such a variable */ + if(getVariable(&state[0], "exp_threshold") == NULL) + { + snprintf(val, sizeof(val), "%lf", exp_threshold); + for (i = 0; i < nclients; i++) + { + if (!putVariable(&state[i], "startup", "exp_threshold", val)) + exit(1); + } + } + if (!is_no_vacuum) { fprintf(stderr, "starting vacuum..."); *************** *** 2828,2844 **** main(int argc, char **argv) switch (ttype) { case 0: ! sql_files[0] = process_builtin(tpc_b); num_files = 1; break; case 1: ! sql_files[0] = process_builtin(select_only); num_files = 1; break; case 2: ! sql_files[0] = process_builtin(simple_update); num_files = 1; break; --- 3150,3181 ---- switch (ttype) { case 0: ! if (gaussian_option) /* tested first, so the gaussian script is used when both options are set */ ! sql_files[0] = process_builtin(gaussian_tpc_b); ! else if (exponential_option) ! sql_files[0] = process_builtin(exponential_tpc_b); ! else ! sql_files[0] = process_builtin(tpc_b); num_files = 1; break; case 1: ! if (gaussian_option) ! sql_files[0] = process_builtin(gaussian_select_only); ! else if (exponential_option) ! sql_files[0] = process_builtin(exponential_select_only); ! else ! sql_files[0] = process_builtin(select_only); num_files = 1; break; case 2: ! if (gaussian_option) ! sql_files[0] = process_builtin(gaussian_simple_update); ! else if (exponential_option) ! sql_files[0] = process_builtin(exponential_simple_update); ! else ! sql_files[0] = process_builtin(simple_update); num_files = 1; break; *** a/doc/src/sgml/pgbench.sgml --- b/doc/src/sgml/pgbench.sgml *************** *** 320,325 **** pgbench options dbname --- 320,359 ---- + deviation threshold + + + Exponential distribution pgbench option. Need the deviation threshold. 
The deviation threshold controls the distribution of the access pattern that + is used for aid in the pgbench_accounts table. With a larger deviation threshold, + accesses are concentrated on a narrower set of records. On the other + hand, with a smaller deviation threshold, the access pattern is + spread more evenly. The deviation threshold must be at least 2. + This limit keeps the cost of generating each random value reasonable. If you add + the '-N' or '-S' option, the exponential distribution is applied to those + benchmarks as well. + + + + + + standard deviation threshold + + + Gaussian distribution pgbench option. Requires the standard deviation threshold. + The standard deviation threshold controls the distribution of the access pattern that + is used for aid in the pgbench_accounts table. With a larger standard deviation + threshold, accesses are concentrated on a narrower set of records. On the other + hand, with a smaller standard deviation threshold, the access pattern + is spread more evenly. The standard deviation threshold must be at least + 2. This limit keeps the cost of generating each random value reasonable. If you + add the '-N' or '-S' option, the gaussian distribution is applied to those + benchmarks as well. + + + + + threads threads *************** *** 770,775 **** pgbench options dbname --- 804,863 ---- + \setgaussian varname min max + standard deviation threshold + + + + + Sets variable varname to a gaussian random integer value + between the limits min and max inclusive. + Each limit can be either an integer constant or a + :variablename reference to a variable + having an integer value. The standard deviation threshold controls + the distribution of the access pattern. A larger standard + deviation threshold concentrates accesses on a + narrower range. The minimum standard deviation threshold is 2. This limit + keeps the cost of generating each random value reasonable. 
+ + + + Example: + + \setgaussian aid 1 :naccounts 5 + + + + + + + \setexponential varname min max + deviation threshold + + + + + Sets variable varname to an exponential random integer value + between the limits min and max inclusive. + Each limit can be either an integer constant or a + :variablename reference to a variable + having an integer value. The deviation threshold controls the distribution + of the access pattern. A larger deviation threshold + concentrates accesses on a narrower range. The minimum + deviation threshold is 2. This limit keeps the cost of generating + each random value reasonable. + + + + Example: + + \setexponential aid 1 :naccounts 5 + + + + + + \sleep number [ us | ms | s ]