diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 64b043b48a..1dea6e4b17 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -1014,6 +1014,14 @@ pgbench options dbname an integer between 1 and 10 + random_zipfian(lb, ub, parameter) + integer + Zipfian-distributed random integer in [lb, ub], + see below + random_zipfian(1, 10, 1.2) + an integer between 1 and 10 + + sqrt(x) double square root @@ -1094,6 +1102,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / of the Box-Muller transform. + + + + For Zipfian distribution, parameter + defines how skewed the distribution. A physical sense of this parameter + is following: 1 generated N times, + 2 generated 2 ^ parameter less, + 3 generated 3 ^ parameter less, ... + X generated X ^ parameter less times. + + + Intuitively, the larger the parameter, the more + frequently value values to the beginning of the interval are drawn. + The closer to 0 parameter is, + the flatter (more uniform) the access distribution. + + + diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index b3a2d9bfd3..25d5ad48e5 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -191,6 +191,9 @@ static const struct { "random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL }, + { + "random_zipfian", 3, PGBENCH_RANDOM_ZIPFIAN + }, /* keep as last array element */ { NULL, 0, 0 diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 4d364a1db4..9e515abe9b 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -94,6 +94,7 @@ static int pthread_join(pthread_t th, void **thread_return); #define DEFAULT_NXACTS 10 /* default nxacts */ #define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */ +#define MIN_ZIPFIAN_PARAM 1.0000001 /* minimum parameter for zipfian */ int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ @@ -184,6 +185,14 @@ char *dbName; char *logfile_prefix = NULL; const char *progname; +/* Variables used in zipfian_random */ +double zipf_zetan; /* zeta(n) */ +double zipf_prev_theta; /* theta parameter of previous execution */ +double zipf_prev_nitems; /* n(ub - lb + 1) parameter of previous execution */ +double zipf_alpha; +double zipf_beta; +double zipf_eta; + #define WSEP '@' /* weight separator */ volatile bool timer_exceeded = false; /* flag from signal handler */ @@ -737,6 +746,58 @@ getPoissonRand(TState *thread, int64 center) return (int64) (-log(uniform) * ((double) center) + 0.5); } +/* helper function for getZipfianRand */ +static double +zeta(int64 n, double theta) +{ + int i; + double ans = 0.0; + + for (i = 1; i <= n; i++) + ans += pow(1. / (double)i, theta); + return (ans); +} + +/* helper function for getZipfianRand. Computes random variable in range 1..n */ +static int64 +zipfn(TState *thread, int64 n, double theta) +{ + double u = pg_erand48(thread->random_state); + double uz = u * zipf_zetan; + + if (uz < 1.) + return 1; + if (uz < 1 + zipf_beta) + return 2; + return 1 + (int64)(n * pow(zipf_eta * u - zipf_eta + 1., zipf_alpha)); +} + +/* random number generator: zipfian distribution from min to max inclusive */ +static int64 +getZipfianRand(TState *thread, int64 min, int64 max, double theta) +{ + Assert(theta > MIN_ZIPFIAN_PARAM); + + int64 n = max - min + 1; + double zipf_zeta2; + + /* update cached variables if current parameters are different from previous */ + if (n != zipf_prev_nitems || theta != zipf_prev_theta) + { + zipf_prev_theta = theta; + zipf_prev_nitems = n; + + zipf_zeta2 = zeta(2, theta); + zipf_zetan = zeta(n, theta); + + zipf_alpha = 1. / (1 - theta); + zipf_beta = pow(0.5, theta); + zipf_eta = (1. - pow(2. / n, 1 - theta)) / (1. - zipf_zeta2 / zipf_zetan); + } + + return min + zipfn(thread, n, theta) - 1; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -1567,6 +1628,7 @@ evalFunc(TState *thread, CState *st, case PGBENCH_RANDOM: case PGBENCH_RANDOM_EXPONENTIAL: case PGBENCH_RANDOM_GAUSSIAN: + case PGBENCH_RANDOM_ZIPFIAN: { int64 imin, imax; @@ -1617,6 +1679,18 @@ evalFunc(TState *thread, CState *st, setIntValue(retval, getGaussianRand(thread, imin, imax, param)); } + else if (func == PGBENCH_RANDOM_ZIPFIAN) + { + if (param <= MIN_ZIPFIAN_PARAM) + { + fprintf(stderr, + "zipfian parameter must be greater than %f " + "(not %f)\n", MIN_ZIPFIAN_PARAM, param); + return false; + } + setIntValue(retval, + getZipfianRand(thread, imin, imax, param)); + } else /* exponential */ { if (param <= 0.0) diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index abc13e9463..1a29f1260c 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -75,7 +75,8 @@ typedef enum PgBenchFunction PGBENCH_SQRT, PGBENCH_RANDOM, PGBENCH_RANDOM_GAUSSIAN, - PGBENCH_RANDOM_EXPONENTIAL + PGBENCH_RANDOM_EXPONENTIAL, + PGBENCH_RANDOM_ZIPFIAN } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr;