diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 64b043b48a..1dea6e4b17 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -1014,6 +1014,14 @@ pgbench options dbname an integer between 1 and 10 + random_zipfian(lb, ub, parameter) + integer + Zipfian-distributed random integer in [lb, ub], + see below + random_zipfian(1, 10, 1.2) + an integer between 1 and 10 + + sqrt(x) double square root @@ -1094,6 +1102,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / of the Box-Muller transform. + + + + For Zipfian distribution, parameter + defines how skewed the distribution. A physical sense of this parameter + is following: 1 generated N times, + 2 generated 2 ^ parameter less, + 3 generated 3 ^ parameter less, ... + X generated X ^ parameter less times. + + + Intuitively, the larger the parameter, the more + frequently value values to the beginning of the interval are drawn. + The closer to 0 parameter is, + the flatter (more uniform) the access distribution. + + + diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index b3a2d9bfd3..25d5ad48e5 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -191,6 +191,9 @@ static const struct { "random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL }, + { + "random_zipfian", 3, PGBENCH_RANDOM_ZIPFIAN + }, /* keep as last array element */ { NULL, 0, 0 diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 4d364a1db4..9f7cc5f018 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -93,7 +93,10 @@ static int pthread_join(pthread_t th, void **thread_return); #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ +#define ZIPF_CACHE_SIZE 15 /* cache cells number */ + #define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */ +#define MIN_ZIPFIAN_PARAM 1.0000001 /* minimum parameter for zipfian */ int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ @@ -334,6 +337,28 @@ typedef struct } CState; /* + * Cache cell for zipfian_random call + */ +typedef struct +{ + double zetan; /* zeta(n) */ + double theta; /* theta parameter of previous execution */ + int64 nitems; /* n(ub - lb + 1) parameter of previous execution */ + double alpha; + double beta; + double eta; +} ZipfData; + +/* + * Zipf cache for zeta values + */ +typedef struct +{ + int cells_inited; + ZipfData *cells; +} ZipfCache; + +/* * Thread state */ typedef struct @@ -345,6 +370,7 @@ typedef struct unsigned short random_state[3]; /* separate randomness for each thread */ int64 throttle_trigger; /* previous/next throttling (us) */ FILE *logfile; /* where to log, or NULL */ + ZipfCache zipf_cache; /* per thread collected stats */ instr_time start_time; /* thread start time */ @@ -737,6 +763,101 @@ getPoissonRand(TState *thread, int64 center) return (int64) (-log(uniform) * ((double) center) + 0.5); } +/* helper function for getZipfianRand */ +static double +zipfZetaRange(int64 from, int64 to, double theta) +{ + int i; + double ans = 0.0; + + for (i = to; i > from; i--) + ans += pow(i, -theta); + return ans; +} + +/* helper function for getZipfianRand. Computes random variable in range 1..n */ +static int64 +zipfn(TState *thread, ZipfData *zipf, int64 n, double theta) +{ + double u = pg_erand48(thread->random_state); + double uz = u * zipf->zetan; + + if (uz < 1.) + return 1; + if (uz < 1 + zipf->beta) + return 2; + return 1 + (int64)(n * pow(zipf->eta * u - zipf->eta + 1., zipf->alpha)); +} + +/* set zeta value and other parametrs to cache cell */ +static void +zipfSetCacheCell(ZipfData *cell, ZipfData *nearest, int64 n, double theta) +{ + double zipf_zeta2; + double sub_zeta; + + cell->nitems = n; + cell->theta = theta; + + zipf_zeta2 = zipfZetaRange(1, 2, theta) + 1; + + if (nearest != NULL && nearest->nitems - n < n) + { + sub_zeta = zipfZetaRange(Min(n, nearest->nitems), Max(n, nearest->nitems), theta); + cell->zetan = nearest->zetan + (n - nearest->nitems > 0 ? sub_zeta : -sub_zeta); + } + else + cell->zetan = zipfZetaRange(1, n, theta) + 1; + + cell->alpha = 1. / (1 - theta); + cell->beta = pow(0.5, theta); + cell->eta = (1. - pow(2. / n, 1 - theta)) / (1. - zipf_zeta2 / cell->zetan); +} + +static ZipfData * +zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double theta) +{ + int i; + ZipfData *cell; + ZipfData *nearest = NULL; + + if (!cache->cells) + cache->cells = (ZipfData *) pg_malloc(sizeof(ZipfData) * ZIPF_CACHE_SIZE); + + /* search zeta for cached for given parameters */ + for(i = 0; i < cache->cells_inited; i++) + { + cell = &cache->cells[i]; + if (cell->nitems == n && cell->theta == theta) + return &cache->cells[i]; + + if (cell->theta == theta && + (nearest == NULL || Abs(cell->nitems - n) < Abs(nearest->nitems - n))) + nearest = cell; + } + + /* create new one if it is not exist */ + if (cache->cells_inited != ZIPF_CACHE_SIZE) + i = cache->cells_inited++; + else + i = pg_lrand48() % ZIPF_CACHE_SIZE; + + zipfSetCacheCell(&cache->cells[i], nearest, n, theta); + + return &cache->cells[i]; +} + +/* random number generator: zipfian distribution from min to max inclusive */ +static int64 +getZipfianRand(TState *thread, int64 min, int64 max, double theta) +{ + /* Assert(theta > MIN_ZIPFIAN_PARAM); */ + int64 n = max - min + 1; + ZipfData *cell = zipfFindOrCreateCacheCell(&thread->zipf_cache, n, theta); + + return min + zipfn(thread, cell, n, theta) - 1; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -1567,6 +1688,7 @@ evalFunc(TState *thread, CState *st, case PGBENCH_RANDOM: case PGBENCH_RANDOM_EXPONENTIAL: case PGBENCH_RANDOM_GAUSSIAN: + case PGBENCH_RANDOM_ZIPFIAN: { int64 imin, imax; @@ -1617,6 +1739,18 @@ evalFunc(TState *thread, CState *st, setIntValue(retval, getGaussianRand(thread, imin, imax, param)); } + else if (func == PGBENCH_RANDOM_ZIPFIAN) + { + if (param <= MIN_ZIPFIAN_PARAM) + { + fprintf(stderr, + "zipfian parameter must be greater than %f " + "(not %f)\n", MIN_ZIPFIAN_PARAM, param); + return false; + } + setIntValue(retval, + getZipfianRand(thread, imin, imax, param)); + } else /* exponential */ { if (param <= 0.0) @@ -4262,8 +4396,10 @@ main(int argc, char **argv) thread->random_state[0] = random(); thread->random_state[1] = random(); thread->random_state[2] = random(); - thread->logfile = NULL; /* filled in later */ + thread->logfile = NULL; /* filled in later */ thread->latency_late = 0; + thread->zipf_cache.cells = NULL; /* filled in later */ + thread->zipf_cache.cells_inited = 0; initStats(&thread->stats, 0); nclients_dealt += thread->nstate; diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index abc13e9463..1a29f1260c 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -75,7 +75,8 @@ typedef enum PgBenchFunction PGBENCH_SQRT, PGBENCH_RANDOM, PGBENCH_RANDOM_GAUSSIAN, - PGBENCH_RANDOM_EXPONENTIAL + PGBENCH_RANDOM_EXPONENTIAL, + PGBENCH_RANDOM_ZIPFIAN } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr;