diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 64b043b48a..1dea6e4b17 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1014,6 +1014,14 @@ pgbench options> dbname>
an integer between 1> and 10>>
+ random_zipfian(lb>, ub>, parameter>)>>>
+ integer>
+ Zipfian-distributed random integer in [lb, ub]>,
+ see below>
+ random_zipfian(1, 10, 1.2)>>
+ an integer between 1> and 10>>
+
+
sqrt(x>)>>>
double>
square root>
@@ -1094,6 +1102,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
of the Box-Muller transform.
+
+
+
+ For Zipfian distribution, parameter>
+ defines how skewed the distribution. A physical sense of this parameter
+ is following: 1> generated N> times,
+ 2> generated 2 ^ parameter> less,
+ 3> generated 3 ^ parameter> less, ...
+ X > generated X ^ parameter> less times.
+
+
+ Intuitively, the larger the parameter>, the more
+ frequently value values to the beginning of the interval are drawn.
+ The closer to 0 parameter> is,
+ the flatter (more uniform) the access distribution.
+
+
+
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index b3a2d9bfd3..25d5ad48e5 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -191,6 +191,9 @@ static const struct
{
"random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL
},
+ {
+ "random_zipfian", 3, PGBENCH_RANDOM_ZIPFIAN
+ },
/* keep as last array element */
{
NULL, 0, 0
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 4d364a1db4..9f7cc5f018 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -93,7 +93,10 @@ static int pthread_join(pthread_t th, void **thread_return);
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default nxacts */
+#define ZIPF_CACHE_SIZE 15 /* cache cells number */
+
#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */
+#define MIN_ZIPFIAN_PARAM 1.0000001 /* minimum parameter for zipfian */
int nxacts = 0; /* number of transactions per client */
int duration = 0; /* duration in seconds */
@@ -334,6 +337,28 @@ typedef struct
} CState;
/*
+ * Cache cell for zipfian_random call
+ */
+typedef struct
+{
+ double zetan; /* zeta(n) */
+ double theta; /* theta parameter of previous execution */
+ int64 nitems; /* n(ub - lb + 1) parameter of previous execution */
+ double alpha;
+ double beta;
+ double eta;
+} ZipfData;
+
+/*
+ * Zipf cache for zeta values
+ */
+typedef struct
+{
+ int cells_inited;
+ ZipfData *cells;
+} ZipfCache;
+
+/*
* Thread state
*/
typedef struct
@@ -345,6 +370,7 @@ typedef struct
unsigned short random_state[3]; /* separate randomness for each thread */
int64 throttle_trigger; /* previous/next throttling (us) */
FILE *logfile; /* where to log, or NULL */
+ ZipfCache zipf_cache;
/* per thread collected stats */
instr_time start_time; /* thread start time */
@@ -737,6 +763,101 @@ getPoissonRand(TState *thread, int64 center)
return (int64) (-log(uniform) * ((double) center) + 0.5);
}
+/* helper function for getZipfianRand */
+static double
+zipfZetaRange(int64 from, int64 to, double theta)
+{
+ int i;
+ double ans = 0.0;
+
+ for (i = to; i > from; i--)
+ ans += pow(i, -theta);
+ return ans;
+}
+
+/* helper function for getZipfianRand. Computes random variable in range 1..n */
+static int64
+zipfn(TState *thread, ZipfData *zipf, int64 n, double theta)
+{
+ double u = pg_erand48(thread->random_state);
+ double uz = u * zipf->zetan;
+
+ if (uz < 1.)
+ return 1;
+ if (uz < 1 + zipf->beta)
+ return 2;
+ return 1 + (int64)(n * pow(zipf->eta * u - zipf->eta + 1., zipf->alpha));
+}
+
+/* set zeta value and other parametrs to cache cell */
+static void
+zipfSetCacheCell(ZipfData *cell, ZipfData *nearest, int64 n, double theta)
+{
+ double zipf_zeta2;
+ double sub_zeta;
+
+ cell->nitems = n;
+ cell->theta = theta;
+
+ zipf_zeta2 = zipfZetaRange(1, 2, theta) + 1;
+
+ if (nearest != NULL && nearest->nitems - n < n)
+ {
+ sub_zeta = zipfZetaRange(Min(n, nearest->nitems), Max(n, nearest->nitems), theta);
+ cell->zetan = nearest->zetan + (n - nearest->nitems > 0 ? sub_zeta : -sub_zeta);
+ }
+ else
+ cell->zetan = zipfZetaRange(1, n, theta) + 1;
+
+ cell->alpha = 1. / (1 - theta);
+ cell->beta = pow(0.5, theta);
+ cell->eta = (1. - pow(2. / n, 1 - theta)) / (1. - zipf_zeta2 / cell->zetan);
+}
+
+static ZipfData *
+zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double theta)
+{
+ int i;
+ ZipfData *cell;
+ ZipfData *nearest = NULL;
+
+ if (!cache->cells)
+ cache->cells = (ZipfData *) pg_malloc(sizeof(ZipfData) * ZIPF_CACHE_SIZE);
+
+ /* search zeta for cached for given parameters */
+ for(i = 0; i < cache->cells_inited; i++)
+ {
+ cell = &cache->cells[i];
+ if (cell->nitems == n && cell->theta == theta)
+ return &cache->cells[i];
+
+ if (cell->theta == theta &&
+ (nearest == NULL || Abs(cell->nitems - n) < Abs(nearest->nitems - n)))
+ nearest = cell;
+ }
+
+ /* create new one if it is not exist */
+ if (cache->cells_inited != ZIPF_CACHE_SIZE)
+ i = cache->cells_inited++;
+ else
+ i = pg_lrand48() % ZIPF_CACHE_SIZE;
+
+ zipfSetCacheCell(&cache->cells[i], nearest, n, theta);
+
+ return &cache->cells[i];
+}
+
+/* random number generator: zipfian distribution from min to max inclusive */
+static int64
+getZipfianRand(TState *thread, int64 min, int64 max, double theta)
+{
+ /* Assert(theta > MIN_ZIPFIAN_PARAM); */
+ int64 n = max - min + 1;
+ ZipfData *cell = zipfFindOrCreateCacheCell(&thread->zipf_cache, n, theta);
+
+ return min + zipfn(thread, cell, n, theta) - 1;
+}
+
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -1567,6 +1688,7 @@ evalFunc(TState *thread, CState *st,
case PGBENCH_RANDOM:
case PGBENCH_RANDOM_EXPONENTIAL:
case PGBENCH_RANDOM_GAUSSIAN:
+ case PGBENCH_RANDOM_ZIPFIAN:
{
int64 imin,
imax;
@@ -1617,6 +1739,18 @@ evalFunc(TState *thread, CState *st,
setIntValue(retval,
getGaussianRand(thread, imin, imax, param));
}
+ else if (func == PGBENCH_RANDOM_ZIPFIAN)
+ {
+ if (param <= MIN_ZIPFIAN_PARAM)
+ {
+ fprintf(stderr,
+ "zipfian parameter must be greater than %f "
+ "(not %f)\n", MIN_ZIPFIAN_PARAM, param);
+ return false;
+ }
+ setIntValue(retval,
+ getZipfianRand(thread, imin, imax, param));
+ }
else /* exponential */
{
if (param <= 0.0)
@@ -4262,8 +4396,10 @@ main(int argc, char **argv)
thread->random_state[0] = random();
thread->random_state[1] = random();
thread->random_state[2] = random();
- thread->logfile = NULL; /* filled in later */
+ thread->logfile = NULL; /* filled in later */
thread->latency_late = 0;
+ thread->zipf_cache.cells = NULL; /* filled in later */
+ thread->zipf_cache.cells_inited = 0;
initStats(&thread->stats, 0);
nclients_dealt += thread->nstate;
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index abc13e9463..1a29f1260c 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -75,7 +75,8 @@ typedef enum PgBenchFunction
PGBENCH_SQRT,
PGBENCH_RANDOM,
PGBENCH_RANDOM_GAUSSIAN,
- PGBENCH_RANDOM_EXPONENTIAL
+ PGBENCH_RANDOM_EXPONENTIAL,
+ PGBENCH_RANDOM_ZIPFIAN
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;