>From 6cf109c466b60ecf62c27ce04757da51a9726729 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Sat, 9 Jan 2016 20:45:03 +0100
Subject: [PATCH] extend HyperLogLog API

Extends the HyperLogLog API with two new functions. One allows
specifying desired error rate when initializing the counter,
the other one frees the internal counter state.
---
 src/backend/lib/hyperloglog.c | 45 +++++++++++++++++++++++++++++++++++++++++++
 src/include/lib/hyperloglog.h |  2 ++
 2 files changed, 47 insertions(+)

diff --git a/src/backend/lib/hyperloglog.c b/src/backend/lib/hyperloglog.c
index 094bc09..2dca070 100644
--- a/src/backend/lib/hyperloglog.c
+++ b/src/backend/lib/hyperloglog.c
@@ -108,6 +108,51 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
 }
 
 /*
+ * Initialize HyperLogLog track state
+ *
+ * Instead of specifying bwidth (number of bits used for addressing the
+ * register), this method allows sizing the counter for particular error
+ * rate using a simple formula from the paper:
+ *
+ *   e = 1.04 / sqrt(m)
+ *
+ * where 'm' is the number of registers, i.e. (2^bwidth). The method
+ * finds the lowest bwidth with 'e' below the requested error rate, and
+ * the uses it to initialize the counter.
+ *
+ * As bwidth has to be between 4 and 16, the worst error possible rate
+ * is ~25% (bwidth=4) and 0.4% (bwidth=16).
+ */
+void
+initHyperLogLogError(hyperLogLogState *cState, double error)
+{
+	uint8 bwidth = 4;
+
+	while (bwidth < 16)
+	{
+		double m = (Size)1 << bwidth;
+		if (1.04 / sqrt(m) < error)
+			break;
+		bwidth++;
+	}
+
+	initHyperLogLog(cState, bwidth);
+}
+
+/*
+ * Free HyperLogLog track state
+ *
+ * Releases the internal state, but not the state itself (in case it's
+ * not allocated by palloc).
+ */
+void
+freeHyperLogLog(hyperLogLogState *cState)
+{
+	Assert(cState->hashesArr != NULL);
+	pfree(cState->hashesArr);
+}
+
+/*
  * Adds element to the estimator, from caller-supplied hash.
  *
  * It is critical that the hash value passed be an actual hash value, typically
diff --git a/src/include/lib/hyperloglog.h b/src/include/lib/hyperloglog.h
index 5a1d4d3..b999b30 100644
--- a/src/include/lib/hyperloglog.h
+++ b/src/include/lib/hyperloglog.h
@@ -60,8 +60,10 @@ typedef struct hyperLogLogState
 } hyperLogLogState;
 
 extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
+extern void initHyperLogLogError(hyperLogLogState *cState, double error);
 extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
 extern double estimateHyperLogLog(hyperLogLogState *cState);
 extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
+extern void freeHyperLogLog(hyperLogLogState *cState);
 
 #endif   /* HYPERLOGLOG_H */
-- 
2.1.0

