summaryrefslogtreecommitdiffstats
path: root/utils/hyperloglog
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 13:40:54 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 13:40:54 +0000
commit317c0644ccf108aa23ef3fd8358bd66c2840bfc0 (patch)
treec417b3d25c86b775989cb5ac042f37611b626c8a /utils/hyperloglog
parentInitial commit. (diff)
downloadredis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.tar.xz
redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.zip
Adding upstream version 5:7.2.4.upstream/5%7.2.4
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'utils/hyperloglog')
-rw-r--r--utils/hyperloglog/.gitignore1
-rw-r--r--utils/hyperloglog/hll-err.rb27
-rw-r--r--utils/hyperloglog/hll-gnuplot-graph.rb88
3 files changed, 116 insertions, 0 deletions
diff --git a/utils/hyperloglog/.gitignore b/utils/hyperloglog/.gitignore
new file mode 100644
index 0000000..2211df6
--- /dev/null
+++ b/utils/hyperloglog/.gitignore
@@ -0,0 +1 @@
+*.txt
diff --git a/utils/hyperloglog/hll-err.rb b/utils/hyperloglog/hll-err.rb
new file mode 100644
index 0000000..2c71ac5
--- /dev/null
+++ b/utils/hyperloglog/hll-err.rb
@@ -0,0 +1,27 @@
+# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo
+# BSD license, See the COPYING file for more information.
+#
+# Check error of HyperLogLog Redis implementation for different set sizes.
+
+require 'rubygems'
+require 'redis'
+require 'digest/sha1'
+
+r = Redis.new
+r.del('hll')
+i = 0
+while true do
+ 100.times {
+ elements = []
+ 1000.times {
+ ele = Digest::SHA1.hexdigest(i.to_s)
+ elements << ele
+ i += 1
+ }
+ r.pfadd('hll',elements)
+ }
+ approx = r.pfcount('hll')
+ abs_err = (approx-i).abs
+ rel_err = 100.to_f*abs_err/i
+ puts "#{i} vs #{approx}: #{rel_err}%"
+end
diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb
new file mode 100644
index 0000000..6c7596d
--- /dev/null
+++ b/utils/hyperloglog/hll-gnuplot-graph.rb
@@ -0,0 +1,88 @@
+# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo
+# BSD license, See the COPYING file for more information.
+#
+# This program is suited to output average and maximum errors of
+# the Redis HyperLogLog implementation in a format suitable to print
+# graphs using gnuplot.
+
+require 'rubygems'
+require 'redis'
+require 'digest/sha1'
+
+# Generate an array of [cardinality,relative_error] pairs
+# in the 0 - max range, with the specified step.
+#
+# 'r' is the Redis object used to perform the queries.
+# 'seed' must be different every time you want a test performed
+# with a different set. The function guarantees that if 'seed' is the
+# same, exactly the same dataset is used, and when it is different,
+# a totally unrelated different data set is used (without any common
+# element in practice).
+def run_experiment(r,seed,max,step)
+ r.del('hll')
+ i = 0
+ samples = []
+ step = 1000 if step > 1000
+ while i < max do
+ elements = []
+ step.times {
+ ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s)
+ elements << ele
+ i += 1
+ }
+ r.pfadd('hll',elements)
+ approx = r.pfcount('hll')
+ err = approx-i
+ rel_err = 100.to_f*err/i
+ samples << [i,rel_err]
+ end
+ samples
+end
+
+def filter_samples(numsets,max,step,filter)
+ r = Redis.new
+ dataset = {}
+ (0...numsets).each{|i|
+ dataset[i] = run_experiment(r,i,max,step)
+ STDERR.puts "Set #{i}"
+ }
+ dataset[0].each_with_index{|ele,index|
+ if filter == :max
+ card=ele[0]
+ err=ele[1].abs
+ (1...numsets).each{|i|
+ err = dataset[i][index][1] if err < dataset[i][index][1]
+ }
+ puts "#{card} #{err}"
+ elsif filter == :avg
+ card=ele[0]
+ err = 0
+ (0...numsets).each{|i|
+ err += dataset[i][index][1]
+ }
+ err /= numsets
+ puts "#{card} #{err}"
+ elsif filter == :absavg
+ card=ele[0]
+ err = 0
+ (0...numsets).each{|i|
+ err += dataset[i][index][1].abs
+ }
+ err /= numsets
+ puts "#{card} #{err}"
+ elsif filter == :all
+ (0...numsets).each{|i|
+ card,err = dataset[i][index]
+ puts "#{card} #{err}"
+ }
+ else
+ raise "Unknown filter #{filter}"
+ end
+ }
+end
+
+if ARGV.length != 4
+ puts "Usage: hll-gnuplot-graph <samples> <max> <step> (max|avg|absavg|all)"
+ exit 1
+end
+filter_samples(ARGV[0].to_i,ARGV[1].to_i,ARGV[2].to_i,ARGV[3].to_sym)