diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 13:40:54 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 13:40:54 +0000 |
commit | 317c0644ccf108aa23ef3fd8358bd66c2840bfc0 (patch) | |
tree | c417b3d25c86b775989cb5ac042f37611b626c8a /utils/hyperloglog | |
parent | Initial commit. (diff) | |
download | redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.tar.xz redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.zip |
Adding upstream version 5:7.2.4.upstream/5%7.2.4
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'utils/hyperloglog')
-rw-r--r-- | utils/hyperloglog/.gitignore | 1 | ||||
-rw-r--r-- | utils/hyperloglog/hll-err.rb | 27 | ||||
-rw-r--r-- | utils/hyperloglog/hll-gnuplot-graph.rb | 88 |
3 files changed, 116 insertions, 0 deletions
diff --git a/utils/hyperloglog/.gitignore b/utils/hyperloglog/.gitignore new file mode 100644 index 0000000..2211df6 --- /dev/null +++ b/utils/hyperloglog/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/utils/hyperloglog/hll-err.rb b/utils/hyperloglog/hll-err.rb new file mode 100644 index 0000000..2c71ac5 --- /dev/null +++ b/utils/hyperloglog/hll-err.rb @@ -0,0 +1,27 @@ +# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo +# BSD license, See the COPYING file for more information. +# +# Check error of HyperLogLog Redis implementation for different set sizes. + +require 'rubygems' +require 'redis' +require 'digest/sha1' + +r = Redis.new +r.del('hll') +i = 0 +while true do + 100.times { + elements = [] + 1000.times { + ele = Digest::SHA1.hexdigest(i.to_s) + elements << ele + i += 1 + } + r.pfadd('hll',elements) + } + approx = r.pfcount('hll') + abs_err = (approx-i).abs + rel_err = 100.to_f*abs_err/i + puts "#{i} vs #{approx}: #{rel_err}%" +end diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb new file mode 100644 index 0000000..6c7596d --- /dev/null +++ b/utils/hyperloglog/hll-gnuplot-graph.rb @@ -0,0 +1,88 @@ +# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo +# BSD license, See the COPYING file for more information. +# +# This program is suited to output average and maximum errors of +# the Redis HyperLogLog implementation in a format suitable to print +# graphs using gnuplot. + +require 'rubygems' +require 'redis' +require 'digest/sha1' + +# Generate an array of [cardinality,relative_error] pairs +# in the 0 - max range, with the specified step. +# +# 'r' is the Redis object used to perform the queries. +# 'seed' must be different every time you want a test performed +# with a different set. The function guarantees that if 'seed' is the +# same, exactly the same dataset is used, and when it is different, +# a totally unrelated different data set is used (without any common +# element in practice). +def run_experiment(r,seed,max,step) + r.del('hll') + i = 0 + samples = [] + step = 1000 if step > 1000 + while i < max do + elements = [] + step.times { + ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s) + elements << ele + i += 1 + } + r.pfadd('hll',elements) + approx = r.pfcount('hll') + err = approx-i + rel_err = 100.to_f*err/i + samples << [i,rel_err] + end + samples +end + +def filter_samples(numsets,max,step,filter) + r = Redis.new + dataset = {} + (0...numsets).each{|i| + dataset[i] = run_experiment(r,i,max,step) + STDERR.puts "Set #{i}" + } + dataset[0].each_with_index{|ele,index| + if filter == :max + card=ele[0] + err=ele[1].abs + (1...numsets).each{|i| + err = dataset[i][index][1] if err < dataset[i][index][1] + } + puts "#{card} #{err}" + elsif filter == :avg + card=ele[0] + err = 0 + (0...numsets).each{|i| + err += dataset[i][index][1] + } + err /= numsets + puts "#{card} #{err}" + elsif filter == :absavg + card=ele[0] + err = 0 + (0...numsets).each{|i| + err += dataset[i][index][1].abs + } + err /= numsets + puts "#{card} #{err}" + elsif filter == :all + (0...numsets).each{|i| + card,err = dataset[i][index] + puts "#{card} #{err}" + } + else + raise "Unknown filter #{filter}" + end + } +end + +if ARGV.length != 4 + puts "Usage: hll-gnuplot-graph <samples> <max> <step> (max|avg|absavg|all)" + exit 1 +end +filter_samples(ARGV[0].to_i,ARGV[1].to_i,ARGV[2].to_i,ARGV[3].to_sym) |