From 317c0644ccf108aa23ef3fd8358bd66c2840bfc0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 14 Apr 2024 15:40:54 +0200 Subject: Adding upstream version 5:7.2.4. Signed-off-by: Daniel Baumann --- utils/hyperloglog/.gitignore | 1 + utils/hyperloglog/hll-err.rb | 27 +++++++++++ utils/hyperloglog/hll-gnuplot-graph.rb | 88 ++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 utils/hyperloglog/.gitignore create mode 100644 utils/hyperloglog/hll-err.rb create mode 100644 utils/hyperloglog/hll-gnuplot-graph.rb (limited to 'utils/hyperloglog') diff --git a/utils/hyperloglog/.gitignore b/utils/hyperloglog/.gitignore new file mode 100644 index 0000000..2211df6 --- /dev/null +++ b/utils/hyperloglog/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/utils/hyperloglog/hll-err.rb b/utils/hyperloglog/hll-err.rb new file mode 100644 index 0000000..2c71ac5 --- /dev/null +++ b/utils/hyperloglog/hll-err.rb @@ -0,0 +1,27 @@ +# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo +# BSD license, See the COPYING file for more information. +# +# Check error of HyperLogLog Redis implementation for different set sizes. + +require 'rubygems' +require 'redis' +require 'digest/sha1' + +r = Redis.new +r.del('hll') +i = 0 +while true do + 100.times { + elements = [] + 1000.times { + ele = Digest::SHA1.hexdigest(i.to_s) + elements << ele + i += 1 + } + r.pfadd('hll',elements) + } + approx = r.pfcount('hll') + abs_err = (approx-i).abs + rel_err = 100.to_f*abs_err/i + puts "#{i} vs #{approx}: #{rel_err}%" +end diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb new file mode 100644 index 0000000..6c7596d --- /dev/null +++ b/utils/hyperloglog/hll-gnuplot-graph.rb @@ -0,0 +1,88 @@ +# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo +# BSD license, See the COPYING file for more information. +# +# This program is suited to output average and maximum errors of +# the Redis HyperLogLog implementation in a format suitable to print +# graphs using gnuplot. + +require 'rubygems' +require 'redis' +require 'digest/sha1' + +# Generate an array of [cardinality,relative_error] pairs +# in the 0 - max range, with the specified step. +# +# 'r' is the Redis object used to perform the queries. +# 'seed' must be different every time you want a test performed +# with a different set. The function guarantees that if 'seed' is the +# same, exactly the same dataset is used, and when it is different, +# a totally unrelated different data set is used (without any common +# element in practice). +def run_experiment(r,seed,max,step) + r.del('hll') + i = 0 + samples = [] + step = 1000 if step > 1000 + while i < max do + elements = [] + step.times { + ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s) + elements << ele + i += 1 + } + r.pfadd('hll',elements) + approx = r.pfcount('hll') + err = approx-i + rel_err = 100.to_f*err/i + samples << [i,rel_err] + end + samples +end + +def filter_samples(numsets,max,step,filter) + r = Redis.new + dataset = {} + (0...numsets).each{|i| + dataset[i] = run_experiment(r,i,max,step) + STDERR.puts "Set #{i}" + } + dataset[0].each_with_index{|ele,index| + if filter == :max + card=ele[0] + err=ele[1].abs + (1...numsets).each{|i| + err = dataset[i][index][1] if err < dataset[i][index][1] + } + puts "#{card} #{err}" + elsif filter == :avg + card=ele[0] + err = 0 + (0...numsets).each{|i| + err += dataset[i][index][1] + } + err /= numsets + puts "#{card} #{err}" + elsif filter == :absavg + card=ele[0] + err = 0 + (0...numsets).each{|i| + err += dataset[i][index][1].abs + } + err /= numsets + puts "#{card} #{err}" + elsif filter == :all + (0...numsets).each{|i| + card,err = dataset[i][index] + puts "#{card} #{err}" + } + else + raise "Unknown filter #{filter}" + end + } +end + +if ARGV.length != 4 + puts "Usage: hll-gnuplot-graph (max|avg|absavg|all)" + exit 1 +end +filter_samples(ARGV[0].to_i,ARGV[1].to_i,ARGV[2].to_i,ARGV[3].to_sym) -- cgit v1.2.3