summary refs log tree commit diff stats
path: root/third_party/rust/rust_cascade/test_data/make-sample-data.py
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/rust_cascade/test_data/make-sample-data.py
parent Initial commit. (diff)
download firefox-esr-upstream.tar.xz
firefox-esr-upstream.zip
Adding upstream version 115.7.0esr. upstream/115.7.0esr upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/rust_cascade/test_data/make-sample-data.py')
-rw-r--r-- third_party/rust/rust_cascade/test_data/make-sample-data.py 106
1 files changed, 106 insertions, 0 deletions
diff --git a/third_party/rust/rust_cascade/test_data/make-sample-data.py b/third_party/rust/rust_cascade/test_data/make-sample-data.py
new file mode 100644
index 0000000000..c9f117da5f
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/make-sample-data.py
@@ -0,0 +1,106 @@
+import filtercascade
+import hashlib
+from pathlib import Path
+
+import sys
+import logging
+
+# Route log records of level DEBUG and above to stdout.
+logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
+
+
+def predictable_serial_gen(start, end):
+    """Yield deterministic hex strings for the integers in ``(start, end]``.
+
+    Each value is the SHA-256 hex digest of the counter encoded as a 4-byte
+    big-endian unsigned integer. The counter is incremented before it is
+    hashed, so ``start`` itself is skipped and ``end`` is included; that
+    range is kept exactly so the generated sample data does not change.
+
+    Nothing is yielded when ``start >= end``.
+    """
+    for counter in range(start + 1, end + 1):
+        encoded = counter.to_bytes(4, byteorder="big")
+        yield hashlib.sha256(encoded).hexdigest()
+
+
+def store(fc, path):
+    """Write the cascade ``fc`` to ``path``, replacing any existing file.
+
+    ``path`` must be a ``pathlib.Path``; ``fc`` must provide a ``tofile(f)``
+    method that accepts a file object opened for binary writing.
+    """
+    # Drop any previous output first so a fresh file is created.
+    if path.exists():
+        path.unlink()
+    with path.open("wb") as out:
+        fc.tofile(out)
+
+
+small_set = list(set(predictable_serial_gen(0, 500)))  # hashes of counters 1..500, as a list
+large_set = set(predictable_serial_gen(500, 10_000))  # hashes of counters 501..10000
+
+# Filter parameters passed to every FilterCascade constructed below.
+growth_factor = 1.0
+min_filter_length = 177 # 177 * 1.44 ~ 256, so the smallest filter will have 256 bits
+
+# Salted SHA256 cascade: small set included, large set excluded.
+print("--- v2_sha256l32_with_salt ---")
+v2_sha256l32_with_salt = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
+    salt=b"nacl",
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256l32_with_salt.initialize(
+    include=[b"this", b"that", *small_set],
+    exclude=large_set | {b"other"},
+)
+store(v2_sha256l32_with_salt, Path("test_v2_sha256l32_salt_mlbf"))
+
+# Unsalted SHA256 cascade: small set included, large set excluded.
+print("--- v2_sha256l32 ---")
+v2_sha256l32 = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256l32.initialize(
+    include=[b"this", b"that", *small_set],
+    exclude=large_set | {b"other"},
+)
+store(v2_sha256l32, Path("test_v2_sha256l32_mlbf"))
+
+# MURMUR3 cascade: small set included, large set excluded.
+print("--- v2_murmur ---")
+v2_murmur = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_murmur.initialize(
+    include=[b"this", b"that", *small_set],
+    exclude=large_set | {b"other"},
+)
+store(v2_murmur, Path("test_v2_murmur_mlbf"))
+
+# MURMUR3 cascade with the roles swapped: large set included, small set excluded.
+print("--- v2_murmur_inverted ---")
+v2_murmur_inverted = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_murmur_inverted.initialize(
+    include=large_set | {b"this", b"that"},
+    exclude=[b"other", *small_set],
+)
+store(v2_murmur_inverted, Path("test_v2_murmur_inverted_mlbf"))
+
+# SHA256 cascade with the roles swapped: large set included, small set excluded.
+print("--- v2_sha256l32_inverted ---")
+v2_sha256l32_inverted = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256l32_inverted.initialize(
+    include=large_set | {b"this", b"that"},
+    exclude=[b"other", *small_set],
+)
+store(v2_sha256l32_inverted, Path("test_v2_sha256l32_inverted_mlbf"))
+
+# Salted SHA256CTR cascade: small set included, large set excluded.
+print("--- v2_sha256ctr_with_salt ---")
+v2_sha256ctr_with_salt = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256CTR,
+    salt=b"nacl",
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256ctr_with_salt.initialize(
+    include=[b"this", b"that", *small_set],
+    exclude=large_set | {b"other"},
+)
+store(v2_sha256ctr_with_salt, Path("test_v2_sha256ctr_salt_mlbf"))