diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/zstd/tests | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
90 files changed, 25382 insertions, 0 deletions
diff --git a/src/zstd/tests/.gitignore b/src/zstd/tests/.gitignore new file mode 100644 index 000000000..9a6939a57 --- /dev/null +++ b/src/zstd/tests/.gitignore @@ -0,0 +1,68 @@ +# local binary (Makefile) +fullbench +fullbench32 +fullbench-lib +fuzzer +fuzzer32 +fuzzer-dll +zbufftest +zbufftest32 +zbufftest-dll +zstreamtest +zstreamtest32 +zstreamtest_asan +zstreamtest_tsan +zstreamtest-dll +datagen +paramgrill +paramgrill32 +roundTripCrash +longmatch +symbols +legacy +decodecorpus +pool +poolTests +invalidDictionaries +checkTag +zcat +zstdcat +tm + +# test artifacts +dictionary +grillResults.txt +_* +tmp* +*.zst +*.gz +!gzip/hufts-segv.gz +result +out +*.zstd +hello* +world + +# Tmp test directory +zstdtest +speedTest +versionsTest +namespaceTest +dirTest* + +# fuzzer +afl + +# Local script +startSpeedTest +speedTest.pid +*.bat + +# Generic Object files +*.o +*.ko + +# Generic Executables +*.exe +*.out +*.app diff --git a/src/zstd/tests/DEPRECATED-test-zstd-speed.py b/src/zstd/tests/DEPRECATED-test-zstd-speed.py new file mode 100755 index 000000000..b3f807459 --- /dev/null +++ b/src/zstd/tests/DEPRECATED-test-zstd-speed.py @@ -0,0 +1,378 @@ +#! /usr/bin/env python3 +# THIS BENCHMARK IS BEING REPLACED BY automated-bencmarking.py + +# ################################################################ +# Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. 
+# ########################################################################## + +# Limitations: +# - doesn't support filenames with spaces +# - dir1/zstd and dir2/zstd will be merged in a single results file + +import argparse +import os # getloadavg +import string +import subprocess +import time # strftime +import traceback +import hashlib +import platform # system + +script_version = 'v1.1.2 (2017-03-26)' +default_repo_url = 'https://github.com/facebook/zstd.git' +working_dir_name = 'speedTest' +working_path = os.getcwd() + '/' + working_dir_name # /path/to/zstd/tests/speedTest +clone_path = working_path + '/' + 'zstd' # /path/to/zstd/tests/speedTest/zstd +email_header = 'ZSTD_speedTest' +pid = str(os.getpid()) +verbose = False +clang_version = "unknown" +gcc_version = "unknown" +args = None + + +def hashfile(hasher, fname, blocksize=65536): + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(blocksize), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def log(text): + print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text) + + +def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True): + if print_command: + log("> " + command) + popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd) + stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout) + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + if stdout_lines: + print(stdout_lines) + if stderr_lines: + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if stderr_lines and not print_output and print_error: + print(stderr_lines) + raise RuntimeError(stdout_lines + stderr_lines) + return (stdout_lines + stderr_lines).splitlines() +execute.cwd = None + + +def does_command_exist(command): + try: + execute(command, verbose, False, False) + except Exception: + return False + return True + + 
+def send_email(emails, topic, text, have_mutt, have_mail): + logFileName = working_path + '/' + 'tmpEmailContent' + with open(logFileName, "w") as myfile: + myfile.writelines(text) + myfile.close() + if have_mutt: + execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) + elif have_mail: + execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) + else: + log("e-mail cannot be sent (mail or mutt not found)") + + +def send_email_with_attachments(branch, commit, last_commit, args, text, results_files, + logFileName, have_mutt, have_mail): + with open(logFileName, "w") as myfile: + myfile.writelines(text) + myfile.close() + email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \ + % (email_header, pid, branch, commit, last_commit, + args.lowerLimit, args.ratioLimit) + if have_mutt: + execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files + + ' < ' + logFileName) + elif have_mail: + execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName) + else: + log("e-mail cannot be sent (mail or mutt not found)") + + +def git_get_branches(): + execute('git fetch -p', verbose) + branches = execute('git branch -rl', verbose) + output = [] + for line in branches: + if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line): + output.append(line.strip()) + return output + + +def git_get_changes(branch, commit, last_commit): + fmt = '--format="%h: (%an) %s, %ar"' + if last_commit is None: + commits = execute('git log -n 10 %s %s' % (fmt, commit)) + else: + commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit)) + return str('Changes in %s since %s:\n' % (branch, last_commit)) + '\n'.join(commits) + + +def get_last_results(resultsFileName): + if not os.path.isfile(resultsFileName): + return None, None, None, None + commit = None + csize = [] + cspeed = [] + dspeed = [] + with open(resultsFileName, 'r') as f: + for line in f: + 
words = line.split() + if len(words) <= 4: # branch + commit + compilerVer + md5 + commit = words[1] + csize = [] + cspeed = [] + dspeed = [] + if (len(words) == 8) or (len(words) == 9): # results: "filename" or "XX files" + csize.append(int(words[1])) + cspeed.append(float(words[3])) + dspeed.append(float(words[5])) + return commit, csize, cspeed, dspeed + + +def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, + testFilePath, fileName, last_csize, last_cspeed, last_dspeed): + sleepTime = 30 + while os.getloadavg()[0] > args.maxLoadAvg: + log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds" + % (os.getloadavg()[0], args.maxLoadAvg, sleepTime)) + time.sleep(sleepTime) + start_load = str(os.getloadavg()) + osType = platform.system() + if osType == 'Linux': + cpuSelector = "taskset --cpu-list 0" + else: + cpuSelector = "" + if args.dictionary: + result = execute('%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True) + else: + result = execute('%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath), print_output=True) + end_load = str(os.getloadavg()) + linesExpected = args.lastCLevel + 1 + if len(result) != linesExpected: + raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result))) + with open(resultsFileName, "a") as myfile: + myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum)) + myfile.write('\n'.join(result) + '\n') + myfile.close() + if (last_cspeed == None): + log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName)) + return "" + commit, csize, cspeed, dspeed = get_last_results(resultsFileName) + text = "" + for i in range(0, min(len(cspeed), len(last_cspeed))): + print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f 
dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName)) + if (cspeed[i]/last_cspeed[i] < args.lowerLimit): + text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName) + if (dspeed[i]/last_dspeed[i] < args.lowerLimit): + text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName) + if (float(last_csize[i])/csize[i] < args.ratioLimit): + text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName) + if text: + text = args.message + ("\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text + return text + + +def update_config_file(branch, commit): + last_commit = None + commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt" + if os.path.isfile(commitFileName): + with open(commitFileName, 'r') as infile: + last_commit = infile.read() + with open(commitFileName, 'w') as outfile: + outfile.write(commit) + return last_commit + + +def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName): + last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName) + if not args.dry_run: + text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) + if text: + log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit)) + text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, 
resultsFileName, filePath, fileName, csize, cspeed, dspeed) + return text + + +def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail): + local_branch = branch.split('/')[1] + version = local_branch.rpartition('-')[2] + '_' + commit + if not args.dry_run: + execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version + + 'mv programs/zstd programs/zstd_clang && ' + + 'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version) + md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd') + md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32') + md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang') + print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang)) + print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version)) + + logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt" + text_to_send = [] + results_files = "" + if args.dictionary: + dictName = args.dictionary.rpartition('/')[2] + else: + dictName = None + + for filePath in testFilePaths: + fileName = filePath.rpartition('/')[2] + if dictName: + resultsFileName = working_path + "/" + dictName.replace(".", "_") + "_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + else: + resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'zstd', md5_zstd, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'zstd32', md5_zstd32, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) + if text: + 
text_to_send.append(text) + results_files += resultsFileName + " " + resultsFileName = working_path + "/resultsClang_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'zstd_clang', md5_zstd_clang, 'clang_version='+clang_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + if text_to_send: + send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('testFileNames', help='file or directory names list for speed benchmark') + parser.add_argument('emails', help='list of e-mail addresses to send warnings') + parser.add_argument('--dictionary', '-D', help='path to the dictionary') + parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="") + parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url) + parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98) + parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999) + parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75) + parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5) + parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300) + parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800) + parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False) + parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False) + args = 
parser.parse_args() + verbose = args.verbose + + # check if test files are accessible + testFileNames = args.testFileNames.split() + testFilePaths = [] + for fileName in testFileNames: + fileName = os.path.expanduser(fileName) + if os.path.isfile(fileName) or os.path.isdir(fileName): + testFilePaths.append(os.path.abspath(fileName)) + else: + log("ERROR: File/directory not found: " + fileName) + exit(1) + + # check if dictionary is accessible + if args.dictionary: + args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary)) + if not os.path.isfile(args.dictionary): + log("ERROR: Dictionary not found: " + args.dictionary) + exit(1) + + # check availability of e-mail senders + have_mutt = does_command_exist("mutt -h") + have_mail = does_command_exist("mail -V") + if not have_mutt and not have_mail: + log("ERROR: e-mail senders 'mail' or 'mutt' not found") + exit(1) + + clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]; + gcc_version = execute("gcc -dumpversion", verbose)[0]; + + if verbose: + print("PARAMETERS:\nrepoURL=%s" % args.repoURL) + print("working_path=%s" % working_path) + print("clone_path=%s" % clone_path) + print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths)) + print("message=%s" % args.message) + print("emails=%s" % args.emails) + print("dictionary=%s" % args.dictionary) + print("maxLoadAvg=%s" % args.maxLoadAvg) + print("lowerLimit=%s" % args.lowerLimit) + print("ratioLimit=%s" % args.ratioLimit) + print("lastCLevel=%s" % args.lastCLevel) + print("sleepTime=%s" % args.sleepTime) + print("timeout=%s" % args.timeout) + print("dry_run=%s" % args.dry_run) + print("verbose=%s" % args.verbose) + print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail)) + + # clone ZSTD repo if needed + if not os.path.isdir(working_path): + os.mkdir(working_path) + if not os.path.isdir(clone_path): + execute.cwd = working_path + execute('git clone ' + 
args.repoURL) + if not os.path.isdir(clone_path): + log("ERROR: ZSTD clone not found: " + clone_path) + exit(1) + execute.cwd = clone_path + + # check if speedTest.pid already exists + pidfile = "./speedTest.pid" + if os.path.isfile(pidfile): + log("ERROR: %s already exists, exiting" % pidfile) + exit(1) + + send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail) + with open(pidfile, 'w') as the_file: + the_file.write(pid) + + branch = "" + commit = "" + first_time = True + while True: + try: + if first_time: + first_time = False + else: + time.sleep(args.sleepTime) + loadavg = os.getloadavg()[0] + if (loadavg <= args.maxLoadAvg): + branches = git_get_branches() + for branch in branches: + commit = execute('git show -s --format=%h ' + branch, verbose)[0] + last_commit = update_config_file(branch, commit) + if commit == last_commit: + log("skipping branch %s: head %s already processed" % (branch, commit)) + else: + log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit)) + execute('git checkout -- . 
&& git checkout ' + branch) + print(git_get_changes(branch, commit, last_commit)) + test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail) + else: + log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg)) + if verbose: + log("sleep for %s seconds" % args.sleepTime) + except Exception as e: + stack = traceback.format_exc() + email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit) + send_email(args.emails, email_topic, stack, have_mutt, have_mail) + print(stack) + except KeyboardInterrupt: + os.unlink(pidfile) + send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail) + exit(0) diff --git a/src/zstd/tests/Makefile b/src/zstd/tests/Makefile new file mode 100644 index 000000000..d347a948a --- /dev/null +++ b/src/zstd/tests/Makefile @@ -0,0 +1,475 @@ +# ################################################################ +# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. 
+# ################################################################ +# datagen : Synthetic and parametrable data generator, for tests +# fullbench : Precisely measure speed for each zstd inner functions +# fullbench32: Same as fullbench, but forced to compile in 32-bits mode +# fuzzer : Test tool, to check zstd integrity on target platform +# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode +# paramgrill : parameter tester for zstd +# test-zstd-speed.py : script for testing zstd speed difference between commits +# versionsTest : compatibility test between zstd versions stored on Github (v0.1+) +# zstreamtest : Fuzzer test tool for zstd streaming API +# zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode +# ########################################################################## + +ZSTDDIR = ../lib +PRGDIR = ../programs +PYTHON ?= python3 +TESTARTEFACT := versionsTest + +DEBUGLEVEL ?= 1 +DEBUGFLAGS = -g -DDEBUGLEVEL=$(DEBUGLEVEL) +CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ + -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) +ifeq ($(OS),Windows_NT) # MinGW assumed +CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting +endif +CFLAGS ?= -O3 +CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls -Wmissing-prototypes +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) + + +ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c +ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c +ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c +ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) +ZBUFF_FILES := $(ZSTDDIR)/deprecated/*.c +ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c + +ZSTD_F1 := $(wildcard $(ZSTD_FILES)) +ZSTD_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdm_,$(ZSTD_F1)) 
+ZSTD_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdc_,$(ZSTD_OBJ1)) +ZSTD_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdd_,$(ZSTD_OBJ2)) +ZSTD_OBJECTS := $(ZSTD_OBJ3:.c=.o) + +ZSTDMT_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdmt_m_,$(ZSTD_F1)) +ZSTDMT_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1)) +ZSTDMT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2)) +ZSTDMT_OBJECTS := $(ZSTDMT_OBJ3:.c=.o) + +# Define *.exe as extension for Windows systems +ifneq (,$(filter Windows%,$(OS))) +EXT =.exe +MULTITHREAD_CPP = -DZSTD_MULTITHREAD +MULTITHREAD_LD = +else +EXT = +MULTITHREAD_CPP = -DZSTD_MULTITHREAD +MULTITHREAD_LD = -pthread +endif +MULTITHREAD = $(MULTITHREAD_CPP) $(MULTITHREAD_LD) + +VOID = /dev/null +ZSTREAM_TESTTIME ?= -T90s +FUZZERTEST ?= -T200s +ZSTDRTTEST = --test-large-data +DECODECORPUS_TESTTIME ?= -T30 + +.PHONY: default all all32 allnothread dll clean test test32 test-all versionsTest + +default: fullbench + @echo $(ZSTDMT_OBJECTS) + +all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus roundTripCrash \ + fullbench-lib poolTests + +all32: fullbench32 fuzzer32 zstreamtest32 + +allnothread: MULTITHREAD_CPP= +allnothread: MULTITHREAD_LD= +allnothread: fullbench fuzzer paramgrill datagen decodecorpus + +# note : broken : requires symbols unavailable from dynamic library +dll: fuzzer-dll zstreamtest-dll + +PHONY: zstd zstd32 zstd-nolegacy # must be phony, only external makefile knows how to build them, or if they need an update +zstd zstd32 zstd-nolegacy: + $(MAKE) -C $(PRGDIR) $@ MOREFLAGS+="$(DEBUGFLAGS)" + +gzstd: + $(MAKE) -C $(PRGDIR) $@ HAVE_ZLIB=1 MOREFLAGS+="$(DEBUGFLAGS)" + +.PHONY: libzstd +libzstd : + $(MAKE) -C $(ZSTDDIR) libzstd + +%-dll : libzstd +%-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd + +.PHONY: zstd-staticLib +zstd-staticLib : + $(MAKE) -C $(ZSTDDIR) libzstd.a + +zstdm_%.o : $(ZSTDDIR)/common/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdc_%.o : $(ZSTDDIR)/compress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ 
+ +zstdd_%.o : $(ZSTDDIR)/decompress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdmt%.o : CPPFLAGS += $(MULTITHREAD_CPP) + +zstdmt_m_%.o : $(ZSTDDIR)/common/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdmt_c_%.o : $(ZSTDDIR)/compress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +fullbench32: CPPFLAGS += -m32 +fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) +fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) +fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG # turn off assert() for speed measurements +fullbench fullbench32 : $(ZSTD_FILES) +fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c fullbench.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ +fullbench-lib : zstd-staticLib +fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c fullbench.c + $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a + +# note : broken : requires symbols unavailable from dynamic library +fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/timefn.c fullbench.c +# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll + $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) + +fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) +fuzzer : LDFLAGS += $(MULTITHREAD_LD) +fuzzer32: CFLAGS += -m32 +fuzzer : $(ZSTDMT_OBJECTS) +fuzzer32: $(ZSTD_FILES) +fuzzer fuzzer32 : $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +# note : broken : requires symbols unavailable from dynamic library +fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) + +zbufftest zbufftest32 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated +zbufftest 
zbufftest32 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings +zbufftest32 : CFLAGS += -m32 +zbufftest zbufftest32 : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) + +ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c +ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES) +ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES) +zstreamtest32 : CFLAGS += -m32 +zstreamtest zstreamtest32 : CPPFLAGS += $(MULTITHREAD_CPP) +zstreamtest zstreamtest32 : LDFLAGS += $(MULTITHREAD_LD) +zstreamtest : $(ZSTDMT_OBJECTS) $(ZSTREAM_PROPER_FILES) +zstreamtest32 : $(ZSTREAMFILES) +zstreamtest zstreamtest32 : + $(CC) $(FLAGS) $^ -o $@$(EXT) + +zstreamtest_asan : CFLAGS += -fsanitize=address +zstreamtest_asan : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +zstreamtest_tsan : CFLAGS += -fsanitize=thread +zstreamtest_tsan : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +# note : broken : requires symbols unavailable from dynamic library +zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll +zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) + $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) + +paramgrill : DEBUGFLAGS = # turn off assert() by default for speed measurements +paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c + $(CC) $(FLAGS) $^ -lm -o $@$(EXT) + +datagen : $(PRGDIR)/datagen.c datagencli.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +roundTripCrash : $(ZSTD_OBJECTS) roundTripCrash.c + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +longmatch : 
$(ZSTD_OBJECTS) longmatch.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +bigdict: $(ZSTDMT_OBJECTS) $(PRGDIR)/datagen.c bigdict.c + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +legacy : CPPFLAGS += -I$(ZSTDDIR)/legacy -DZSTD_LEGACY_SUPPORT=4 +legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +decodecorpus : $(filter-out zstdc_zstd_compress.o, $(ZSTD_OBJECTS)) $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c decodecorpus.c + $(CC) $(FLAGS) $^ -o $@$(EXT) -lm + +poolTests : $(PRGDIR)/util.c $(PRGDIR)/timefn.c poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +.PHONY: versionsTest +versionsTest: clean + $(PYTHON) test-zstd-versions.py + +automated_benchmarking: clean + $(PYTHON) automated_benchmarking.py + +checkTag: checkTag.c $(ZSTDDIR)/zstd.h + $(CC) $(FLAGS) $< -o $@$(EXT) + +clean: + $(MAKE) -C $(ZSTDDIR) clean + $(MAKE) -C $(PRGDIR) clean + @$(RM) -fR $(TESTARTEFACT) + @$(RM) -rf tmp* # some test directories are named tmp* + @$(RM) core *.o *.tmp result* *.gcda dictionary *.zst \ + $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \ + fullbench$(EXT) fullbench32$(EXT) \ + fullbench-lib$(EXT) fullbench-dll$(EXT) \ + fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \ + fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT) \ + zstreamtest$(EXT) zstreamtest32$(EXT) \ + datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \ + symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \ + decodecorpus$(EXT) checkTag$(EXT) bigdict$(EXT) + @echo Cleaning completed + + +#---------------------------------------------------------------------------------- +# valgrind tests are validated only for some posix platforms 
+#---------------------------------------------------------------------------------- +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +HOST_OS = POSIX + +valgrindTest: VALGRIND = valgrind --leak-check=full --show-leak-kinds=all --error-exitcode=1 +valgrindTest: zstd datagen fuzzer fullbench + @echo "\n ---- valgrind tests : memory analyzer ----" + $(VALGRIND) ./datagen -g50M > $(VOID) + $(VALGRIND) $(PRGDIR)/zstd ; if [ $$? -eq 0 ] ; then echo "zstd without argument should have failed"; false; fi + ./datagen -g80 | $(VALGRIND) $(PRGDIR)/zstd - -c > $(VOID) + ./datagen -g16KB | $(VALGRIND) $(PRGDIR)/zstd -vf - -c > $(VOID) + ./datagen -g2930KB | $(VALGRIND) $(PRGDIR)/zstd -5 -vf - -o tmp + $(VALGRIND) $(PRGDIR)/zstd -vdf tmp -c > $(VOID) + ./datagen -g64MB | $(VALGRIND) $(PRGDIR)/zstd -vf - -c > $(VOID) + @rm tmp + $(VALGRIND) ./fuzzer -T1mn -t1 + $(VALGRIND) ./fullbench -i1 + +endif + + +ifneq (,$(filter MINGW% MSYS%,$(shell uname))) +HOST_OS = MSYS +endif + + +#----------------------------------------------------------------------------- +# make tests validated only for below targets +#----------------------------------------------------------------------------- +ifneq (,$(filter $(HOST_OS),MSYS POSIX)) + +DIFF:=diff +ifneq (,$(filter $(shell uname),SunOS)) +DIFF:=gdiff +endif + +.PHONY: list +list: + @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + +.PHONY: shortest +shortest: ZSTDRTTEST= +shortest: test-zstd + +.PHONY: fuzztest +fuzztest: test-fuzzer test-zstream test-decodecorpus + +.PHONY: test +test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus +ifeq ($(QEMU_SYS),) +test: test-pool +endif + +test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zstream32 + +test-all: test test32 
valgrindTest test-decodecorpus-cli + + +.PHONY: test-zstd test-zstd32 test-zstd-nolegacy test-zstdgrep +test-zstd: ZSTD = $(PRGDIR)/zstd +test-zstd: zstd + +test-zstd32: ZSTD = $(PRGDIR)/zstd32 +test-zstd32: zstd32 + +test-zstd-nolegacy: ZSTD = $(PRGDIR)/zstd-nolegacy +test-zstd-nolegacy: zstd-nolegacy + +test-zstd test-zstd32 test-zstd-nolegacy: datagen + file $(ZSTD) + EXE_PREFIX="$(QEMU_SYS)" ZSTD_BIN="$(ZSTD)" DATAGEN_BIN=./datagen ./playTests.sh $(ZSTDRTTEST) + + +test-gzstd: gzstd + $(PRGDIR)/zstd -f README.md test-zstd-speed.py + gzip -f README.md test-zstd-speed.py + cat README.md.zst test-zstd-speed.py.gz >zstd_gz.zst + cat README.md.gz test-zstd-speed.py.zst >gz_zstd.gz + $(PRGDIR)/zstd -df README.md.gz -o README2.md + $(PRGDIR)/zstd -df README.md.gz test-zstd-speed.py.gz + $(PRGDIR)/zstd -df zstd_gz.zst gz_zstd.gz + $(DIFF) -q zstd_gz gz_zstd + echo Hello World ZSTD | $(PRGDIR)/zstd -c - >hello.zst + echo Hello World GZIP | gzip -c - >hello.gz + echo Hello World TEXT >hello.txt + cat hello.zst hello.gz hello.txt >hello_zst_gz_txt.gz + $(PRGDIR)/zstd -dcf hello.* + $(PRGDIR)/zstd -dcf - <hello_zst_gz_txt.gz + $(RM) *.gz *.zst README2.md gz_zstd zstd_gz hello.txt + +test-zstdgrep: gzstd + -[ -f /tmp/zstdcat ] || ln -s $(PWD)/$(PRGDIR)/zstd /tmp/zstdcat + echo a | $(PRGDIR)/zstd | env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep a + echo a | $(PRGDIR)/zstd | env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep b && return 1 || return 0 + -echo 'hello world' > test.txt && $(PRGDIR)/zstd test.txt + env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep hello test.txt.zst + env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep weird test.txt.zst && return 1 || return 0 + -echo 'hello' > pattern.txt + env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep -f pattern.txt test.txt.zst + $(RM) test.txt test.txt.zst pattern.txt + +test-fullbench: fullbench datagen + $(QEMU_SYS) ./fullbench -i1 + $(QEMU_SYS) ./fullbench -i1 -P0 + +test-fullbench32: fullbench32 datagen + $(QEMU_SYS) ./fullbench32 -i1 + $(QEMU_SYS) 
./fullbench32 -i1 -P0 + +test-fuzzer: fuzzer + $(QEMU_SYS) ./fuzzer -v $(FUZZERTEST) $(FUZZER_FLAGS) + +test-fuzzer-stackmode: MOREFLAGS += -DZSTD_HEAPMODE=0 +test-fuzzer-stackmode: test-fuzzer + +test-fuzzer32: fuzzer32 + $(QEMU_SYS) ./fuzzer32 -v $(FUZZERTEST) $(FUZZER_FLAGS) + +test-zbuff: zbufftest + $(QEMU_SYS) ./zbufftest $(ZSTREAM_TESTTIME) + +test-zbuff32: zbufftest32 + $(QEMU_SYS) ./zbufftest32 $(ZSTREAM_TESTTIME) + +test-zstream: zstreamtest + $(QEMU_SYS) ./zstreamtest -v $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + $(QEMU_SYS) ./zstreamtest --mt -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + $(QEMU_SYS) ./zstreamtest --newapi -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + +test-zstream32: zstreamtest32 + $(QEMU_SYS) ./zstreamtest32 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + +test-longmatch: longmatch + $(QEMU_SYS) ./longmatch + +test-bigdict: bigdict + $(QEMU_SYS) ./bigdict + +test-invalidDictionaries: invalidDictionaries + $(QEMU_SYS) ./invalidDictionaries + +test-legacy: legacy + $(QEMU_SYS) ./legacy + +test-decodecorpus: decodecorpus + $(QEMU_SYS) ./decodecorpus -t $(DECODECORPUS_TESTTIME) + +test-decodecorpus-cli: decodecorpus + @echo "\n ---- decodecorpus basic cli tests ----" + @mkdir testdir + ./decodecorpus -n5 -otestdir -ptestdir + @cd testdir && \ + $(ZSTD) -d z000000.zst -o tmp0 && \ + $(ZSTD) -d z000001.zst -o tmp1 && \ + $(ZSTD) -d z000002.zst -o tmp2 && \ + $(ZSTD) -d z000003.zst -o tmp3 && \ + $(ZSTD) -d z000004.zst -o tmp4 && \ + diff z000000 tmp0 && \ + diff z000001 tmp1 && \ + diff z000002 tmp2 && \ + diff z000003 tmp3 && \ + diff z000004 tmp4 && \ + rm ./* && \ + cd .. 
+ @echo "\n ---- decodecorpus dictionary cli tests ----" + ./decodecorpus -n5 -otestdir -ptestdir --use-dict=1MB + @cd testdir && \ + $(ZSTD) -d z000000.zst -D dictionary -o tmp0 && \ + $(ZSTD) -d z000001.zst -D dictionary -o tmp1 && \ + $(ZSTD) -d z000002.zst -D dictionary -o tmp2 && \ + $(ZSTD) -d z000003.zst -D dictionary -o tmp3 && \ + $(ZSTD) -d z000004.zst -D dictionary -o tmp4 && \ + diff z000000 tmp0 && \ + diff z000001 tmp1 && \ + diff z000002 tmp2 && \ + diff z000003 tmp3 && \ + diff z000004 tmp4 && \ + cd .. + @rm -rf testdir + +test-pool: poolTests + $(QEMU_SYS) ./poolTests + +test-lz4: ZSTD = LD_LIBRARY_PATH=/usr/local/lib $(PRGDIR)/zstd +test-lz4: ZSTD_LZ4 = LD_LIBRARY_PATH=/usr/local/lib ./lz4 +test-lz4: ZSTD_UNLZ4 = LD_LIBRARY_PATH=/usr/local/lib ./unlz4 +test-lz4: zstd decodecorpus datagen + [ -f lz4 ] || ln -s $(PRGDIR)/zstd lz4 + [ -f unlz4 ] || ln -s $(PRGDIR)/zstd unlz4 + + ./decodecorpus -ptmp + # lz4 -> zstd + lz4 < tmp | \ + $(ZSTD) -d | \ + cmp - tmp + lz4 < tmp | \ + $(ZSTD_UNLZ4) | \ + cmp - tmp + # zstd -> lz4 + $(ZSTD) --format=lz4 < tmp | \ + lz4 -d | \ + cmp - tmp + $(ZSTD_LZ4) < tmp | \ + lz4 -d | \ + cmp - tmp + # zstd -> zstd + $(ZSTD) --format=lz4 < tmp | \ + $(ZSTD) -d | \ + cmp - tmp + # zstd -> zstd + $(ZSTD) < tmp | \ + $(ZSTD) -d | \ + cmp - tmp + + ./datagen -g384KB | $(ZSTD) --format=lz4 | $(ZSTD) -d > /dev/null + + rm tmp lz4 unlz4 + +endif diff --git a/src/zstd/tests/README.md b/src/zstd/tests/README.md new file mode 100644 index 000000000..23e00767c --- /dev/null +++ b/src/zstd/tests/README.md @@ -0,0 +1,185 @@ +Programs and scripts for automated testing of Zstandard +======================================================= + +This directory contains the following programs and scripts: +- `datagen` : Synthetic and parametrable data generator, for tests +- `fullbench` : Precisely measure speed for each zstd inner functions +- `fuzzer` : Test tool, to check zstd integrity on target platform +- `paramgrill` : parameter 
tester for zstd +- `test-zstd-speed.py` : script for testing zstd speed difference between commits +- `test-zstd-versions.py` : compatibility test between zstd versions stored on Github (v0.1+) +- `zbufftest` : Test tool to check ZBUFF (a buffered streaming API) integrity +- `zstreamtest` : Fuzzer test tool for zstd streaming API +- `legacy` : Test tool to test decoding of legacy zstd frames +- `decodecorpus` : Tool to generate valid Zstandard frames, for verifying decoder implementations + + +#### `test-zstd-versions.py` - script for testing zstd interoperability between versions + +This script creates `versionsTest` directory to which zstd repository is cloned. +Then all tagged (released) versions of zstd are compiled. +In the following step interoperability between zstd versions is checked. + +#### `automated-benchmarking.py` - script for benchmarking zstd prs to dev + +This script benchmarks facebook:dev and changes from pull requests made to zstd and compares +them against facebook:dev to detect regressions. This script currently runs on a dedicated +desktop machine for every pull request that is made to the zstd repo but can also +be run on any machine via the command line interface. + +There are three modes of usage for this script: fastmode will just run a minimal single +build comparison (between facebook:dev and facebook:master), onetime will pull all the current +pull requests from the zstd repo and compare facebook:dev to all of them once, continuous +will continuously get pull requests from the zstd repo and run benchmarks against facebook:dev. 
+ +``` +Example usage: python automated_benchmarking.py +``` + +``` +usage: automated_benchmarking.py [-h] [--directory DIRECTORY] + [--levels LEVELS] [--iterations ITERATIONS] + [--emails EMAILS] [--frequency FREQUENCY] + [--mode MODE] [--dict DICT] + +optional arguments: + -h, --help show this help message and exit + --directory DIRECTORY + directory with files to benchmark + --levels LEVELS levels to test eg ('1,2,3') + --iterations ITERATIONS + number of benchmark iterations to run + --emails EMAILS email addresses of people who will be alerted upon + regression. Only for continuous mode + --frequency FREQUENCY + specifies the number of seconds to wait before each + successive check for new PRs in continuous mode + --mode MODE 'fastmode', 'onetime', 'current', or 'continuous' (see + README.md for details) + --dict DICT filename of dictionary to use (when set, this + dictioanry will be used to compress the files provided + inside --directory) +``` + +#### `test-zstd-speed.py` - script for testing zstd speed difference between commits + +DEPRECATED + +This script creates `speedTest` directory to which zstd repository is cloned. +Then it compiles all branches of zstd and performs a speed benchmark for a given list of files (the `testFileNames` parameter). +After `sleepTime` (an optional parameter, default 300 seconds) seconds the script checks repository for new commits. +If a new commit is found it is compiled and a speed benchmark for this commit is performed. +The results of the speed benchmark are compared to the previous results. +If compression or decompression speed for one of zstd levels is lower than `lowerLimit` (an optional parameter, default 0.98) the speed benchmark is restarted. +If second results are also lower than `lowerLimit` the warning e-mail is sent to recipients from the list (the `emails` parameter). 
+ +Additional remarks: +- To be sure that speed results are accurate the script should be run on a "stable" target system with no other jobs running in parallel +- Using the script with virtual machines can lead to large variations of speed results +- The speed benchmark is not performed until computers' load average is lower than `maxLoadAvg` (an optional parameter, default 0.75) +- The script sends e-mails using `mutt`; if `mutt` is not available it sends e-mails without attachments using `mail`; if both are not available it only prints a warning + + +The example usage with two test files, one e-mail address, and with an additional message: +``` +./test-zstd-speed.py "silesia.tar calgary.tar" "email@gmail.com" --message "tested on my laptop" --sleepTime 60 +``` + +To run the script in background please use: +``` +nohup ./test-zstd-speed.py testFileNames emails & +``` + +The full list of parameters: +``` +positional arguments: + testFileNames file names list for speed benchmark + emails list of e-mail addresses to send warnings + +optional arguments: + -h, --help show this help message and exit + --message MESSAGE attach an additional message to e-mail + --lowerLimit LOWERLIMIT + send email if speed is lower than given limit + --maxLoadAvg MAXLOADAVG + maximum load average to start testing + --lastCLevel LASTCLEVEL + last compression level for testing + --sleepTime SLEEPTIME + frequency of repository checking in seconds +``` + +#### `decodecorpus` - tool to generate Zstandard frames for decoder testing +Command line tool to generate test .zst files. + +This tool will generate .zst files with checksums, +as well as optionally output the corresponding correct uncompressed data for +extra verification. 
+ +Example: +``` +./decodecorpus -ptestfiles -otestfiles -n10000 -s5 +``` +will generate 10,000 sample .zst files using a seed of 5 in the `testfiles` directory, +with the zstd checksum field set, +as well as the 10,000 original files for more detailed comparison of decompression results. + +``` +./decodecorpus -t -T1mn +``` +will choose a random seed, and for 1 minute, +generate random test frames and ensure that the +zstd library correctly decompresses them in both simple and streaming modes. + +#### `paramgrill` - tool for generating compression table parameters and optimizing parameters on file given constraints + +Full list of arguments +``` + -T# : set level 1 speed objective + -B# : cut input into blocks of size # (default : single block) + -S : benchmarks a single run (example command: -Sl3w10h12) + w# - windowLog + h# - hashLog + c# - chainLog + s# - searchLog + l# - minMatch + t# - targetLength + S# - strategy + L# - level + --zstd= : Single run, parameter selection syntax same as zstdcli with more parameters + (Added forceAttachDictionary / fadt) + When invoked with --optimize, this represents the sample to exceed. 
+ --optimize= : find parameters to maximize compression ratio given parameters + Can use all --zstd= commands to constrain the type of solution found in addition to the following constraints + cSpeed= : Minimum compression speed + dSpeed= : Minimum decompression speed + cMem= : Maximum compression memory + lvl= : Searches for solutions which are strictly better than that compression lvl in ratio and cSpeed, + stc= : When invoked with lvl=, represents percentage slack in ratio/cSpeed allowed for a solution to be considered (Default 100%) + : In normal operation, represents percentage slack in choosing viable starting strategy selection in choosing the default parameters + (Lower value will begin with stronger strategies) (Default 90%) + speedRatio= (accepts decimals) + : determines value of gains in speed vs gains in ratio + when determining overall winner (default 5 (1% ratio = 5% speed)). + tries= : Maximum number of random restarts on a single strategy before switching (Default 5) + Higher values will make optimizer run longer, more chances to find better solution. + memLog : Limits the log of the size of each memotable (1 per strategy). Will use hash tables when state space is larger than max size. 
+ Setting memLog = 0 turns off memoization + --display= : specify which parameters are included in the output + can use all --zstd parameter names and 'cParams' as a shorthand for all parameters used in ZSTD_compressionParameters + (Default: display all params available) + -P# : generated sample compressibility (when no file is provided) + -t# : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours )) + -v : Prints Benchmarking output + -D : Next argument dictionary file + -s : Benchmark all files separately + -q : Quiet, repeat for more quiet + -q Prints parameters + results whenever a new best is found + -qq Only prints parameters whenever a new best is found, prints final parameters + results + -qqq Only print final parameters + results + -qqqq Only prints final parameter set in the form --zstd= + -v : Verbose, cancels quiet, repeat for more volume + -v Prints all candidate parameters and results + +``` + Any inputs afterwards are treated as files to benchmark. diff --git a/src/zstd/tests/automated_benchmarking.py b/src/zstd/tests/automated_benchmarking.py new file mode 100644 index 000000000..d0cfb1fbe --- /dev/null +++ b/src/zstd/tests/automated_benchmarking.py @@ -0,0 +1,326 @@ +# ################################################################ +# Copyright (c) 2020-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. 
# ##########################################################################
"""Benchmark zstd builds against facebook:dev and report speed regressions.

A "build" is a dict with the keys ``user``, ``branch`` and ``hash``.  A build
whose ``user`` is ``None`` means "the working tree this script lives in".
"""

import argparse
import glob
import json
import os
import time
import pickle as pk
import subprocess
import urllib.request


GITHUB_API_PR_URL = "https://api.github.com/repos/facebook/zstd/pulls?state=open"
GITHUB_URL_TEMPLATE = "https://github.com/{}/zstd"
MASTER_BUILD = {"user": "facebook", "branch": "dev", "hash": None}

# check to see if there are any new PRs every minute
DEFAULT_MAX_API_CALL_FREQUENCY_SEC = 60
PREVIOUS_PRS_FILENAME = "prev_prs.pk"

# Not sure what the threshold for triggering alarms should be
# 1% regression sounds like a little too sensitive but the desktop
# that I'm running it on is pretty stable so I think this is fine
CSPEED_REGRESSION_TOLERANCE = 0.01
DSPEED_REGRESSION_TOLERANCE = 0.01


def get_new_open_pr_builds(prev_state=True):
    """Return the builds of the currently open zstd pull requests.

    The PR list is fetched from the GitHub API and pickled to
    PREVIOUS_PRS_FILENAME.  With ``prev_state`` true and a previous snapshot
    on disk, only PRs that are new or changed since that snapshot are
    returned; otherwise every open PR is returned.
    """
    prev_prs = None
    if os.path.exists(PREVIOUS_PRS_FILENAME):
        # The snapshot is a local file written by this script itself.
        with open(PREVIOUS_PRS_FILENAME, "rb") as f:
            prev_prs = pk.load(f)
    with urllib.request.urlopen(GITHUB_API_PR_URL) as response:
        data = json.loads(response.read().decode("utf-8"))
    prs = {
        d["url"]: {
            "user": d["user"]["login"],
            "branch": d["head"]["ref"],
            "hash": d["head"]["sha"].strip(),
        }
        for d in data
    }
    with open(PREVIOUS_PRS_FILENAME, "wb") as f:
        pk.dump(prs, f)
    if not prev_state or prev_prs is None:
        return list(prs.values())
    return [pr for url, pr in prs.items() if url not in prev_prs or prev_prs[url] != pr]


def _first_commit_hash(git_output):
    """Return the hash from the leading ``commit <sha>`` line of git output."""
    return git_output.split("\n")[0].split(" ")[1]


def _git_stdout(*args):
    """Run ``git <args>`` and return its decoded standard output."""
    return subprocess.run(["git", *args], stdout=subprocess.PIPE).stdout.decode("utf-8")


def get_latest_hashes():
    """Return ``[HEAD, first parent, second parent]`` hashes of the checkout.

    The third entry is an empty string when ``git show HEAD^2`` prints
    nothing (HEAD is not a merge commit).
    """
    sha1 = _first_commit_hash(_git_stdout("log", "-1"))
    sha2 = _first_commit_hash(_git_stdout("show", "{}^1".format(sha1)))
    second_parent = _git_stdout("show", "{}^2".format(sha1))
    sha3 = "" if len(second_parent) == 0 else _first_commit_hash(second_parent)
    return [sha1.strip(), sha2.strip(), sha3.strip()]


def get_builds_for_latest_hash():
    """Return the open-PR build whose head is one of the latest local hashes.

    The result is a list with at most one element.
    """
    hashes = get_latest_hashes()
    for b in get_new_open_pr_builds(False):
        if b["hash"] in hashes:
            return [b]
    return []


def clone_and_build(build):
    """Build zstd for ``build`` and return the path of the executable.

    A build with a user is cloned from that user's GitHub fork into
    ``zstd-<user>-<hash>`` (checking out ``hash`` when it is set); a build
    without a user is built from the parent directory.
    """
    if build["user"] is not None:
        github_url = GITHUB_URL_TEMPLATE.format(build["user"])
        os.system(
            """
            rm -rf zstd-{user}-{sha} &&
            git clone {github_url} zstd-{user}-{sha} &&
            cd zstd-{user}-{sha} &&
            {checkout_command}
            make &&
            cd ../
        """.format(
                user=build["user"],
                github_url=github_url,
                sha=build["hash"],
                checkout_command="git checkout {} &&".format(build["hash"])
                if build["hash"] is not None
                else "",
            )
        )
        return "zstd-{user}-{sha}/zstd".format(user=build["user"], sha=build["hash"])
    else:
        os.system("cd ../ && make && cd tests")
        return "../zstd"


def parse_benchmark_output(output):
    """Extract ``[cspeed, dspeed]`` from space-split benchmark output.

    The speeds are the tokens immediately before the first and the second
    ``"MB/s"`` token.

    Raises:
        ValueError: if fewer than two ``"MB/s"`` tokens are present.
    """
    idx = [i for i, d in enumerate(output) if d == "MB/s"]
    if len(idx) < 2:
        raise ValueError(
            "unexpected benchmark output (need two 'MB/s' fields): {!r}".format(
                " ".join(output)
            )
        )
    return [float(output[idx[0] - 1]), float(output[idx[1] - 1])]


def _run_benchmark(command):
    """Run a zstd benchmark command and parse the speeds from its stderr."""
    completed = subprocess.run(command, stderr=subprocess.PIPE)
    return parse_benchmark_output(completed.stderr.decode("utf-8").split(" "))


def benchmark_single(executable, level, filename):
    """Benchmark one file at one level once; return ``[cspeed, dspeed]``."""
    return _run_benchmark([executable, "-qb{}".format(level), filename])


def benchmark_n(executable, level, filename, n):
    """Benchmark ``n`` times and return the best ``(cspeed, dspeed)`` seen."""
    speeds_arr = [benchmark_single(executable, level, filename) for _ in range(n)]
    cspeed, dspeed = max(b[0] for b in speeds_arr), max(b[1] for b in speeds_arr)
    print(
        "Bench (executable={} level={} filename={}, iterations={}):\n\t[cspeed: {} MB/s, dspeed: {} MB/s]".format(
            os.path.basename(executable),
            level,
            os.path.basename(filename),
            n,
            cspeed,
            dspeed,
        )
    )
    return (cspeed, dspeed)


def benchmark(build, filenames, levels, iterations):
    """Build ``build`` and return speeds indexed as ``[level][filename]``."""
    executable = clone_and_build(build)
    return [
        [benchmark_n(executable, l, f, iterations) for f in filenames] for l in levels
    ]


def benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, level, iterations):
    """Benchmark a directory with a dictionary; return the best speeds seen."""
    command = [
        executable,
        "-qb{}".format(level),
        "-D",
        dictionary_filename,
        "-r",
        filenames_directory,
    ]
    cspeeds, dspeeds = [], []
    for _ in range(iterations):
        cspeed, dspeed = _run_benchmark(command)
        cspeeds.append(cspeed)
        dspeeds.append(dspeed)
    max_cspeed, max_dspeed = max(cspeeds), max(dspeeds)
    print(
        "Bench (executable={} level={} filenames_directory={}, dictionary_filename={}, iterations={}):\n\t[cspeed: {} MB/s, dspeed: {} MB/s]".format(
            os.path.basename(executable),
            level,
            os.path.basename(filenames_directory),
            os.path.basename(dictionary_filename),
            iterations,
            max_cspeed,
            max_dspeed,
        )
    )
    return (max_cspeed, max_dspeed)


def benchmark_dictionary(build, filenames_directory, dictionary_filename, levels, iterations):
    """Build ``build`` and return one dictionary benchmark result per level."""
    executable = clone_and_build(build)
    return [benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, l, iterations) for l in levels]


def parse_regressions_and_labels(old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build):
    """Return ``(cspeed_reg, dspeed_reg, baseline_label, test_label)``.

    A regression is the relative slowdown ``(old - new) / old``; a label is
    ``"user:branch (hash)"``.
    """
    cspeed_reg = (old_cspeed - new_cspeed) / old_cspeed
    dspeed_reg = (old_dspeed - new_dspeed) / old_dspeed
    baseline_label = "{}:{} ({})".format(
        baseline_build["user"], baseline_build["branch"], baseline_build["hash"]
    )
    test_label = "{}:{} ({})".format(
        test_build["user"], test_build["branch"], test_build["hash"]
    )
    return cspeed_reg, dspeed_reg, baseline_label, test_label


def _regression_messages(context, old_speeds, new_speeds, baseline_build, test_build):
    """Return the regression messages for one benchmark comparison.

    ``context`` is the already formatted text placed in the parentheses of
    the message (e.g. ``"level=1 filename=foo"``).  Both speed arguments are
    ``(cspeed, dspeed)`` pairs.
    """
    old_cspeed, old_dspeed = old_speeds
    new_cspeed, new_dspeed = new_speeds
    cspeed_reg, dspeed_reg, baseline_label, test_label = parse_regressions_and_labels(
        old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
    )
    checks = (
        ("COMPRESSION", old_cspeed, new_cspeed, cspeed_reg, CSPEED_REGRESSION_TOLERANCE),
        ("DECOMPRESSION", old_dspeed, new_dspeed, dspeed_reg, DSPEED_REGRESSION_TOLERANCE),
    )
    return [
        "[{} REGRESSION] ({})\n\t{} -> {}\n\t{} -> {} ({:0.2f}%)".format(
            kind, context, baseline_label, test_label, old, new, reg * 100.0
        )
        for kind, old, new, reg, tolerance in checks
        if reg > tolerance
    ]


def get_regressions(baseline_build, test_build, iterations, filenames, levels):
    """Benchmark both builds per file and level; return regression messages."""
    old = benchmark(baseline_build, filenames, levels, iterations)
    new = benchmark(test_build, filenames, levels, iterations)
    regressions = []
    for j, level in enumerate(levels):
        for k, filename in enumerate(filenames):
            regressions.extend(
                _regression_messages(
                    "level={} filename={}".format(level, filename),
                    old[j][k],
                    new[j][k],
                    baseline_build,
                    test_build,
                )
            )
    return regressions


def get_regressions_dictionary(baseline_build, test_build, filenames_directory, dictionary_filename, levels, iterations):
    """Benchmark both builds with a dictionary; return regression messages."""
    old = benchmark_dictionary(baseline_build, filenames_directory, dictionary_filename, levels, iterations)
    new = benchmark_dictionary(test_build, filenames_directory, dictionary_filename, levels, iterations)
    regressions = []
    for j, level in enumerate(levels):
        regressions.extend(
            _regression_messages(
                "level={} filenames_directory={} dictionary_filename={}".format(
                    level, filenames_directory, dictionary_filename
                ),
                old[j],
                new[j],
                baseline_build,
                test_build,
            )
        )
    return regressions


def _send_regression_email(body, emails):
    """Mail ``body`` to the whitespace-separated ``emails`` using mutt.

    The body contains PR user and branch names taken from the GitHub API, so
    it is passed on stdin and never interpolated into a shell command line.
    """
    subprocess.run(
        ["mutt", "-s", "[zstd regression] caused by new pr"] + emails.split(),
        input=(body + "\n").encode("utf-8"),
    )


def main(filenames, levels, iterations, builds=None, emails=None, continuous=False, frequency=DEFAULT_MAX_API_CALL_FREQUENCY_SEC, dictionary_filename=None):
    """Compare each build against MASTER_BUILD and report regressions.

    When ``builds`` is None the open pull requests are fetched, and fetched
    again on every pass in continuous mode, so each pass only benchmarks the
    PRs that appeared or changed since the previous one.  Regressions are
    printed, and e-mailed when ``emails`` is given.  With ``continuous``
    false a single pass is made; otherwise the loop sleeps ``frequency``
    seconds between passes and never returns.
    """
    refresh_builds = builds is None
    while True:
        if refresh_builds:
            builds = get_new_open_pr_builds()
        for test_build in builds:
            if dictionary_filename is None:
                regressions = get_regressions(
                    MASTER_BUILD, test_build, iterations, filenames, levels
                )
            else:
                regressions = get_regressions_dictionary(
                    MASTER_BUILD, test_build, filenames, dictionary_filename, levels, iterations
                )
            body = "\n".join(regressions)
            if len(regressions) > 0:
                if emails is not None:
                    _send_regression_email(body, emails)
                    print("Emails sent to {}".format(emails))
                print(body)
        if not continuous:
            break
        time.sleep(frequency)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--directory", help="directory with files to benchmark", default="golden-compression")
    parser.add_argument("--levels", help="levels to test eg ('1,2,3')", default="1")
    parser.add_argument("--iterations", help="number of benchmark iterations to run", default="1")
    parser.add_argument("--emails", help="email addresses of people who will be alerted upon regression. Only for continuous mode", default=None)
    parser.add_argument("--frequency", help="specifies the number of seconds to wait before each successive check for new PRs in continuous mode", default=DEFAULT_MAX_API_CALL_FREQUENCY_SEC)
    parser.add_argument("--mode", help="'fastmode', 'onetime', 'current', or 'continuous' (see README.md for details)", default="current")
    parser.add_argument("--dict", help="filename of dictionary to use (when set, this dictioanry will be used to compress the files provided inside --directory)", default=None)

    args = parser.parse_args()
    filenames = args.directory
    levels = [int(l) for l in args.levels.split(",")]
    mode = args.mode
    iterations = int(args.iterations)
    emails = args.emails
    frequency = int(args.frequency)
    dictionary_filename = args.dict

    # Without a dictionary every file of the directory is benchmarked on its
    # own; with one, the directory itself is handed to zstd (-r).
    if dictionary_filename is None:
        filenames = glob.glob("{}/**".format(filenames))

    if len(filenames) == 0:
        print("0 files found")
        raise SystemExit(0)

    if mode == "onetime":
        main(filenames, levels, iterations, frequency=frequency, dictionary_filename=dictionary_filename)
    elif mode == "current":
        builds = [{"user": None, "branch": "None", "hash": None}]
        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
    elif mode == "fastmode":
        builds = [{"user": "facebook", "branch": "master", "hash": None}]
        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
    else:
        main(filenames, levels, iterations, None, emails, True, frequency=frequency, dictionary_filename=dictionary_filename)

# --- scraped-diff residue: header of the next file in this dump, kept verbatim ---
# diff --git a/src/zstd/tests/bigdict.c b/src/zstd/tests/bigdict.c new file mode 100644 index 000000000..aeda56cb5 --- /dev/null +++ b/src/zstd/tests/bigdict.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * All rights reserved.
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <assert.h> +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include "datagen.h" +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + /* compress() : feeds `src` to ZSTD_compressStream2() with directive `end`, and passes every produced chunk straight to ZSTD_decompressStream(), writing into `roundtrip`. @return 0 on success, 1 on a zstd error or when the frame is not finished after ZSTD_e_end. */ +static int +compress(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + void* roundtrip, ZSTD_EndDirective end) +{ + ZSTD_inBuffer in = {src, srcSize, 0}; + ZSTD_outBuffer out = {dst, dstCapacity, 0}; + int ended = 0; + + while (!ended && (in.pos < in.size || out.pos > 0)) { + size_t rc; + out.pos = 0; + rc = ZSTD_compressStream2(cctx, &out, &in, end); + if (ZSTD_isError(rc)) + return 1; + if (end == ZSTD_e_end && rc == 0) + ended = 1; + { + ZSTD_inBuffer rtIn = {dst, out.pos, 0}; + ZSTD_outBuffer rtOut = {roundtrip, srcSize, 0}; + rc = 1; + while (rtIn.pos < rtIn.size || rtOut.pos > 0) { + rtOut.pos = 0; + rc = ZSTD_decompressStream(dctx, &rtOut, &rtIn); + if (ZSTD_isError(rc)) { + fprintf(stderr, "Decompression error: %s\n", ZSTD_getErrorName(rc)); + return 1; + } + if (rc == 0) + break; + } + if (ended && rc != 0) { + fprintf(stderr, "Frame not finished!\n"); + return 1; + } + } + } + + return 0; +} + /* main() : round-trips generated data through one multi-pass frame using a windowLog of 31; returns 0 on success, 1 on any failure. */ +int main(int argc, const char** argv) +{ + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + const size_t dataSize = (size_t)1 << 30; + const size_t outSize = ZSTD_compressBound(dataSize); + const size_t bufferSize = (size_t)1 << 31; + char* buffer = (char*)malloc(bufferSize); + void* out = malloc(outSize); + void* roundtrip = malloc(dataSize); + (void)argc; + (void)argv; + + if (!buffer || !out || !roundtrip || !cctx || !dctx) { + fprintf(stderr, "Allocation failure\n"); + return 1; 
+ } + + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 31))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_overlapLog, 9))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btopt))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, 7))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, 7))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, 1))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 10))) + return 1; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, 10))) + return 1; + + if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 31))) + return 1; + + RDG_genBuffer(buffer, bufferSize, 1.0, 0.0, 0xbeefcafe); + + /* Compress 11 GB in total : ten 1 GB passes with ZSTD_e_continue here, then one last 1 GB pass with ZSTD_e_end below (this note used to say "30 GB", which the loop bound does not match) */ + { + int i; + for (i = 0; i < 10; ++i) { + fprintf(stderr, "Compressing 1 GB\n"); + if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_continue)) + return 1; + } + } + fprintf(stderr, "Compressing 1 GB\n"); + if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_end)) + return 1; + + fprintf(stderr, "Success!\n"); + + free(roundtrip); + free(out); + free(buffer); + ZSTD_freeDCtx(dctx); + ZSTD_freeCCtx(cctx); + return 0; +} diff --git a/src/zstd/tests/checkTag.c b/src/zstd/tests/checkTag.c new file mode 100644 index 000000000..90af24ab1 --- /dev/null +++ b/src/zstd/tests/checkTag.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +/* checkTag : validation tool for libzstd + * command : + * $ ./checkTag tag + * checkTag validates tags of following format : v[0-9].[0-9].[0-9]{any} + * The tag is then compared to zstd version number. + * They are compatible if first 3 digits are identical. + * Anything beyond that is free, and doesn't impact validation. + * Example : tag v1.8.1.2 is compatible with version 1.8.1 + * When tag and version are not compatible, program exits with error code 1. + * When they are compatible, it exits with code 0. + * checkTag is intended to be used in automated testing environment. + */ + +#include <stdio.h> /* printf */ +#include <string.h> /* strlen, strncmp */ +#include "zstd.h" /* ZSTD_VERSION_STRING */ + + +/* validate() : + * @return 1 if tag is compatible, 0 if not. + * A tag is compatible when it starts with 'v' immediately followed by ZSTD_VERSION_STRING. + */ +static int validate(const char* const tag) +{ + size_t const tagLength = strlen(tag); + size_t const verLength = strlen(ZSTD_VERSION_STRING); + + if (tagLength < 2) return 0; + if (tag[0] != 'v') return 0; + if (tagLength <= verLength) return 0; + + if (strncmp(ZSTD_VERSION_STRING, tag+1, verLength)) return 0; + + return 1; +} + /* main() : expects exactly one argument, the tag. @return 0 when the tag is compatible, 1 when it is not, 2 on incorrect usage. */ +int main(int argc, const char** argv) +{ + const char* const exeName = argv[0]; + const char* const tag = argv[1]; + if (argc!=2) { + printf("incorrect usage : %s tag \n", exeName); + return 2; + } + + printf("Version : %s \n", ZSTD_VERSION_STRING); + printf("Tag : %s \n", tag); + + if (validate(tag)) { + printf("OK : tag is compatible with zstd version \n"); + return 0; + } + + printf("!! error : tag and versions are not compatible !! \n"); + return 1; +} diff --git a/src/zstd/tests/datagencli.c b/src/zstd/tests/datagencli.c new file mode 100644 index 000000000..713ca9963 --- /dev/null +++ b/src/zstd/tests/datagencli.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Dependencies +**************************************/ +#include "util.h" /* Compiler options */ +#include <stdio.h> /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SIZE_DEFAULT ((64 KB) + 1) +#define SEED_DEFAULT 0 +#define COMPRESSIBILITY_DEFAULT 50 + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; + + +/*-******************************************************* +* Command line +*********************************************************/ /* usage() : prints the option summary to stderr (through DISPLAY) and returns 0 */ +static int usage(const char* programName) +{ + DISPLAY( "Compressible data generator\n"); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", + COMPRESSIBILITY_DEFAULT); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + /* main() : parses -g (size, with optional K/M/G and B suffixes), -s (seed), -P (compressibility in %, capped at 100), -L (literal probability in %, capped at 100), -v and -h, then emits the data with RDG_genStdout(). Any other option letter prints the usage text and returns its result. */ +int main(int argc, const char** argv) +{ + unsigned probaU32 = COMPRESSIBILITY_DEFAULT; + double litProba = 0.0; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; + const char* const programName = argv[0]; + + int argNb; + for(argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + + 
if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. Aggregated commands are allowed */ + if (*argument=='-') { + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + return usage(programName); + case 'g': + argument++; + size=0; + while ((*argument>='0') && (*argument<='9')) + size *= 10, size += *argument++ - '0'; + if (*argument=='K') { size <<= 10; argument++; } + if (*argument=='M') { size <<= 20; argument++; } + if (*argument=='G') { size <<= 30; argument++; } + if (*argument=='B') { argument++; } + break; + case 's': + argument++; + seed=0; + while ((*argument>='0') && (*argument<='9')) + seed *= 10, seed += *argument++ - '0'; + break; + case 'P': + argument++; + probaU32 = 0; + while ((*argument>='0') && (*argument<='9')) + probaU32 *= 10, probaU32 += *argument++ - '0'; + if (probaU32>100) probaU32 = 100; + break; + case 'L': /* hidden argument : Literal distribution probability */ + argument++; + litProba=0.; + while ((*argument>='0') && (*argument<='9')) + litProba *= 10, litProba += *argument++ - '0'; + if (litProba>100.) litProba=100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); + } + } } } /* for(argNb=1; argNb<argc; argNb++) */ + + DISPLAYLEVEL(4, "Compressible data Generator \n"); + if (probaU32!=COMPRESSIBILITY_DEFAULT) + DISPLAYLEVEL(3, "Compressibility : %i%%\n", probaU32); + DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed); + + RDG_genStdout(size, (double)probaU32/100, litProba, seed); + DISPLAYLEVEL(1, "\n"); + + return 0; +} diff --git a/src/zstd/tests/decodecorpus.c b/src/zstd/tests/decodecorpus.c new file mode 100644 index 000000000..a46fc24d8 --- /dev/null +++ b/src/zstd/tests/decodecorpus.c @@ -0,0 +1,1932 @@ +/* + * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <limits.h> +#include <math.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "util.h" +#include "timefn.h" /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */ +#include "zstd.h" +#include "zstd_internal.h" +#include "mem.h" +#define ZDICT_STATIC_LINKING_ONLY +#include "zdict.h" + +/* Direct access to internal compression functions is required */ +#include "zstd_compress.c" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64 */ + +#ifndef MIN + #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef MAX_PATH + #ifdef PATH_MAX + #define MAX_PATH PATH_MAX + #else + #define MAX_PATH 256 + #endif +#endif + +/*-************************************ +* DISPLAY Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +#define DISPLAYUPDATE(...) 
\ + do { \ + if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \ + (g_displayLevel >= 4)) { \ + g_displayClock = UTIL_getTime(); \ + DISPLAY(__VA_ARGS__); \ + if (g_displayLevel >= 4) fflush(stderr); \ + } \ + } while (0) + +static const U64 g_refreshRate = SEC_TO_MICRO / 6; +static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define CHECKERR(code) \ + do { \ + if (ZSTD_isError(code)) { \ + DISPLAY("Error occurred while generating data: %s\n", \ + ZSTD_getErrorName(code)); \ + exit(1); \ + } \ + } while (0) + +/*-******************************************************* +* Random function +*********************************************************/ +static U32 RAND(U32* src) +{ +#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = RAND_rotl32(rand32, 13); + *src = rand32; + return RAND_rotl32(rand32, 27); +#undef RAND_rotl32 +} + +#define DISTSIZE (8192) + +/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ +static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i < size; i++) { + op[i] = (BYTE) (RAND(seed) % (maxSymb + 1)); + } +} + +/* Write `size` random bytes into `ptr` */ +static void RAND_buffer(U32* seed, void* ptr, size_t size) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i + 4 <= size; i += 4) { + MEM_writeLE32(op + i, RAND(seed)); + } + for (; i < size; i++) { + op[i] = RAND(seed) & 0xff; + } +} + +/* Write `size` bytes into `ptr` following the distribution `dist` */ +static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i < size; i++) { + op[i] = dist[RAND(seed) % DISTSIZE]; + } +} + +/* Generate a random distribution where the frequency of each symbol follows a + * geometric distribution defined by 
`weight` + * `dist` should have size at least `DISTSIZE` */ +static void RAND_genDist(U32* seed, BYTE* dist, double weight) +{ + size_t i = 0; + size_t statesLeft = DISTSIZE; + BYTE symb = (BYTE) (RAND(seed) % 256); + BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ + + while (i < DISTSIZE) { + size_t states = ((size_t)(weight * statesLeft)) + 1; + size_t j; + for (j = 0; j < states && i < DISTSIZE; j++, i++) { + dist[i] = symb; + } + + symb += step; + statesLeft -= states; + } +} + +/* Generates a random number in the range [min, max) */ +static inline U32 RAND_range(U32* seed, U32 min, U32 max) +{ + return (RAND(seed) % (max-min)) + min; +} + +#define ROUND(x) ((U32)(x + 0.5)) + +/* Generates a random number in an exponential distribution with mean `mean` */ +static double RAND_exp(U32* seed, double mean) +{ + double const u = RAND(seed) / (double) UINT_MAX; + return log(1-u) * (-mean); +} + +/*-******************************************************* +* Constants and Structs +*********************************************************/ +const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"}; + +#define MAX_DECOMPRESSED_SIZE_LOG 20 +#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG) + +#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */ + +#define MIN_SEQ_LEN (3) +#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN) + +BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; +BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; +BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; + +seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; +BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */ +BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; +BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; +BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; + +unsigned WKSP[HUF_WORKSPACE_SIZE_U32]; + +typedef struct { + size_t contentSize; /* 0 means unknown (unless contentSize == 
windowSize == 0) */ + unsigned windowSize; /* contentSize >= windowSize means single segment */ +} frameHeader_t; + +/* For repeat modes */ +typedef struct { + U32 rep[ZSTD_REP_NUM]; + + int hufInit; + /* the distribution used in the previous block for repeat mode */ + BYTE hufDist[DISTSIZE]; + U32 hufTable [256]; /* HUF_CElt is an incomplete type */ + + int fseInit; + FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + + /* Symbols that were present in the previous distribution, for use with + * set_repeat */ + BYTE litlengthSymbolSet[36]; + BYTE offsetSymbolSet[29]; + BYTE matchlengthSymbolSet[53]; +} cblockStats_t; + +typedef struct { + void* data; + void* dataStart; + void* dataEnd; + + void* src; + void* srcStart; + void* srcEnd; + + frameHeader_t header; + + cblockStats_t stats; + cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ +} frame_t; + +typedef struct { + int useDict; + U32 dictID; + size_t dictContentSize; + BYTE* dictContent; +} dictInfo; + +typedef enum { + gt_frame = 0, /* generate frames */ + gt_block, /* generate compressed blocks without block/frame headers */ +} genType_e; + +/*-******************************************************* +* Global variables (set from command line) +*********************************************************/ +U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */ +U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */ + +/*-******************************************************* +* Generator Functions +*********************************************************/ + +struct { + int contentSize; /* force the content size to be present */ +} opts; /* advanced options on generation */ + +/* Generate and write a random frame header */ +static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info) +{ + BYTE* const op = 
frame->data; + size_t pos = 0; + frameHeader_t fh; + + BYTE windowByte = 0; + + int singleSegment = 0; + int contentSizeFlag = 0; + int fcsCode = 0; + + memset(&fh, 0, sizeof(fh)); + + /* generate window size */ + { + /* Follow window algorithm from specification */ + int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); + int const mantissa = RAND(seed) % 8; + windowByte = (BYTE) ((exponent << 3) | mantissa); + fh.windowSize = (1U << (exponent + 10)); + fh.windowSize += fh.windowSize / 8 * mantissa; + } + + { + /* Generate random content size */ + size_t highBit; + if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) { + /* do content of at least 128 bytes */ + highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog); + } else if (RAND(seed) & 3) { + /* do small content */ + highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog)); + } else { + /* 0 size frame */ + highBit = 0; + } + fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; + + /* provide size sometimes */ + contentSizeFlag = opts.contentSize | (RAND(seed) & 1); + + if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { + /* do single segment sometimes */ + fh.windowSize = (U32) fh.contentSize; + singleSegment = 1; + } + } + + if (contentSizeFlag) { + /* Determine how large fcs field has to be */ + int minFcsCode = (fh.contentSize >= 256) + + (fh.contentSize >= 65536 + 256) + + (fh.contentSize > 0xFFFFFFFFU); + if (!singleSegment && !minFcsCode) { + minFcsCode = 1; + } + fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); + if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; + } + + /* write out the header */ + MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); + pos += 4; + + { + /* + * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6) + * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. 
(bit 5) + * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2) + * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0) + * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header + */ + int const dictBits = info.useDict ? 3 : 0; + BYTE const frameHeaderDescriptor = + (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); + op[pos++] = frameHeaderDescriptor; + } + + if (!singleSegment) { + op[pos++] = windowByte; + } + if (info.useDict) { + MEM_writeLE32(op + pos, (U32) info.dictID); + pos += 4; + } + if (contentSizeFlag) { + switch (fcsCode) { + default: /* Impossible */ + case 0: op[pos++] = (BYTE) fh.contentSize; break; + case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break; + case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break; + case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break; + } + } + + DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize); + DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize); + DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag); + DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment); + + frame->data = op + pos; + frame->header = fh; +} + +/* Write a literal block in either raw or RLE form, return the literals size */ +static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* op = (BYTE*)frame->data; + int const type = RAND(seed) % 2; + int const sizeFormatDesc = RAND(seed) % 8; + size_t litSize; + size_t maxLitSize = MIN(contentSize, g_maxBlockSize); + + if (sizeFormatDesc == 0) { + /* Size_FormatDesc = ?0 */ + maxLitSize = MIN(maxLitSize, 31); + } else if (sizeFormatDesc <= 4) { + /* Size_FormatDesc = 01 */ + maxLitSize = MIN(maxLitSize, 4095); + } else { + /* Size_Format = 11 */ + maxLitSize = MIN(maxLitSize, 1048575); + } + + 
litSize = RAND(seed) % (maxLitSize + 1); + if (frame->src == frame->srcStart && litSize == 0) { + litSize = 1; /* no empty literals if there's nothing preceding this block */ + } + if (litSize + 3 > contentSize) { + litSize = contentSize; /* no matches shorter than 3 are allowed */ + } + /* use smallest size format that fits */ + if (litSize < 32) { + op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; + op += 1; + } else if (litSize < 4096) { + op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; + op[1] = (litSize >> 4) & 0xff; + op += 2; + } else { + op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; + op[1] = (litSize >> 4) & 0xff; + op[2] = (litSize >> 12) & 0xff; + op += 3; + } + + if (type == 0) { + /* Raw literals */ + DISPLAYLEVEL(4, " raw literals\n"); + + RAND_buffer(seed, LITERAL_BUFFER, litSize); + memcpy(op, LITERAL_BUFFER, litSize); + op += litSize; + } else { + /* RLE literals */ + BYTE const symb = (BYTE) (RAND(seed) % 256); + + DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (unsigned)symb); + + memset(LITERAL_BUFFER, symb, litSize); + op[0] = symb; + op++; + } + + frame->data = op; + + return litSize; +} + +/* Generate a Huffman header for the given source */ +static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, + const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + + unsigned huffLog = 11; + unsigned maxSymbolValue = 255; + + unsigned count[HUF_SYMBOLVALUE_MAX+1]; + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP)); + assert(!HIST_isError(largest)); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } + + /* Build Huffman Tree */ + /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ + huffLog = RAND_range(seed, 
ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); + DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); + { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); + CHECKERR(maxBits); + huffLog = (U32)maxBits; + } + + /* Write table description header */ + { size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog); + if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ + op += hSize; + } + + return op - ostart; +} + +/* Write a Huffman coded literals block and return the literals size */ +static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* origop = (BYTE*)frame->data; + BYTE* opend = (BYTE*)frame->dataEnd; + BYTE* op; + BYTE* const ostart = origop; + int const sizeFormat = RAND(seed) % 4; + size_t litSize; + size_t hufHeaderSize = 0; + size_t compressedSize = 0; + size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize); + + symbolEncodingType_e hType; + + if (contentSize < 64) { + /* make sure we get reasonably-sized literals for compression */ + return ERROR(GENERIC); + } + + DISPLAYLEVEL(4, " compressed literals\n"); + + switch (sizeFormat) { + case 0: /* fall through, size is the same as case 1 */ + case 1: + maxLitSize = MIN(maxLitSize, 1023); + origop += 3; + break; + case 2: + maxLitSize = MIN(maxLitSize, 16383); + origop += 4; + break; + case 3: + maxLitSize = MIN(maxLitSize, 262143); + origop += 5; + break; + default:; /* impossible */ + } + + do { + op = origop; + do { + litSize = RAND(seed) % (maxLitSize + 1); + } while (litSize < 32); /* avoid small literal sizes */ + if (litSize + 3 > contentSize) { + litSize = contentSize; /* no matches shorter than 3 are allowed */ + } + + /* most of the time generate a new distribution */ + if ((RAND(seed) & 3) || !frame->stats.hufInit) { + do { + if (RAND(seed) & 3) { + /* add 10 to ensure some compressibility */ + double const weight = ((RAND(seed) % 90) + 10) / 100.0; + + 
DISPLAYLEVEL(5, " distribution weight: %d%%\n", + (int)(weight * 100)); + + RAND_genDist(seed, frame->stats.hufDist, weight); + } else { + /* sometimes do restricted range literals to force + * non-huffman headers */ + DISPLAYLEVEL(5, " small range literals\n"); + RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, + 15); + } + RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, + litSize); + + /* generate the header from the distribution instead of the + * actual data to avoid bugs with symbols that were in the + * distribution but never showed up in the output */ + hufHeaderSize = writeHufHeader( + seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op, + frame->stats.hufDist, DISTSIZE); + CHECKERR(hufHeaderSize); + /* repeat until a valid header is written */ + } while (hufHeaderSize == 0); + op += hufHeaderSize; + hType = set_compressed; + + frame->stats.hufInit = 1; + } else { + /* repeat the distribution/table from last time */ + DISPLAYLEVEL(5, " huffman repeat stats\n"); + RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, + litSize); + hufHeaderSize = 0; + hType = set_repeat; + } + + do { + compressedSize = + sizeFormat == 0 + ? 
HUF_compress1X_usingCTable( + op, opend - op, LITERAL_BUFFER, litSize, + (HUF_CElt*)frame->stats.hufTable) + : HUF_compress4X_usingCTable( + op, opend - op, LITERAL_BUFFER, litSize, + (HUF_CElt*)frame->stats.hufTable); + CHECKERR(compressedSize); + /* this only occurs when it could not compress or similar */ + } while (compressedSize <= 0); + + op += compressedSize; + + compressedSize += hufHeaderSize; + DISPLAYLEVEL(5, " regenerated size: %u\n", (unsigned)litSize); + DISPLAYLEVEL(5, " compressed size: %u\n", (unsigned)compressedSize); + if (compressedSize >= litSize) { + DISPLAYLEVEL(5, " trying again\n"); + /* if we have to try again, reset the stats so we don't accidentally + * try to repeat a distribution we just made */ + frame->stats = frame->oldStats; + } else { + break; + } + } while (1); + + /* write header */ + switch (sizeFormat) { + case 0: /* fall through, size is the same as case 1 */ + case 1: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 14); + MEM_writeLE24(ostart, header); + break; + } + case 2: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 18); + MEM_writeLE32(ostart, header); + break; + } + case 3: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 22); + MEM_writeLE32(ostart, header); + ostart[4] = (BYTE)(compressedSize >> 10); + break; + } + default:; /* impossible */ + } + + frame->data = op; + return litSize; +} + +static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) +{ + /* only do compressed for larger segments to avoid compressibility issues */ + if (RAND(seed) & 7 && contentSize >= 64) { + return writeLiteralsBlockCompressed(seed, frame, contentSize); + } else { + return writeLiteralsBlockSimple(seed, frame, contentSize); + } +} + +static inline void initSeqStore(seqStore_t *seqStore) { + seqStore->maxNbSeq = MAX_NB_SEQ; + seqStore->maxNbLit = 
ZSTD_BLOCKSIZE_MAX; + seqStore->sequencesStart = SEQUENCE_BUFFER; + seqStore->litStart = SEQUENCE_LITERAL_BUFFER; + seqStore->llCode = SEQUENCE_LLCODE; + seqStore->mlCode = SEQUENCE_MLCODE; + seqStore->ofCode = SEQUENCE_OFCODE; + + ZSTD_resetSeqStore(seqStore); +} + +/* Randomly generate sequence commands */ +static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, + size_t contentSize, size_t literalsSize, dictInfo info) +{ + /* The total length of all the matches */ + size_t const remainingMatch = contentSize - literalsSize; + size_t excessMatch = 0; + U32 numSequences = 0; + + U32 i; + + + const BYTE* literals = LITERAL_BUFFER; + BYTE* srcPtr = frame->src; + + if (literalsSize != contentSize) { + /* each match must be at least MIN_SEQ_LEN, so this is the maximum + * number of sequences we can have */ + U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; + numSequences = (RAND(seed) % maxSequences) + 1; + + /* the extra match lengths we have to allocate to each sequence */ + excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; + } + + DISPLAYLEVEL(5, " total match lengths: %u\n", (unsigned)remainingMatch); + for (i = 0; i < numSequences; i++) { + /* Generate match and literal lengths by exponential distribution to + * ensure nice numbers */ + U32 matchLen = + MIN_SEQ_LEN + + ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i))); + U32 literalLen = + (RAND(seed) & 7) + ? 
ROUND(RAND_exp(seed, + literalsSize / + (double)(numSequences - i))) + : 0; + /* actual offset, code to send, and point to copy up to when shifting + * codes in the repeat offsets history */ + U32 offset, offsetCode, repIndex; + + /* bounds checks */ + matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN); + literalLen = MIN(literalLen, (U32) literalsSize); + if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; + if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch; + + memcpy(srcPtr, literals, literalLen); + srcPtr += literalLen; + do { + if (RAND(seed) & 7) { + /* do a normal offset */ + U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart); + offset = (RAND(seed) % + MIN(frame->header.windowSize, + (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + + 1; + if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) { + /* need to occasionally generate offsets that go past the start */ + /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */ + U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1; + offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart; + if (offset > frame->header.windowSize) { + if (lenPastStart < MIN_SEQ_LEN) { + /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */ + /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */ + /* make sure lenPastStart does not go past dictionary start though */ + lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize); + offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart; + } + { + U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart); + matchLen = MIN(matchLen, matchLenBound); + } + } + } + offsetCode = offset + ZSTD_REP_MOVE; + repIndex = 2; + } else { + /* do a repeat offset */ + offsetCode = RAND(seed) % 3; + if (literalLen > 0) { 
+ offset = frame->stats.rep[offsetCode]; + repIndex = offsetCode; + } else { + /* special case */ + offset = offsetCode == 2 ? frame->stats.rep[0] - 1 + : frame->stats.rep[offsetCode + 1]; + repIndex = MIN(2, offsetCode + 1); + } + } + } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); + + { + size_t j; + BYTE* const dictEnd = info.dictContent + info.dictContentSize; + for (j = 0; j < matchLen; j++) { + if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) { + /* copy from dictionary instead of literals */ + size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart); + *srcPtr = *(dictEnd - dictOffset); + } + else { + *srcPtr = *(srcPtr-offset); + } + srcPtr++; + } + } + + { int r; + for (r = repIndex; r > 0; r--) { + frame->stats.rep[r] = frame->stats.rep[r - 1]; + } + frame->stats.rep[0] = offset; + } + + DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", + (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen); + DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u", + (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i); + DISPLAYLEVEL(6, "\n"); + if (offsetCode < 3) { + DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex); + } + /* use libzstd sequence handling */ + ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen, + offsetCode, matchLen - MINMATCH); + + literalsSize -= literalLen; + excessMatch -= (matchLen - MIN_SEQ_LEN); + literals += literalLen; + } + + memcpy(srcPtr, literals, literalsSize); + srcPtr += literalsSize; + DISPLAYLEVEL(6, " excess literals: %5u", (unsigned)literalsSize); + DISPLAYLEVEL(7, " srcPos: %8u", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart)); + DISPLAYLEVEL(6, "\n"); + + return numSequences; +} + +static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) +{ + size_t i; + + memset(set, 0, (size_t)maxSymbolValue+1); + + for (i = 0; i < len; i++) { + set[symbols[i]] = 1; + } +} + +static int 
isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { + return 0; + } + } + return 1; +} + +static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, + size_t nbSeq) +{ + /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ + unsigned count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; + FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; + FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const oend = (BYTE*)frame->dataEnd; + BYTE* op = (BYTE*)frame->data; + BYTE* seqHead; + BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; + + /* literals compressing block removed so that can be done separately */ + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + + if (nbSeq==0) { + frame->data = op; + return 0; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + + /* CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ + assert(!HIST_isError(mostFrequent)); + if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(llCodeTable, nbSeq, + 
frame->stats.litlengthSymbolSet, 35)) { + /* maybe do repeat mode if we're allowed to */ + LLtype = set_repeat; + } else if (mostFrequent == nbSeq) { + /* do RLE if we have the chance */ + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = set_rle; + } else if (!(RAND(seed) & 3)) { + /* maybe use the default distribution */ + FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_basic; + } else { + /* fall back on a full table */ + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_compressed; + } } + + /* CTable for Offsets */ + /* see Literal Lengths for descriptions of mode choices */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ + assert(!HIST_isError(mostFrequent)); + if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(ofCodeTable, nbSeq, + frame->stats.offsetSymbolSet, 28)) { + Offtype = set_repeat; + } else if (mostFrequent == nbSeq) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = set_rle; + } else if (!(RAND(seed) & 3)) { + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { 
count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_compressed; + } } + + /* CTable for MatchLengths */ + /* see Literal Lengths for descriptions of mode choices */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ + assert(!HIST_isError(mostFrequent)); + if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(mlCodeTable, nbSeq, + frame->stats.matchlengthSymbolSet, 52)) { + MLtype = set_repeat; + } else if (mostFrequent == nbSeq) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = set_rle; + } else if (!(RAND(seed) & 3)) { + /* sometimes do default distribution */ + FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_basic; + } else { + /* fall back on table */ + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_compressed; + } } + frame->stats.fseInit = 1; + initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); + initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); + 
initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); + + DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype); + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, op, oend-op)), + dstSize_tooSmall, "not enough space remaining"); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ + BYTE const llCode = llCodeTable[n]; + BYTE const ofCode = ofCodeTable[n]; + BYTE const mlCode = mlCodeTable[n]; + U32 const llBits = LL_bits[llCode]; + U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ + U32 const mlBits = ML_bits[mlCode]; + /* (7)*/ /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ + if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, 
llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + frame->data = op; + + return 0; +} + +static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, + size_t literalsSize, dictInfo info) +{ + seqStore_t seqStore; + size_t numSequences; + + + initSeqStore(&seqStore); + + /* randomly generate sequences */ + numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info); + /* write them out to the frame data */ + CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); + + return numSequences; +} + +static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info) +{ + BYTE* const blockStart = (BYTE*)frame->data; + size_t literalsSize; + size_t nbSeq; + + DISPLAYLEVEL(4, " compressed block:\n"); + + literalsSize = writeLiteralsBlock(seed, frame, contentSize); + + DISPLAYLEVEL(4, " literals size: %u\n", (unsigned)literalsSize); + + nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info); + + DISPLAYLEVEL(4, " number of sequences: %u\n", (unsigned)nbSeq); + + return (BYTE*)frame->data - blockStart; +} + +static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, + int lastBlock, dictInfo info) +{ + int const blockTypeDesc = RAND(seed) % 8; + size_t blockSize; + int blockType; + + BYTE *const header = (BYTE*)frame->data; + BYTE *op = header + 3; + + DISPLAYLEVEL(4, " block:\n"); 
+ DISPLAYLEVEL(4, " block content size: %u\n", (unsigned)contentSize); + DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no"); + + if (blockTypeDesc == 0) { + /* Raw data frame */ + + RAND_buffer(seed, frame->src, contentSize); + memcpy(op, frame->src, contentSize); + + op += contentSize; + blockType = 0; + blockSize = contentSize; + } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) { + /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/ + BYTE const symbol = RAND(seed) & 0xff; + + op[0] = symbol; + memset(frame->src, symbol, contentSize); + + op++; + blockType = 1; + blockSize = contentSize; + } else { + /* compressed, most common */ + size_t compressedSize; + blockType = 2; + + frame->oldStats = frame->stats; + + frame->data = op; + compressedSize = writeCompressedBlock(seed, frame, contentSize, info); + if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */ + blockType = 0; + memcpy(op, frame->src, contentSize); + + op += contentSize; + blockSize = contentSize; /* fall back on raw block if data doesn't + compress */ + + frame->stats = frame->oldStats; /* don't update the stats */ + } else { + op += compressedSize; + blockSize = compressedSize; + } + } + frame->src = (BYTE*)frame->src + contentSize; + + DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]); + DISPLAYLEVEL(4, " block size field: %u\n", (unsigned)blockSize); + + header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff); + MEM_writeLE16(header + 1, (U16) (blockSize >> 5)); + + frame->data = op; +} + +static void writeBlocks(U32* seed, frame_t* frame, dictInfo info) +{ + size_t contentLeft = frame->header.contentSize; + size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); + while (1) { + /* 1 in 4 chance of ending frame */ + int const lastBlock = contentLeft > maxBlockSize ? 
0 : !(RAND(seed) & 3); + size_t blockContentSize; + if (lastBlock) { + blockContentSize = contentLeft; + } else { + if (contentLeft > 0 && (RAND(seed) & 7)) { + /* some variable size block */ + blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); + } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { + /* some full size block */ + blockContentSize = maxBlockSize; + } else { + /* some empty block */ + blockContentSize = 0; + } + } + + writeBlock(seed, frame, blockContentSize, lastBlock, info); + + contentLeft -= blockContentSize; + if (lastBlock) break; + } +} + +static void writeChecksum(frame_t* frame) +{ + /* write checksum so implementations can verify their output */ + U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); + DISPLAYLEVEL(3, " checksum: %08x\n", (unsigned)digest); + MEM_writeLE32(frame->data, (U32)digest); + frame->data = (BYTE*)frame->data + 4; +} + +static void outputBuffer(const void* buf, size_t size, const char* const path) +{ + /* write data out to file */ + const BYTE* ip = (const BYTE*)buf; + FILE* out; + if (path) { + out = fopen(path, "wb"); + } else { + out = stdout; + } + if (!out) { + fprintf(stderr, "Failed to open file at %s: ", path); + perror(NULL); + exit(1); + } + + { size_t fsize = size; + size_t written = 0; + while (written < fsize) { + written += fwrite(ip + written, 1, fsize - written, out); + if (ferror(out)) { + fprintf(stderr, "Failed to write to file at %s: ", path); + perror(NULL); + exit(1); + } + } + } + + if (path) { + fclose(out); + } +} + +static void initFrame(frame_t* fr) +{ + memset(fr, 0, sizeof(*fr)); + fr->data = fr->dataStart = FRAME_BUFFER; + fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); + fr->src = fr->srcStart = CONTENT_BUFFER; + fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); + + /* init repeat codes */ + fr->stats.rep[0] = 1; + fr->stats.rep[1] = 4; + fr->stats.rep[2] = 8; +} + +/** + * Generated a single zstd compressed block with no 
/**
 * Generates a single zstd compressed block with no block/frame header.
 * The frame is re-initialized with initFrame(), then random window and content
 * sizes are drawn and writeCompressedBlock() is attempted until the result
 * is strictly smaller than the content it represents.
 * `seed` is passed by value; every RAND() draw advances the local copy.
 * Returns the final seed.
 */
static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
{
    size_t blockContentSize;
    int blockWritten = 0;
    BYTE* op;
    DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed);
    initFrame(frame);
    op = (BYTE*)frame->data;   /* every attempt restarts writing from here */

    while (!blockWritten) {
        size_t cSize;
        /* generate window size : 2^(exponent+10), plus mantissa eighths of it */
        {   int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
            int const mantissa = RAND(&seed) % 8;
            frame->header.windowSize = (1U << (exponent + 10));
            frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
        }

        /* generate content size */
        {   size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
            if (RAND(&seed) & 15) {
                /* some full size blocks (15 draws out of 16) */
                blockContentSize = maxBlockSize;
            } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
                /* some small blocks < 128 bytes */
                blockContentSize = RAND(&seed) % (1U << 7);
            } else {
                /* some variable size blocks */
                blockContentSize = RAND(&seed) % maxBlockSize;
            }
        }

        /* try generating a compressed block */
        frame->oldStats = frame->stats;   /* snapshot, restored if the attempt is rejected */
        frame->data = op;                 /* rewind output : overwrite any rejected attempt */
        cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
        if (cSize >= blockContentSize) {  /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
            /* data doesn't compress -- try again */
            frame->stats = frame->oldStats; /* don't update the stats */
            DISPLAYLEVEL(5, " can't compress block : try again \n");
        } else {
            blockWritten = 1;
            DISPLAYLEVEL(4, " block size: %u \n", (unsigned)cSize);
            /* the content of the accepted block is now part of the source */
            frame->src = (BYTE*)frame->src + blockContentSize;
        }
    }
    return seed;
}
writeBlocks(&seed, fr, info); + writeChecksum(fr); + + return seed; +} + +/*_******************************************************* +* Dictionary Helper Functions +*********************************************************/ +/* returns 0 if successful, otherwise returns 1 upon error */ +static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict) +{ + /* allocate space for samples */ + int ret = 0; + unsigned const numSamples = 4; + size_t sampleSizes[4]; + BYTE* const samples = malloc(5000*sizeof(BYTE)); + if (samples == NULL) { + DISPLAY("Error: could not allocate space for samples\n"); + return 1; + } + + /* generate samples */ + { unsigned literalValue = 1; + unsigned samplesPos = 0; + size_t currSize = 1; + while (literalValue <= 4) { + sampleSizes[literalValue - 1] = currSize; + { size_t k; + for (k = 0; k < currSize; k++) { + *(samples + (samplesPos++)) = (BYTE)literalValue; + } } + literalValue++; + currSize *= 16; + } } + + { size_t dictWriteSize = 0; + ZDICT_params_t zdictParams; + size_t const headerSize = MAX(dictSize/4, 256); + size_t const dictContentSize = dictSize - headerSize; + BYTE* const dictContent = fullDict + headerSize; + if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) { + DISPLAY("Error: dictionary size is too small\n"); + ret = 1; + goto exitGenRandomDict; + } + + /* init dictionary params */ + memset(&zdictParams, 0, sizeof(zdictParams)); + zdictParams.dictID = dictID; + zdictParams.notificationLevel = 1; + + /* fill in dictionary content */ + RAND_buffer(&seed, (void*)dictContent, dictContentSize); + + /* finalize dictionary with random samples */ + dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize, + dictContent, dictContentSize, + samples, sampleSizes, numSamples, + zdictParams); + + if (ZDICT_isError(dictWriteSize)) { + DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize)); + ret = 1; + } + } + +exitGenRandomDict: + free(samples); + return ret; +} + 
+static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){ + /* allocate space statically */ + dictInfo dictOp; + memset(&dictOp, 0, sizeof(dictOp)); + dictOp.useDict = useDict; + dictOp.dictContentSize = dictContentSize; + dictOp.dictContent = dictContent; + dictOp.dictID = dictID; + return dictOp; +} + +/*-******************************************************* +* Test Mode +*********************************************************/ + +BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE]; + +static size_t testDecodeSimple(frame_t* fr) +{ + /* test decoding the generated data with the simple API */ + size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); + + if (ZSTD_isError(ret)) return ret; + + if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, + (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { + return ERROR(corruption_detected); + } + + return ret; +} + +static size_t testDecodeStreaming(frame_t* fr) +{ + /* test decoding the generated data with the streaming API */ + ZSTD_DStream* zd = ZSTD_createDStream(); + ZSTD_inBuffer in; + ZSTD_outBuffer out; + size_t ret; + + if (!zd) return ERROR(memory_allocation); + + in.src = fr->dataStart; + in.pos = 0; + in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; + + out.dst = DECOMPRESSED_BUFFER; + out.pos = 0; + out.size = ZSTD_DStreamOutSize(); + + ZSTD_initDStream(zd); + while (1) { + ret = ZSTD_decompressStream(zd, &out, &in); + if (ZSTD_isError(ret)) goto cleanup; /* error */ + if (ret == 0) break; /* frame is done */ + + /* force decoding to be done in chunks */ + out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); + } + + ret = out.pos; + + if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { + return ERROR(corruption_detected); + } + +cleanup: + ZSTD_freeDStream(zd); + return ret; +} + +static size_t testDecodeWithDict(U32 seed, genType_e genType) +{ + /* create variables */ + size_t 
const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN; + U32 const dictID = RAND(&seed); + size_t errorDetected = 0; + BYTE* const fullDict = malloc(dictSize); + if (fullDict == NULL) { + return ERROR(GENERIC); + } + + /* generate random dictionary */ + if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */ + errorDetected = ERROR(GENERIC); + goto dictTestCleanup; + } + + + { frame_t fr; + dictInfo info; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + size_t ret; + + /* get dict info */ + { size_t const headerSize = MAX(dictSize/4, 256); + size_t const dictContentSize = dictSize-headerSize; + BYTE* const dictContent = fullDict+headerSize; + info = initDictInfo(1, dictContentSize, dictContent, dictID); + } + + /* manually decompress and check difference */ + if (genType == gt_frame) { + /* Test frame */ + generateFrame(seed, &fr, info); + ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, + fullDict, dictSize); + } else { + /* Test block */ + generateCompressedBlock(seed, &fr, info); + ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize); + if (ZSTD_isError(ret)) { + errorDetected = ret; + ZSTD_freeDCtx(dctx); + goto dictTestCleanup; + } + ret = ZSTD_decompressBlock(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart); + } + ZSTD_freeDCtx(dctx); + + if (ZSTD_isError(ret)) { + errorDetected = ret; + goto dictTestCleanup; + } + + if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) { + errorDetected = ERROR(corruption_detected); + goto dictTestCleanup; + } + } + +dictTestCleanup: + free(fullDict); + return errorDetected; +} + +static size_t testDecodeRawBlock(frame_t* fr) +{ + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t ret = ZSTD_decompressBegin(dctx); + if (ZSTD_isError(ret)) return ret; + + ret = ZSTD_decompressBlock( + dctx, + 
DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); + ZSTD_freeDCtx(dctx); + if (ZSTD_isError(ret)) return ret; + + if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, + (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { + return ERROR(corruption_detected); + } + + return ret; +} + +static int runBlockTest(U32* seed) +{ + frame_t fr; + U32 const seedCopy = *seed; + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + *seed = generateCompressedBlock(*seed, &fr, info); + } + + { size_t const r = testDecodeRawBlock(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in block mode on test seed %u: %s\n", + (unsigned)seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + + { size_t const r = testDecodeWithDict(*seed, gt_block); + if (ZSTD_isError(r)) { + DISPLAY("Error in block mode with dictionary on test seed %u: %s\n", + (unsigned)seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + return 0; +} + +static int runFrameTest(U32* seed) +{ + frame_t fr; + U32 const seedCopy = *seed; + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + *seed = generateFrame(*seed, &fr, info); + } + + { size_t const r = testDecodeSimple(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in simple mode on test seed %u: %s\n", + (unsigned)seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + { size_t const r = testDecodeStreaming(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in streaming mode on test seed %u: %s\n", + (unsigned)seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */ + if (ZSTD_isError(r)) { + DISPLAY("Error in dictionary mode on test seed %u: %s\n", + (unsigned)seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + return 0; +} + +static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS, + genType_e genType) +{ + unsigned fnum; + + UTIL_time_t const startClock = UTIL_getTime(); + U64 const maxClockSpan = testDurationS * 
SEC_TO_MICRO; + + if (numFiles == 0 && !testDurationS) numFiles = 1; + + DISPLAY("seed: %u\n", (unsigned)seed); + + for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) { + if (fnum < numFiles) + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + else + DISPLAYUPDATE("\r%u ", fnum); + + { int const ret = (genType == gt_frame) ? + runFrameTest(&seed) : + runBlockTest(&seed); + if (ret) return ret; + } + } + + DISPLAY("\r%u tests completed: ", fnum); + DISPLAY("OK\n"); + + return 0; +} + +/*-******************************************************* +* File I/O +*********************************************************/ + +static int generateFile(U32 seed, const char* const path, + const char* const origPath, genType_e genType) +{ + frame_t fr; + + DISPLAY("seed: %u\n", (unsigned)seed); + + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + if (genType == gt_frame) { + generateFrame(seed, &fr, info); + } else { + generateCompressedBlock(seed, &fr, info); + } + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); + if (origPath) { + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); + } + return 0; +} + +static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, + const char* const origPath, genType_e genType) +{ + char outPath[MAX_PATH]; + unsigned fnum; + + DISPLAY("seed: %u\n", (unsigned)seed); + + for (fnum = 0; fnum < numFiles; fnum++) { + frame_t fr; + + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + if (genType == gt_frame) { + seed = generateFrame(seed, &fr, info); + } else { + seed = generateCompressedBlock(seed, &fr, info); + } + } + + if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); + + if (origPath) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u", 
origPath, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); + } + } + + DISPLAY("\r%u/%u \n", fnum, numFiles); + + return 0; +} + +static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, + const char* const origPath, const size_t dictSize, + genType_e genType) +{ + char outPath[MAX_PATH]; + BYTE* fullDict; + U32 const dictID = RAND(&seed); + int errorDetected = 0; + + if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + + /* allocate space for the dictionary */ + fullDict = malloc(dictSize); + if (fullDict == NULL) { + DISPLAY("Error: could not allocate space for full dictionary.\n"); + return 1; + } + + /* randomly generate the dictionary */ + { int const ret = genRandomDict(dictID, seed, dictSize, fullDict); + if (ret != 0) { + errorDetected = ret; + goto dictCleanup; + } + } + + /* write out dictionary */ + if (numFiles != 0) { + if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { + DISPLAY("Error: dictionary path too long\n"); + errorDetected = 1; + goto dictCleanup; + } + outputBuffer(fullDict, dictSize, outPath); + } + else { + outputBuffer(fullDict, dictSize, "dictionary"); + } + + /* generate random compressed/decompressed files */ + { unsigned fnum; + for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) { + frame_t fr; + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + { + size_t const headerSize = MAX(dictSize/4, 256); + size_t const dictContentSize = dictSize-headerSize; + BYTE* const dictContent = fullDict+headerSize; + dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID); + if (genType == gt_frame) { + seed = generateFrame(seed, &fr, info); + } else { + seed = generateCompressedBlock(seed, &fr, info); + } + } + + if (numFiles != 0) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { + 
/* Parse a run of decimal digits starting at *argument.
 * *argument is advanced past the digits consumed.
 * @return : the parsed value, 0 when there is no leading digit.
 * Note : the result wraps around when the digit string exceeds the range of `unsigned`. */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned total = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        total = total * 10 + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return total;
}
/*! readU32FromChar() :
    @return : unsigned integer value read from input in `char` format.
    A 'K' suffix multiplies the value by 2^10, an 'M' suffix by 2^20;
    the suffix may be followed by an optional 'i' and an optional 'B'
    (K, KB, KiB, M, MB, MiB).
    Will also modify `*stringPtr`, advancing it to position where it stopped reading.
    Note : function result can overflow if digit string > MAX_UINT */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* p = *stringPtr;
    unsigned value = 0;

    /* decimal digits */
    for ( ; (*p >= '0') && (*p <= '9'); p++) {
        value = value * 10 + (unsigned)(*p - '0');
    }

    /* optional size suffix */
    if ((*p == 'K') || (*p == 'M')) {
        unsigned const shift = (*p == 'M') ? 20 : 10;
        value <<= shift;
        p++;
        if (*p == 'i') p++;
        if (*p == 'B') p++;
    }

    *stringPtr = p;
    return value;
}
+ */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + +int main(int argc, char** argv) +{ + U32 seed = 0; + int seedset = 0; + unsigned numFiles = 0; + unsigned testDuration = 0; + int testMode = 0; + const char* path = NULL; + const char* origPath = NULL; + int useDict = 0; + unsigned dictSize = (10 << 10); /* 10 kB default */ + genType_e genType = gt_frame; + + int argNb; + + /* Check command line */ + for (argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + usage(argv[0]); + return 0; + case 'H': + advancedUsage(argv[0]); + return 0; + case 'v': + argument++; + g_displayLevel++; + break; + case 's': + argument++; + seedset=1; + seed = readInt(&argument); + break; + case 'n': + argument++; + numFiles = readInt(&argument); + break; + case 'T': + argument++; + testDuration = readInt(&argument); + if (*argument == 'm') { + testDuration *= 60; + argument++; + if (*argument == 'n') argument++; + } + break; + case 'o': + argument++; + origPath = argument; + argument += strlen(argument); + break; + case 'p': + argument++; + path = argument; + argument += strlen(argument); + break; + case 't': + argument++; + testMode = 1; + break; + case '-': + argument++; + if (strcmp(argument, "content-size") == 0) { + opts.contentSize = 1; + } else if (longCommandWArg(&argument, "use-dict=")) { + dictSize = readU32FromChar(&argument); + useDict = 1; + } else if (strcmp(argument, "gen-blocks") == 0) { + genType = gt_block; + } else if (longCommandWArg(&argument, "max-block-size-log=")) { + U32 value = readU32FromChar(&argument); + if (value >= 2 
&& value <= ZSTD_BLOCKSIZE_MAX) { + g_maxBlockSize = 1U << value; + } + } else if (longCommandWArg(&argument, "max-content-size-log=")) { + U32 value = readU32FromChar(&argument); + g_maxDecompressedSizeLog = + MIN(MAX_DECOMPRESSED_SIZE_LOG, value); + } else { + advancedUsage(argv[0]); + return 1; + } + argument += strlen(argument); + break; + default: + usage(argv[0]); + return 1; + } } } } /* for (argNb=1; argNb<argc; argNb++) */ + + if (!seedset) { + seed = makeSeed(); + } + + if (testMode) { + return runTestMode(seed, numFiles, testDuration, genType); + } else { + if (testDuration) { + DISPLAY("Error: -T requires test mode (-t)\n\n"); + usage(argv[0]); + return 1; + } + } + + if (!path) { + DISPLAY("Error: path is required in file generation mode\n"); + usage(argv[0]); + return 1; + } + + if (numFiles == 0 && useDict == 0) { + return generateFile(seed, path, origPath, genType); + } else if (useDict == 0){ + return generateCorpus(seed, numFiles, path, origPath, genType); + } else { + /* should generate files with a dictionary */ + return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType); + } + +} diff --git a/src/zstd/tests/dict-files/zero-weight-dict b/src/zstd/tests/dict-files/zero-weight-dict Binary files differnew file mode 100644 index 000000000..c40412052 --- /dev/null +++ b/src/zstd/tests/dict-files/zero-weight-dict diff --git a/src/zstd/tests/fullbench.c b/src/zstd/tests/fullbench.c new file mode 100644 index 000000000..f0179a9d1 --- /dev/null +++ b/src/zstd/tests/fullbench.c @@ -0,0 +1,843 @@ +/* + * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*_************************************ +* Includes +**************************************/ +#include "util.h" /* Compiler options, UTIL_GetFileSize */ +#include <stdlib.h> /* malloc */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ +#include <assert.h> + +#include "timefn.h" /* UTIL_clockSpanNano, UTIL_getTime */ +#include "mem.h" /* U32 */ +#ifndef ZSTD_DLL_IMPORT + #include "zstd_internal.h" /* ZSTD_decodeSeqHeaders, ZSTD_blockHeaderSize, ZSTD_getcBlockSize, blockType_e, KB, MB */ +#else + #define KB *(1 <<10) + #define MB *(1 <<20) + #define GB *(1U<<30) + typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; +#endif +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressBegin, ZSTD_compressContinue, etc. */ +#include "zstd.h" /* ZSTD_versionString */ +#include "util.h" /* time functions */ +#include "datagen.h" +#include "benchfn.h" /* CustomBench */ +#include "benchzstd.h" /* MB_UNIT */ + + +/*_************************************ +* Constants +**************************************/ +#define PROGRAM_DESCRIPTION "Zstandard speed analyzer" +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_versionString(), (int)(sizeof(void*)*8), AUTHOR, __DATE__ + +#define NBLOOPS 6 +#define TIMELOOP_S 2 + +#define MAX_MEM (1984 MB) + +#define DEFAULT_CLEVEL 1 + +#define COMPRESSIBILITY_DEFAULT 0.50 +static const size_t kSampleSizeDefault = 10000000; + +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ + + +/*_************************************ +* Macros +**************************************/ +#define DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) + +#define CONTROL(c) { if (!(c)) { abort(); } } /* like assert(), but cannot be disabled */ + +/*_************************************ +* Benchmark Parameters +**************************************/ +static unsigned g_nbIterations = NBLOOPS; + + +/*_******************************************************* +* Private functions +*********************************************************/ +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + + requiredMem += step; + do { + testmem = malloc ((size_t)requiredMem); + requiredMem -= step; + } while (!testmem); + + free (testmem); + return (size_t) requiredMem; +} + + +/*_******************************************************* +* Benchmark wrappers +*********************************************************/ + +static ZSTD_CCtx* g_zcc = NULL; + +static size_t +local_ZSTD_compress(const void* src, size_t srcSize, + void* dst, size_t dstSize, + void* payload) +{ + ZSTD_parameters p; + ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 }; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p); + //return ZSTD_compress(dst, dstSize, src, srcSize, cLevel); +} + +static size_t g_cSize = 0; +static size_t local_ZSTD_decompress(const void* src, size_t srcSize, + void* dst, size_t dstSize, + void* buff2) +{ + (void)src; (void)srcSize; + return ZSTD_decompress(dst, dstSize, buff2, g_cSize); +} + +static ZSTD_DCtx* g_zdc = NULL; + +#ifndef ZSTD_DLL_IMPORT +extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); +static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) +{ + (void)src; (void)srcSize; (void)dst; (void)dstSize; + return 
ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize); +} + +static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) +{ + int nbSeq; + (void)src; (void)srcSize; (void)dst; (void)dstSize; + return ZSTD_decodeSeqHeaders(g_zdc, &nbSeq, buff2, g_cSize); +} +#endif + +static ZSTD_CStream* g_cstream= NULL; +static size_t +local_ZSTD_compressStream(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + ZSTD_parameters p; + ZSTD_frameParameters f = {1 /* contentSizeHeader*/, 0, 0}; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + ZSTD_initCStream_advanced(g_cstream, NULL, 0, p, ZSTD_CONTENTSIZE_UNKNOWN); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream(g_cstream, &buffOut, &buffIn); + ZSTD_endStream(g_cstream, &buffOut); + return buffOut.pos; +} + +static size_t +local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t r; + assert(cctx != NULL); + + r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload); + + ZSTD_freeCCtx(cctx); + + return r; +} + +static size_t +local_ZSTD_compress_generic_end(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + (void)payload; + return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize); +} + +static size_t +local_ZSTD_compress_generic_continue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)payload; + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); + 
ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end); + return buffOut.pos; +} + +static size_t +local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + (void)payload; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2); + return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize); +} + +static size_t +local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)payload; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); + while(ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end)) {} + return buffOut.pos; +} + +static ZSTD_DStream* g_dstream= NULL; +static size_t +local_ZSTD_decompressStream(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* buff2) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)src; (void)srcSize; + ZSTD_initDStream(g_dstream); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = buff2; + buffIn.size = g_cSize; + buffIn.pos = 0; + ZSTD_decompressStream(g_dstream, &buffOut, &buffIn); + return buffOut.pos; +} + +#ifndef ZSTD_DLL_IMPORT +static size_t local_ZSTD_compressContinue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_parameters p; + ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 }; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize); + return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize); +} + +#define FIRST_BLOCK_SIZE 8 +static size_t +local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize, + 
void* dst, size_t dstCapacity, + void* payload) +{ + BYTE firstBlockBuf[FIRST_BLOCK_SIZE]; + + ZSTD_parameters p; + ZSTD_frameParameters const f = { 1, 0, 0 }; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize); + memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE); + + { size_t const compressResult = ZSTD_compressContinue(g_zcc, + dst, dstCapacity, + firstBlockBuf, FIRST_BLOCK_SIZE); + if (ZSTD_isError(compressResult)) { + DISPLAY("local_ZSTD_compressContinue_extDict error : %s\n", + ZSTD_getErrorName(compressResult)); + return compressResult; + } + dst = (BYTE*)dst + compressResult; + dstCapacity -= compressResult; + } + return ZSTD_compressEnd(g_zcc, dst, dstCapacity, + (const BYTE*)src + FIRST_BLOCK_SIZE, + srcSize - FIRST_BLOCK_SIZE); +} + +static size_t local_ZSTD_decompressContinue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* buff2) +{ + size_t regeneratedSize = 0; + const BYTE* ip = (const BYTE*)buff2; + const BYTE* const iend = ip + g_cSize; + BYTE* op = (BYTE*)dst; + size_t remainingCapacity = dstCapacity; + + (void)src; (void)srcSize; /* unused */ + ZSTD_decompressBegin(g_zdc); + while (ip < iend) { + size_t const iSize = ZSTD_nextSrcSizeToDecompress(g_zdc); + size_t const decodedSize = ZSTD_decompressContinue(g_zdc, op, remainingCapacity, ip, iSize); + ip += iSize; + regeneratedSize += decodedSize; + op += decodedSize; + remainingCapacity -= decodedSize; + } + + return regeneratedSize; +} +#endif + + +/*_******************************************************* +* Bench functions +*********************************************************/ +static int benchMem(unsigned benchNb, + const void* src, size_t srcSize, + int cLevel, ZSTD_compressionParameters cparams) +{ + size_t dstBuffSize = ZSTD_compressBound(srcSize); + BYTE* dstBuff; + void* dstBuff2; + void* payload; + const char* benchName; + BMK_benchFn_t benchFunction; + int errorcode = 0; + + /* Selection 
*/ + switch(benchNb) + { + case 1: + benchFunction = local_ZSTD_compress; benchName = "compress"; + break; + case 2: + benchFunction = local_ZSTD_decompress; benchName = "decompress"; + break; +#ifndef ZSTD_DLL_IMPORT + case 11: + benchFunction = local_ZSTD_compressContinue; benchName = "compressContinue"; + break; + case 12: + benchFunction = local_ZSTD_compressContinue_extDict; benchName = "compressContinue_extDict"; + break; + case 13: + benchFunction = local_ZSTD_decompressContinue; benchName = "decompressContinue"; + break; + case 31: + benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "decodeLiteralsBlock"; + break; + case 32: + benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "decodeSeqHeaders"; + break; +#endif + case 41: + benchFunction = local_ZSTD_compressStream; benchName = "compressStream"; + break; + case 42: + benchFunction = local_ZSTD_decompressStream; benchName = "decompressStream"; + break; + case 43: + benchFunction = local_ZSTD_compressStream_freshCCtx; benchName = "compressStream_freshCCtx"; + break; + case 51: + benchFunction = local_ZSTD_compress_generic_continue; benchName = "compress_generic, continue"; + break; + case 52: + benchFunction = local_ZSTD_compress_generic_end; benchName = "compress_generic, end"; + break; + case 61: + benchFunction = local_ZSTD_compress_generic_T2_continue; benchName = "compress_generic, -T2, continue"; + break; + case 62: + benchFunction = local_ZSTD_compress_generic_T2_end; benchName = "compress_generic, -T2, end"; + break; + default : + return 0; + } + + /* Allocation */ + dstBuff = (BYTE*)malloc(dstBuffSize); + dstBuff2 = malloc(dstBuffSize); + if ((!dstBuff) || (!dstBuff2)) { + DISPLAY("\nError: not enough memory!\n"); + free(dstBuff); free(dstBuff2); + return 12; + } + payload = dstBuff2; + if (g_zcc==NULL) g_zcc = ZSTD_createCCtx(); + if (g_zdc==NULL) g_zdc = ZSTD_createDCtx(); + if (g_cstream==NULL) g_cstream = ZSTD_createCStream(); + if (g_dstream==NULL) g_dstream = 
ZSTD_createDStream(); + + /* DISPLAY("params: cLevel %d, wlog %d hlog %d clog %d slog %d mml %d tlen %d strat %d \n", + cLevel, cparams->windowLog, cparams->hashLog, cparams->chainLog, cparams->searchLog, + cparams->minMatch, cparams->targetLength, cparams->strategy); */ + + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_windowLog, (int)cparams.windowLog); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_hashLog, (int)cparams.hashLog); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_chainLog, (int)cparams.chainLog); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_searchLog, (int)cparams.searchLog); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_minMatch, (int)cparams.minMatch); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_targetLength, (int)cparams.targetLength); + ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_strategy, cparams.strategy); + + + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_windowLog, (int)cparams.windowLog); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_hashLog, (int)cparams.hashLog); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_chainLog, (int)cparams.chainLog); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_searchLog, (int)cparams.searchLog); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_minMatch, (int)cparams.minMatch); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_targetLength, (int)cparams.targetLength); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_strategy, cparams.strategy); + + /* Preparation */ + switch(benchNb) + { + case 1: + payload = &cparams; + break; + case 2: + g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel); + break; +#ifndef ZSTD_DLL_IMPORT + case 11: + payload = &cparams; + break; + case 12: + payload = &cparams; + break; + case 13 : + g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel); + break; + case 31: /* ZSTD_decodeLiteralsBlock : starts literals block in dstBuff2 */ + { size_t frameHeaderSize; + g_cSize = ZSTD_compress(dstBuff, 
dstBuffSize, src, srcSize, cLevel); + frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1)); + CONTROL(!ZSTD_isError(frameHeaderSize)); + /* check block is compressible, hence contains a literals section */ + { blockProperties_t bp; + ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp); /* Get 1st block type */ + if (bp.blockType != bt_compressed) { + DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n"); + goto _cleanOut; + } } + { size_t const skippedSize = frameHeaderSize + ZSTD_blockHeaderSize; + memcpy(dstBuff2, dstBuff+skippedSize, g_cSize-skippedSize); + } + srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */ + ZSTD_decompressBegin(g_zdc); + break; + } + case 32: /* ZSTD_decodeSeqHeaders */ + { blockProperties_t bp; + const BYTE* ip = dstBuff; + const BYTE* iend; + { size_t const cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel); + CONTROL(cSize > ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1)); + } + /* Skip frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1)); + CONTROL(!ZSTD_isError(frameHeaderSize)); + ip += frameHeaderSize; + } + /* Find end of block */ + { size_t const cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */ + if (bp.blockType != bt_compressed) { + DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n"); + goto _cleanOut; + } + iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */ + } + ip += ZSTD_blockHeaderSize; /* skip block header */ + ZSTD_decompressBegin(g_zdc); + CONTROL(iend > ip); + ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip)); /* skip literal segment */ + g_cSize = (size_t)(iend-ip); + memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */ + srcSize = srcSize > 128 KB ? 
128 KB : srcSize; /* speed relative to block */ + break; + } +#else + case 31: + goto _cleanOut; +#endif + case 41 : + payload = &cparams; + break; + case 42 : + g_cSize = ZSTD_compress(payload, dstBuffSize, src, srcSize, cLevel); + break; + case 43 : + payload = &cparams; + break; + + /* test functions */ + /* convention: test functions have ID > 100 */ + + default : ; + } + + /* warming up dstBuff */ + { size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; } + + /* benchmark loop */ + { BMK_timedFnState_t* const tfs = BMK_createTimedFnState(g_nbIterations * 1000, 1000); + void* const avoidStrictAliasingPtr = &dstBuff; + BMK_benchParams_t bp; + BMK_runTime_t bestResult; + bestResult.sumOfReturn = 0; + bestResult.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ + CONTROL(tfs != NULL); + + bp.benchFn = benchFunction; + bp.benchPayload = payload; + bp.initFn = NULL; + bp.initPayload = NULL; + bp.errorFn = ZSTD_isError; + bp.blockCount = 1; + bp.srcBuffers = &src; + bp.srcSizes = &srcSize; + bp.dstBuffers = (void* const*) avoidStrictAliasingPtr; /* circumvent strict aliasing warning on gcc-8, + * because gcc considers that `void* const *` and `void**` are 2 different types */ + bp.dstCapacities = &dstBuffSize; + bp.blockResults = NULL; + + for (;;) { + BMK_runOutcome_t const bOutcome = BMK_benchTimedFn(tfs, bp); + + if (!BMK_isSuccessful_runOutcome(bOutcome)) { + DISPLAY("ERROR benchmarking function ! ! 
\n"); + errorcode = 1; + goto _cleanOut; + } + + { BMK_runTime_t const newResult = BMK_extract_runTime(bOutcome); + if (newResult.nanoSecPerRun < bestResult.nanoSecPerRun ) + bestResult.nanoSecPerRun = newResult.nanoSecPerRun; + DISPLAY("\r%2u#%-29.29s:%8.1f MB/s (%8u) ", + benchNb, benchName, + (double)srcSize * TIMELOOP_NANOSEC / bestResult.nanoSecPerRun / MB_UNIT, + (unsigned)newResult.sumOfReturn ); + } + + if ( BMK_isCompleted_TimedFn(tfs) ) break; + } + BMK_freeTimedFnState(tfs); + } + DISPLAY("\n"); + +_cleanOut: + free(dstBuff); + free(dstBuff2); + ZSTD_freeCCtx(g_zcc); g_zcc=NULL; + ZSTD_freeDCtx(g_zdc); g_zdc=NULL; + ZSTD_freeCStream(g_cstream); g_cstream=NULL; + ZSTD_freeDStream(g_dstream); g_dstream=NULL; + return errorcode; +} + + +static int benchSample(U32 benchNb, + size_t benchedSize, double compressibility, + int cLevel, ZSTD_compressionParameters cparams) +{ + /* Allocation */ + void* const origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; } + + /* Fill buffer */ + RDG_genBuffer(origBuff, benchedSize, compressibility, 0.0, 0); + + /* bench */ + DISPLAY("\r%70s\r", ""); + DISPLAY(" Sample %u bytes : \n", (unsigned)benchedSize); + if (benchNb) { + benchMem(benchNb, origBuff, benchedSize, cLevel, cparams); + } else { /* 0 == run all tests */ + for (benchNb=0; benchNb<100; benchNb++) { + benchMem(benchNb, origBuff, benchedSize, cLevel, cparams); + } } + + free(origBuff); + return 0; +} + + +static int benchFiles(U32 benchNb, + const char** fileNamesTable, const int nbFiles, + int cLevel, ZSTD_compressionParameters cparams) +{ + /* Loop for each file */ + int fileIdx; + for (fileIdx=0; fileIdx<nbFiles; fileIdx++) { + const char* const inFileName = fileNamesTable[fileIdx]; + FILE* const inFile = fopen( inFileName, "rb" ); + size_t benchedSize; + + /* Check file existence */ + if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; } + + /* Memory allocation & restrictions */ + { U64 
const inFileSize = UTIL_getFileSize(inFileName); + if (inFileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAY( "Cannot measure size of %s\n", inFileName); + fclose(inFile); + return 11; + } + benchedSize = BMK_findMaxMem(inFileSize*3) / 3; + if ((U64)benchedSize > inFileSize) + benchedSize = (size_t)inFileSize; + if ((U64)benchedSize < inFileSize) { + DISPLAY("Not enough memory for '%s' full size; testing %u MB only... \n", + inFileName, (unsigned)(benchedSize>>20)); + } } + + /* Alloc */ + { void* const origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); fclose(inFile); return 12; } + + /* Fill input buffer */ + DISPLAY("Loading %s... \r", inFileName); + { size_t const readSize = fread(origBuff, 1, benchedSize, inFile); + fclose(inFile); + if (readSize != benchedSize) { + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); + free(origBuff); + return 13; + } } + + /* bench */ + DISPLAY("\r%70s\r", ""); /* blank line */ + DISPLAY(" %s : \n", inFileName); + if (benchNb) { + benchMem(benchNb, origBuff, benchedSize, cLevel, cparams); + } else { + for (benchNb=0; benchNb<100; benchNb++) { + benchMem(benchNb, origBuff, benchedSize, cLevel, cparams); + } } + + free(origBuff); + } } + + return 0; +} + + + +/*_******************************************************* +* Argument Parsing +*********************************************************/ + +#define ERROR_OUT(msg) { DISPLAY("%s \n", msg); exit(1); } + +static unsigned readU32FromChar(const char** stringPtr) +{ + const char errorMsg[] = "error: numeric value too large"; + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + unsigned const max = (((unsigned)(-1)) / 10) - 1; + if (result > max) ERROR_OUT(errorMsg); + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + if (result > maxK) ERROR_OUT(errorMsg); + result <<= 10; 
+ if (**stringPtr=='M') { + if (result > maxK) ERROR_OUT(errorMsg); + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +/*_******************************************************* +* Command line +*********************************************************/ + +static int usage(const char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file1 file2 ... fileX\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); + return 0; +} + +static int usage_advanced(const char* exename) +{ + usage(exename); + DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -b# : test only function # \n"); + DISPLAY( " -l# : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL); + DISPLAY( "--zstd= : custom parameter selection. 
Format same as zstdcli \n"); + DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100); + DISPLAY( " -B# : sample size (default : %u)\n", (unsigned)kSampleSizeDefault); + DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 1; +} + +int main(int argc, const char** argv) +{ + int argNb, filenamesStart=0, result; + const char* const exename = argv[0]; + const char* input_filename = NULL; + U32 benchNb = 0, main_pause = 0; + int cLevel = DEFAULT_CLEVEL; + ZSTD_compressionParameters cparams = ZSTD_getCParams(cLevel, 0, 0); + size_t sampleSize = kSampleSizeDefault; + double compressibility = COMPRESSIBILITY_DEFAULT; + + DISPLAY(WELCOME_MESSAGE); + if (argc<1) return badusage(exename); + + for (argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + CONTROL(argument != NULL); + + if (longCommandWArg(&argument, "--zstd=")) { + for ( ; ;) { + if (longCommandWArg(&argument, "windowLog=") || longCommandWArg(&argument, "wlog=")) { cparams.windowLog = readU32FromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "chainLog=") || longCommandWArg(&argument, "clog=")) { cparams.chainLog = readU32FromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "hashLog=") || longCommandWArg(&argument, "hlog=")) { cparams.hashLog = readU32FromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "searchLog=") || longCommandWArg(&argument, "slog=")) { cparams.searchLog = readU32FromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "minMatch=") || longCommandWArg(&argument, "mml=")) { cparams.minMatch = readU32FromChar(&argument); if (argument[0]==',') { argument++; 
continue; } else break; } + if (longCommandWArg(&argument, "targetLength=") || longCommandWArg(&argument, "tlen=")) { cparams.targetLength = readU32FromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "strategy=") || longCommandWArg(&argument, "strat=")) { cparams.strategy = (ZSTD_strategy)(readU32FromChar(&argument)); if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevel = (int)readU32FromChar(&argument); cparams = ZSTD_getCParams(cLevel, 0, 0); if (argument[0]==',') { argument++; continue; } else break; } + DISPLAY("invalid compression parameter \n"); + return 1; + } + + /* check end of string */ + if (argument[0] != 0) { + DISPLAY("invalid --zstd= format \n"); + return 1; + } else { + continue; + } + + } else if (argument[0]=='-') { /* Commands (note : aggregated commands are allowed) */ + argument++; + while (argument[0]!=0) { + + switch(argument[0]) + { + /* Display help on usage */ + case 'h': + case 'H': return usage_advanced(exename); + + /* Pause at the end (hidden option) */ + case 'p': main_pause = 1; break; + + /* Select specific algorithm to bench */ + case 'b': + argument++; + benchNb = readU32FromChar(&argument); + break; + + /* Select compression level to use */ + case 'l': + argument++; + cLevel = (int)readU32FromChar(&argument); + cparams = ZSTD_getCParams(cLevel, 0, 0); + break; + + /* Select compressibility of synthetic sample */ + case 'P': + argument++; + compressibility = (double)readU32FromChar(&argument) / 100.; + break; + + /* Select size of synthetic sample */ + case 'B': + argument++; + sampleSize = (size_t)readU32FromChar(&argument); + break; + + /* Modify Nb Iterations */ + case 'i': + argument++; + g_nbIterations = readU32FromChar(&argument); + break; + + /* Unknown command */ + default : return badusage(exename); + } + } + continue; + } + + /* first provided filename is 
input */ + if (!input_filename) { input_filename=argument; filenamesStart=argNb; continue; } + } + + + + if (filenamesStart==0) /* no input file */ + result = benchSample(benchNb, sampleSize, compressibility, cLevel, cparams); + else + result = benchFiles(benchNb, argv+filenamesStart, argc-filenamesStart, cLevel, cparams); + + if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; } + + return result; +} diff --git a/src/zstd/tests/fuzz/.gitignore b/src/zstd/tests/fuzz/.gitignore new file mode 100644 index 000000000..b6fc6e506 --- /dev/null +++ b/src/zstd/tests/fuzz/.gitignore @@ -0,0 +1,20 @@ +# test artefacts +corpora +block_decompress +block_round_trip +dictionary_decompress +dictionary_loader +dictionary_round_trip +dictionary_stream_round_trip +raw_dictionary_round_trip +simple_compress +simple_decompress +simple_round_trip +stream_decompress +stream_round_trip +zstd_frame_info +fuzz-*.log + +# misc +trace +tmp* diff --git a/src/zstd/tests/fuzz/Makefile b/src/zstd/tests/fuzz/Makefile new file mode 100644 index 000000000..1af3dc734 --- /dev/null +++ b/src/zstd/tests/fuzz/Makefile @@ -0,0 +1,212 @@ +# ################################################################ +# Copyright (c) 2016-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. 
+# ################################################################ + +# Optionally user defined flags +CFLAGS ?= -O3 +CXXFLAGS ?= -O3 +CPPFLAGS ?= +LDFLAGS ?= +ARFLAGS ?= +LIB_FUZZING_ENGINE ?= libregression.a +PYTHON ?= python +ifeq ($(shell uname), Darwin) + DOWNLOAD?=curl -L -o +else + DOWNLOAD?=wget -O +endif +CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/ + +ZSTDDIR = ../../lib +PRGDIR = ../../programs + +FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ + -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ + -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS) +FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls \ + -g -fno-omit-frame-pointer +FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS) +FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS) +FUZZ_LDFLAGS := -pthread $(LDFLAGS) +FUZZ_ARFLAGS := $(ARFLAGS) +FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) + +FUZZ_ROUND_TRIP_FLAGS := -DFUZZING_ASSERT_VALID_SEQUENCE + +FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h +FUZZ_SRC := $(PRGDIR)/util.c ./fuzz_helpers.c ./zstd_helpers.c ./fuzz_data_producer.c + +ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c +ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c +ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c +ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c +ZSTDLEGACY_SRC := $(ZSTDDIR)/legacy/*.c +FUZZ_SRC := \ + $(FUZZ_SRC) \ + $(ZSTDDECOMP_SRC) \ + $(ZSTDCOMMON_SRC) \ + $(ZSTDCOMP_SRC) \ + $(ZSTDDICT_SRC) \ + $(ZSTDLEGACY_SRC) +FUZZ_SRC := $(wildcard $(FUZZ_SRC)) + +FUZZ_D_OBJ1 := $(subst $(ZSTDDIR)/common/,d_lib_common_,$(FUZZ_SRC)) +FUZZ_D_OBJ2 := $(subst $(ZSTDDIR)/compress/,d_lib_compress_,$(FUZZ_D_OBJ1)) +FUZZ_D_OBJ3 := $(subst 
$(ZSTDDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2)) +FUZZ_D_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3)) +FUZZ_D_OBJ5 := $(subst $(ZSTDDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4)) +FUZZ_D_OBJ6 := $(subst $(PRGDIR)/,d_prg_,$(FUZZ_D_OBJ5)) +FUZZ_D_OBJ7 := $(subst $\./,d_fuzz_,$(FUZZ_D_OBJ6)) +FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ7:.c=.o) + +FUZZ_RT_OBJ1 := $(subst $(ZSTDDIR)/common/,rt_lib_common_,$(FUZZ_SRC)) +FUZZ_RT_OBJ2 := $(subst $(ZSTDDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1)) +FUZZ_RT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2)) +FUZZ_RT_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3)) +FUZZ_RT_OBJ5 := $(subst $(ZSTDDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4)) +FUZZ_RT_OBJ6 := $(subst $(PRGDIR)/,rt_prg_,$(FUZZ_RT_OBJ5)) +FUZZ_RT_OBJ7 := $(subst $\./,rt_fuzz_,$(FUZZ_RT_OBJ6)) +FUZZ_ROUND_TRIP_OBJ := $(FUZZ_RT_OBJ7:.c=.o) + +.PHONY: default all clean cleanall + +default: all + +FUZZ_TARGETS := \ + simple_round_trip \ + stream_round_trip \ + block_round_trip \ + simple_decompress \ + stream_decompress \ + block_decompress \ + dictionary_round_trip \ + dictionary_decompress \ + zstd_frame_info \ + simple_compress \ + dictionary_loader \ + raw_dictionary_round_trip \ + dictionary_stream_round_trip + +all: $(FUZZ_TARGETS) + +rt_lib_common_%.o: $(ZSTDDIR)/common/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_compress_%.o: $(ZSTDDIR)/compress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_prg_%.o: $(PRGDIR)/%.c + $(CC) 
$(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_fuzz_%.o: %.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +d_lib_common_%.o: $(ZSTDDIR)/common/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_compress_%.o: $(ZSTDDIR)/compress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_prg_%.o: $(PRGDIR)/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_fuzz_%.o: %.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +simple_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +stream_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_stream_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_stream_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +block_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_block_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_block_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +simple_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_simple_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_simple_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +stream_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_stream_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_stream_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +block_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_block_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_block_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + 
+dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +raw_dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_raw_dictionary_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_raw_dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_stream_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_stream_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_stream_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_dictionary_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +simple_compress: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_compress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_compress.o $(LIB_FUZZING_ENGINE) -o $@ + +zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_zstd_frame_info.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_zstd_frame_info.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o $(LIB_FUZZING_ENGINE) -o $@ + +libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o + $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o + +corpora/%_seed_corpus.zip: + @mkdir -p corpora + $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip + +corpora/%: corpora/%_seed_corpus.zip + unzip -q $^ -d $@ + +.PHONY: corpora +corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS)) + +.PHONY: seedcorpora +seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS)) + +regressiontest: corpora + CC="$(CC)" CXX="$(CXX)" 
CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all + $(PYTHON) ./fuzz.py regression all + +clean: + @$(RM) *.a *.o + @$(RM) simple_round_trip stream_round_trip simple_decompress \ + stream_decompress block_decompress block_round_trip \ + simple_compress dictionary_round_trip dictionary_decompress \ + zstd_frame_info + +cleanall: + @$(RM) -r Fuzzer + @$(RM) -r corpora diff --git a/src/zstd/tests/fuzz/README.md b/src/zstd/tests/fuzz/README.md new file mode 100644 index 000000000..71afa4063 --- /dev/null +++ b/src/zstd/tests/fuzz/README.md @@ -0,0 +1,101 @@ +# Fuzzing + +Each fuzzing target can be built with multiple engines. +Zstd provides a fuzz corpus for each target that can be downloaded with +the command: + +``` +make corpora +``` + +It will download each corpus into `./corpora/TARGET`. + +## fuzz.py + +`fuzz.py` is a helper script for building and running fuzzers. +Run `./fuzz.py -h` for the commands and run `./fuzz.py COMMAND -h` for +command specific help. + +### Generating Data + +`fuzz.py` provides a utility to generate seed data for each fuzzer. + +``` +make -C ../tests decodecorpus +./fuzz.py gen TARGET +``` + +By default it outputs 100 samples, each at most 8KB into `corpora/TARGET-seed`, +but that can be configured with the `--number`, `--max-size-log` and `--seed` +flags. + +### Build +It respects the usual build environment variables `CC`, `CFLAGS`, etc. +The environment variables can be overridden with the corresponding flags +`--cc`, `--cflags`, etc. +The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or +`--lib-fuzzing-engine`, the default is `libregression.a`. +Alternatively, you can use Clang's built in fuzzing engine with +`--enable-fuzzer`. +It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and +coverage instrumentation `--enable-coverage`. +It sets sane defaults which can be overridden with flags `--debug`, +`--enable-ubsan-pointer-overflow`, etc. 
+Run `./fuzz.py build -h` for help. + +### Running Fuzzers + +`./fuzz.py` can run `libfuzzer`, `afl`, and `regression` tests. +See the help of the relevant command for options. +Flags not parsed by `fuzz.py` are passed to the fuzzing engine. +The command used to run the fuzzer is printed for debugging. + +## LibFuzzer + +``` +# Build the fuzz targets +./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ +# OR equivalently +CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan +# Run the fuzzer +./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4> +``` + +where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. + +### MSAN + +Fuzzing with `libFuzzer` and `MSAN` is as easy as: + +``` +CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan +./fuzz.py libfuzzer TARGET <libfuzzer args> +``` + +`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, +`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass +the extra parameters only for MSAN. + +## AFL + +The default `LIB_FUZZING_ENGINE` is `libregression.a`, which produces a binary +that AFL can use. + +``` +# Build the fuzz targets +CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan +# Run the fuzzer without a memory limit because of ASAN +./fuzz.py afl TARGET -m none +``` + +## Regression Testing + +The regression test supports the `all` target to run all the fuzzers in one +command. + +``` +CC=clang CXX=clang++ ./fuzz.py build all --enable-asan --enable-ubsan +./fuzz.py regression all +CC=clang CXX=clang++ ./fuzz.py build all --enable-msan +./fuzz.py regression all +``` diff --git a/src/zstd/tests/fuzz/block_decompress.c b/src/zstd/tests/fuzz/block_decompress.c new file mode 100644 index 000000000..64d70f005 --- /dev/null +++ b/src/zstd/tests/fuzz/block_decompress.c @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + ZSTD_decompressBegin(dctx); + ZSTD_decompressBlock(dctx, rBuf, neededBufSize, src, size); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/block_round_trip.c b/src/zstd/tests/fuzz/block_round_trip.c new file mode 100644 index 000000000..097fc01b8 --- /dev/null +++ b/src/zstd/tests/fuzz/block_round_trip.c @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static void* cBuf = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + int cLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(cLevel, srcSize, 0); + size_t ret = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, srcSize); + FUZZ_ZASSERT(ret); + + ret = ZSTD_compressBlock(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(ret); + if (ret == 0) { + FUZZ_ASSERT(resultCapacity >= srcSize); + if (srcSize > 0) { + memcpy(result, src, srcSize); + } + return srcSize; + } + ZSTD_decompressBegin(dctx); + return ZSTD_decompressBlock(dctx, result, resultCapacity, compressed, ret); +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. 
The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + size_t neededBufSize = size; + if (size > ZSTD_BLOCKSIZE_MAX) + size = ZSTD_BLOCKSIZE_MAX; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize || !cBuf || !rBuf) { + free(cBuf); + free(rBuf); + cBuf = FUZZ_malloc(neededBufSize); + rBuf = FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size, + cLevel); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_decompress.c b/src/zstd/tests/fuzz/dictionary_decompress.c new file mode 100644 index 000000000..9944baa15 --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_decompress.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the dictionary + * decompression function to ensure the decompressor never crashes. It does not + * fuzz the dictionary. 
+ */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + FUZZ_dict_t dict; + ZSTD_DDict* ddict = NULL; + + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + dict = FUZZ_train(src, size, producer); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + ddict = ZSTD_createDDict(dict.buff, dict.size); + FUZZ_ASSERT(ddict); + } else { + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2))); + else + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2))); + } + + { + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void* rBuf = FUZZ_malloc(bufSize); + if (ddict) { + ZSTD_decompress_usingDDict(dctx, rBuf, bufSize, src, size, ddict); + } else { + ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + } + free(rBuf); + } + free(dict.buff); + FUZZ_dataProducer_free(producer); + ZSTD_freeDDict(ddict); +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_loader.c b/src/zstd/tests/fuzz/dictionary_loader.c new file mode 100644 index 000000000..f1fdf4da9 --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_loader.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. 
+ * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target makes sure that whenever a compression dictionary can be + * loaded, the data can be round tripped. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +/** + * Compresses the data and returns the compressed size or an error. + */ +static size_t compress(void* compressed, size_t compressedCapacity, + void const* source, size_t sourceSize, + void const* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + int const refPrefix) +{ + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict, dictSize, dictContentType)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, dictLoadMethod, dictContentType)); + size_t const compressedSize = ZSTD_compress2( + cctx, compressed, compressedCapacity, source, sourceSize); + ZSTD_freeCCtx(cctx); + return compressedSize; +} + +static size_t decompress(void* result, size_t resultCapacity, + void const* compressed, size_t compressedSize, + void const* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + int const refPrefix) +{ + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict, dictSize, dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict, dictSize, dictLoadMethod, dictContentType)); + size_t const resultSize = ZSTD_decompressDCtx( + dctx, result, resultCapacity, 
compressed, compressedSize); + FUZZ_ZASSERT(resultSize); + ZSTD_freeDCtx(dctx); + return resultSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + ZSTD_dictLoadMethod_e const dlm = + size = FUZZ_dataProducer_uint32Range(producer, 0, 1); + ZSTD_dictContentType_e const dct = + FUZZ_dataProducer_uint32Range(producer, 0, 2); + size = FUZZ_dataProducer_remainingBytes(producer); + + DEBUGLOG(2, "Dict load method %d", dlm); + DEBUGLOG(2, "Dict content type %d", dct); + DEBUGLOG(2, "Dict size %u", (unsigned)size); + + void* const rBuf = FUZZ_malloc(size); + size_t const cBufSize = ZSTD_compressBound(size); + void* const cBuf = FUZZ_malloc(cBufSize); + + size_t const cSize = + compress(cBuf, cBufSize, src, size, src, size, dlm, dct, refPrefix); + /* compression failing is okay */ + if (ZSTD_isError(cSize)) { + FUZZ_ASSERT_MSG(dct != ZSTD_dct_rawContent, "Raw must always succeed!"); + goto out; + } + size_t const rSize = + decompress(rBuf, size, cBuf, cSize, src, size, dlm, dct, refPrefix); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + +out: + free(cBuf); + free(rBuf); + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_round_trip.c b/src/zstd/tests/fuzz/dictionary_round_trip.c new file mode 100644 index 000000000..7b7771e48 --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_round_trip.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress) with + * a dictionary, compares the result with the original, and calls abort() on + * corruption. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + FUZZ_dataProducer_t *producer) +{ + ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; + FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer); + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + size_t cSize; + if (FUZZ_dataProducer_uint32Range(producer, 0, 15) == 0) { + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + cSize = ZSTD_compress_usingDict(cctx, + compressed, compressedCapacity, + src, srcSize, + dict.buff, dict.size, + cLevel); + } else { + dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2); + FUZZ_setRandomParameters(cctx, srcSize, producer); + /* Disable checksum so we can use sizes smaller than compress bound. 
*/ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict.buff, dict.size, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + } + FUZZ_ZASSERT(cSize); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict.buff, dict.size, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + { + size_t const ret = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, cSize); + free(dict.buff); + return ret; + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + size_t const rBufSize = size; + void* rBuf = FUZZ_malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void *cBuf; + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we force the checksum to be disabled, + * giving us 4 bytes of overhead. 
+ */ + cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + cBuf = FUZZ_malloc(cBufSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + free(rBuf); + free(cBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_stream_round_trip.c b/src/zstd/tests/fuzz/dictionary_stream_round_trip.c new file mode 100644 index 000000000..67e8c69ef --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_stream_round_trip.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. 
+ */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static uint8_t* cBuf = NULL; +static uint8_t* rBuf = NULL; +static size_t bufSize = 0; + +static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity, + FUZZ_dataProducer_t *producer) +{ + ZSTD_outBuffer buffer = { dst, 0, 0 }; + + FUZZ_ASSERT(capacity > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity)); + FUZZ_ASSERT(buffer.size <= capacity); + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size)); + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + return buffer; +} + +static size_t compress(uint8_t *dst, size_t capacity, + const uint8_t *src, size_t srcSize, + const uint8_t* dict, size_t dictSize, + FUZZ_dataProducer_t *producer, int refPrefix, + ZSTD_dictContentType_e dictContentType) +{ + size_t dstSize = 0; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + FUZZ_setRandomParameters(cctx, srcSize, producer); + + /* Disable checksum so we can use sizes smaller than compress bound. */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict, dictSize, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + + while (srcSize > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer); + /* Mode controls the action. 
If mode == -1 we pick a new mode */ + int mode = -1; + while (in.pos < in.size || mode != -1) { + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + /* Previous action finished, pick a new mode. */ + if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9); + switch (mode) { + case 0: /* fall-through */ + case 1: /* fall-through */ + case 2: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush); + FUZZ_ZASSERT(ret); + if (ret == 0) + mode = -1; + break; + } + case 3: { + size_t ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + /* Reset the compressor when the frame is finished */ + if (ret == 0) { + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) { + size_t const remaining = in.size - in.pos; + FUZZ_setRandomParameters(cctx, remaining, producer); + } + mode = -1; + } + break; + } + case 4: { + ZSTD_inBuffer nullIn = { NULL, 0, 0 }; + ZSTD_outBuffer nullOut = { NULL, 0, 0 }; + size_t const ret = ZSTD_compressStream2(cctx, &nullOut, &nullIn, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + } + /* fall-through */ + default: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + mode = -1; + } + } + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + } + } + for (;;) { + ZSTD_inBuffer in = {NULL, 0, 0}; + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + if (ret == 0) + break; + } + return dstSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + /* Give a random portion of src data to the producer, to use for + parameter generation. 
The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + neededBufSize = ZSTD_compressBound(size) * 15; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + rBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + ZSTD_dictContentType_e dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2); + FUZZ_dict_t dict = FUZZ_train(src, size, producer); + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + + size_t const cSize = compress(cBuf, neededBufSize, src, size, dict.buff, dict.size, producer, refPrefix, dictContentType); + + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict.buff, dict.size, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + size_t const rSize = + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); + FUZZ_ZASSERT(rSize); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + free(dict.buff); + } + + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/fuzz.h b/src/zstd/tests/fuzz/fuzz.h new file mode 100644 index 000000000..8ee964536 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * Fuzz target interface. + * Fuzz targets have some common parameters passed as macros during compilation. + * Check the documentation for each individual fuzzer for more parameters. + * + * @param STATEFUL_FUZZING: + * Define this to reuse state between fuzzer runs. This can be useful to + * test code paths which are only executed when contexts are reused. + * WARNING: Makes reproducing crashes much harder. + * Default: Not defined. + * @param DEBUGLEVEL: + * This is a parameter for the zstd library. Defining `DEBUGLEVEL=1` + * enables assert() statements in the zstd library. Higher levels enable + * logging, so aren't recommended. Defining `DEBUGLEVEL=1` is + * recommended. + * @param MEM_FORCE_MEMORY_ACCESS: + * This flag controls how the zstd library accesses unaligned memory. + * It can be undefined, or 0 through 2. If it is undefined, it selects + * the method to use based on the compiler. If testing with UBSAN set + * MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method. + * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + * This is the canonical flag to enable deterministic builds for fuzzing. + * Changes to zstd for fuzzing are gated behind this define. + * It is recommended to define this when building zstd for fuzzing. 
+ */ + +#ifndef FUZZ_H +#define FUZZ_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/zstd/tests/fuzz/fuzz.py b/src/zstd/tests/fuzz/fuzz.py new file mode 100755 index 000000000..6875d1d60 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz.py @@ -0,0 +1,887 @@ +#!/usr/bin/env python + +# ################################################################ +# Copyright (c) 2016-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ########################################################################## + +import argparse +import contextlib +import os +import re +import shlex +import shutil +import subprocess +import sys +import tempfile + + +def abs_join(a, *p): + return os.path.abspath(os.path.join(a, *p)) + + +class InputType(object): + RAW_DATA = 1 + COMPRESSED_DATA = 2 + DICTIONARY_DATA = 3 + + +class FrameType(object): + ZSTD = 1 + BLOCK = 2 + + +class TargetInfo(object): + def __init__(self, input_type, frame_type=FrameType.ZSTD): + self.input_type = input_type + self.frame_type = frame_type + + +# Constants +FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) +CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') +TARGET_INFO = { + 'simple_round_trip': TargetInfo(InputType.RAW_DATA), + 'stream_round_trip': TargetInfo(InputType.RAW_DATA), + 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK), + 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK), + 
'dictionary_round_trip': TargetInfo(InputType.RAW_DATA), + 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA), + 'simple_compress': TargetInfo(InputType.RAW_DATA), + 'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA), + 'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA), + 'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA), +} +TARGETS = list(TARGET_INFO.keys()) +ALL_TARGETS = TARGETS + ['all'] +FUZZ_RNG_SEED_SIZE = 4 + +# Standard environment variables +CC = os.environ.get('CC', 'cc') +CXX = os.environ.get('CXX', 'c++') +CPPFLAGS = os.environ.get('CPPFLAGS', '') +CFLAGS = os.environ.get('CFLAGS', '-O3') +CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS) +LDFLAGS = os.environ.get('LDFLAGS', '') +MFLAGS = os.environ.get('MFLAGS', '-j') + +# Fuzzing environment variables +LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a') +AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz') +DECODECORPUS = os.environ.get('DECODECORPUS', + abs_join(FUZZ_DIR, '..', 'decodecorpus')) +ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd')) + +# Sanitizer environment variables +MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '') +MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '') +MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '') +MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '') + + +def create(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + os.makedirs(d) + return d + + +def check(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + return None + return d + + +@contextlib.contextmanager +def tmpdir(): + dirpath = tempfile.mkdtemp() + try: + yield dirpath + finally: + shutil.rmtree(dirpath, ignore_errors=True) + + +def parse_targets(in_targets): + targets = set() + for target in in_targets: + if not target: + continue + if target == 'all': + targets = targets.union(TARGETS) + elif target in 
TARGETS: + targets.add(target) + else: + raise RuntimeError('{} is not a valid target'.format(target)) + return list(targets) + + +def targets_parser(args, description): + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + args.TARGET = parse_targets(args.TARGET) + + return args + + +def parse_env_flags(args, flags): + """ + Look for flags set by environment variables. + """ + san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)) + nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)) + + def set_sanitizer(sanitizer, default, san, nosan): + if sanitizer in san and sanitizer in nosan: + raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'. + format(s=sanitizer)) + if sanitizer in san: + return True + if sanitizer in nosan: + return False + return default + + san = set(san_flags.split(',')) + nosan = set(nosan_flags.split(',')) + + args.asan = set_sanitizer('address', args.asan, san, nosan) + args.msan = set_sanitizer('memory', args.msan, san, nosan) + args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan) + + args.sanitize = args.asan or args.msan or args.ubsan + + return args + + +def compiler_version(cc, cxx): + """ + Determines the compiler and version. + Only works for clang and gcc. 
+ """ + cc_version_bytes = subprocess.check_output([cc, "--version"]) + cxx_version_bytes = subprocess.check_output([cxx, "--version"]) + compiler = None + version = None + if b'clang' in cc_version_bytes: + assert(b'clang' in cxx_version_bytes) + compiler = 'clang' + elif b'gcc' in cc_version_bytes: + assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes) + compiler = 'gcc' + if compiler is not None: + version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' + version_match = re.search(version_regex, cc_version_bytes) + version = tuple(int(version_match.group(i)) for i in range(1, 4)) + return compiler, version + + +def overflow_ubsan_flags(cc, cxx): + compiler, version = compiler_version(cc, cxx) + if compiler == 'gcc': + return ['-fno-sanitize=signed-integer-overflow'] + if compiler == 'clang' and version >= (5, 0, 0): + return ['-fno-sanitize=pointer-overflow'] + return [] + + +def build_parser(args): + description = """ + Cleans the repository and builds a fuzz target (or all). + Many flags default to environment variables (default says $X='y'). + Options that aren't enabling features default to the correct values for + zstd. + Enable sanitizers with --enable-*san. + For regression testing just build. + For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage. + For AFL set CC and CXX to AFL's compilers and set + LIB_FUZZING_ENGINE='libregression.a'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--lib-fuzzing-engine', + dest='lib_fuzzing_engine', + type=str, + default=LIB_FUZZING_ENGINE, + help=('The fuzzing engine to use e.g. 
/path/to/libFuzzer.a ' + "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE))) + + fuzz_group = parser.add_mutually_exclusive_group() + fuzz_group.add_argument( + '--enable-coverage', + dest='coverage', + action='store_true', + help='Enable coverage instrumentation (-fsanitize-coverage)') + fuzz_group.add_argument( + '--enable-fuzzer', + dest='fuzzer', + action='store_true', + help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled ' + 'LIB_FUZZING_ENGINE is ignored') + ) + + parser.add_argument( + '--enable-asan', dest='asan', action='store_true', help='Enable ASAN') + parser.add_argument( + '--enable-ubsan', + dest='ubsan', + action='store_true', + help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan-pointer-overflow', + dest='ubsan_pointer_overflow', + action='store_true', + help='Enable UBSAN pointer overflow check (known failure)') + parser.add_argument( + '--enable-msan', dest='msan', action='store_true', help='Enable MSAN') + parser.add_argument( + '--enable-msan-track-origins', dest='msan_track_origins', + action='store_true', help='Enable MSAN origin tracking') + parser.add_argument( + '--msan-extra-cppflags', + dest='msan_extra_cppflags', + type=str, + default=MSAN_EXTRA_CPPFLAGS, + help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')". + format(MSAN_EXTRA_CPPFLAGS)) + parser.add_argument( + '--msan-extra-cflags', + dest='msan_extra_cflags', + type=str, + default=MSAN_EXTRA_CFLAGS, + help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format( + MSAN_EXTRA_CFLAGS)) + parser.add_argument( + '--msan-extra-cxxflags', + dest='msan_extra_cxxflags', + type=str, + default=MSAN_EXTRA_CXXFLAGS, + help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')". + format(MSAN_EXTRA_CXXFLAGS)) + parser.add_argument( + '--msan-extra-ldflags', + dest='msan_extra_ldflags', + type=str, + default=MSAN_EXTRA_LDFLAGS, + help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')". 
+ format(MSAN_EXTRA_LDFLAGS)) + parser.add_argument( + '--enable-sanitize-recover', + dest='sanitize_recover', + action='store_true', + help='Non-fatal sanitizer errors where possible') + parser.add_argument( + '--debug', + dest='debug', + type=int, + default=1, + help='Set DEBUGLEVEL (default: 1)') + parser.add_argument( + '--force-memory-access', + dest='memory_access', + type=int, + default=0, + help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)') + parser.add_argument( + '--fuzz-rng-seed-size', + dest='fuzz_rng_seed_size', + type=int, + default=4, + help='Set FUZZ_RNG_SEED_SIZE (default: 4)') + parser.add_argument( + '--disable-fuzzing-mode', + dest='fuzzing_mode', + action='store_false', + help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION') + parser.add_argument( + '--enable-stateful-fuzzing', + dest='stateful_fuzzing', + action='store_true', + help='Reuse contexts between runs (makes reproduction impossible)') + parser.add_argument( + '--cc', + dest='cc', + type=str, + default=CC, + help="CC (default: $CC='{}')".format(CC)) + parser.add_argument( + '--cxx', + dest='cxx', + type=str, + default=CXX, + help="CXX (default: $CXX='{}')".format(CXX)) + parser.add_argument( + '--cppflags', + dest='cppflags', + type=str, + default=CPPFLAGS, + help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS)) + parser.add_argument( + '--cflags', + dest='cflags', + type=str, + default=CFLAGS, + help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS)) + parser.add_argument( + '--cxxflags', + dest='cxxflags', + type=str, + default=CXXFLAGS, + help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS)) + parser.add_argument( + '--ldflags', + dest='ldflags', + type=str, + default=LDFLAGS, + help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS)) + parser.add_argument( + '--mflags', + dest='mflags', + type=str, + default=MFLAGS, + help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS)) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) 
to build {{{}}}'.format(', '.join(ALL_TARGETS)) + ) + args = parser.parse_args(args) + args = parse_env_flags(args, ' '.join( + [args.cppflags, args.cflags, args.cxxflags, args.ldflags])) + + # Check option sanity + if args.msan and (args.asan or args.ubsan): + raise RuntimeError('MSAN may not be used with any other sanitizers') + if args.msan_track_origins and not args.msan: + raise RuntimeError('--enable-msan-track-origins requires MSAN') + if args.ubsan_pointer_overflow and not args.ubsan: + raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN') + if args.sanitize_recover and not args.sanitize: + raise RuntimeError('--enable-sanitize-recover but no sanitizers used') + + return args + + +def build(args): + try: + args = build_parser(args) + except Exception as e: + print(e) + return 1 + # The compilation flags we are setting + targets = args.TARGET + cc = args.cc + cxx = args.cxx + cppflags = shlex.split(args.cppflags) + cflags = shlex.split(args.cflags) + ldflags = shlex.split(args.ldflags) + cxxflags = shlex.split(args.cxxflags) + mflags = shlex.split(args.mflags) + # Flags to be added to both cflags and cxxflags + common_flags = [] + + cppflags += [ + '-DDEBUGLEVEL={}'.format(args.debug), + '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access), + '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size), + ] + + # Set flags for options + assert not (args.fuzzer and args.coverage) + if args.coverage: + common_flags += [ + '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp' + ] + if args.fuzzer: + common_flags += ['-fsanitize=fuzzer'] + args.lib_fuzzing_engine = '' + + mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)] + + if args.sanitize_recover: + recover_flags = ['-fsanitize-recover=all'] + else: + recover_flags = ['-fno-sanitize-recover=all'] + if args.sanitize: + common_flags += recover_flags + + if args.msan: + msan_flags = ['-fsanitize=memory'] + if args.msan_track_origins: + msan_flags += 
['-fsanitize-memory-track-origins'] + common_flags += msan_flags + # Append extra MSAN flags (it might require special setup) + cppflags += [args.msan_extra_cppflags] + cflags += [args.msan_extra_cflags] + cxxflags += [args.msan_extra_cxxflags] + ldflags += [args.msan_extra_ldflags] + + if args.asan: + common_flags += ['-fsanitize=address'] + + if args.ubsan: + ubsan_flags = ['-fsanitize=undefined'] + if not args.ubsan_pointer_overflow: + ubsan_flags += overflow_ubsan_flags(cc, cxx) + common_flags += ubsan_flags + + if args.stateful_fuzzing: + cppflags += ['-DSTATEFUL_FUZZING'] + + if args.fuzzing_mode: + cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION'] + + if args.lib_fuzzing_engine == 'libregression.a': + targets = ['libregression.a'] + targets + + # Append the common flags + cflags += common_flags + cxxflags += common_flags + + # Prepare the flags for Make + cc_str = "CC={}".format(cc) + cxx_str = "CXX={}".format(cxx) + cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags)) + cflags_str = "CFLAGS={}".format(' '.join(cflags)) + cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags)) + ldflags_str = "LDFLAGS={}".format(' '.join(ldflags)) + + # Print the flags + print('MFLAGS={}'.format(' '.join(mflags))) + print(cc_str) + print(cxx_str) + print(cppflags_str) + print(cflags_str) + print(cxxflags_str) + print(ldflags_str) + + # Clean and build + clean_cmd = ['make', 'clean'] + mflags + print(' '.join(clean_cmd)) + subprocess.check_call(clean_cmd) + build_cmd = [ + 'make', + cc_str, + cxx_str, + cppflags_str, + cflags_str, + cxxflags_str, + ldflags_str, + ] + mflags + targets + print(' '.join(build_cmd)) + subprocess.check_call(build_cmd) + return 0 + + +def libfuzzer_parser(args): + description = """ + Runs a libfuzzer binary. + Passes all extra arguments to libfuzzer. + The fuzzer should have been built with LIB_FUZZING_ENGINE pointing to + libFuzzer.a. 
+ Generates output in the CORPORA directory, puts crashes in the ARTIFACT + directory, and takes extra input from the SEED directory. + To merge AFL's output pass the SEED as AFL's output directory and pass + '-merge=1'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--artifact', + type=str, + help='Override the default artifact dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-crash'))) + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + return args + + +def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None): + if corpora is None: + corpora = abs_join(CORPORA_DIR, target) + if artifact is None: + artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + if seed is None: + seed = abs_join(CORPORA_DIR, '{}-seed'.format(target)) + if extra_args is None: + extra_args = [] + + target = abs_join(FUZZ_DIR, target) + + corpora = [create(corpora)] + artifact = create(artifact) + seed = check(seed) + + corpora += [artifact] + if seed is not None: + corpora += [seed] + + cmd = [target, '-artifact_prefix={}/'.format(artifact)] + cmd += corpora + extra_args + print(' '.join(cmd)) + subprocess.check_call(cmd) + + +def libfuzzer_cmd(args): + try: + args = libfuzzer_parser(args) + except Exception as e: + print(e) + return 1 + libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra) + return 0 + + +def afl_parser(args): + 
description = """ + Runs an afl-fuzz job. + Passes all extra arguments to afl-fuzz. + The fuzzer should have been built with CC/CXX set to the AFL compilers, + and with LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA and writes output to OUTPUT. + Uses AFL_FUZZ as the binary (set from flag or environment variable). + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--output', + type=str, + help='Override the default AFL output dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-afl'))) + parser.add_argument( + '--afl-fuzz', + type=str, + default=AFL_FUZZ, + help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.corpora: + args.corpora = abs_join(CORPORA_DIR, args.TARGET) + if not args.output: + args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET)) + + return args + + +def afl(args): + try: + args = afl_parser(args) + except Exception as e: + print(e) + return 1 + target = abs_join(FUZZ_DIR, args.TARGET) + + corpora = create(args.corpora) + output = create(args.output) + + cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra + cmd += [target, '@@'] + print(' '.join(cmd)) + subprocess.call(cmd) + return 0 + + +def regression(args): + try: + description = """ + Runs one or more regression tests. + The fuzzer should have been built with + LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA. 
+ """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + for target in args.TARGET: + corpora = create(abs_join(CORPORA_DIR, target)) + target = abs_join(FUZZ_DIR, target) + cmd = [target, corpora] + print(' '.join(cmd)) + subprocess.check_call(cmd) + return 0 + + +def gen_parser(args): + description = """ + Generate a seed corpus appropriate for TARGET with data generated with + decodecorpus. + The fuzz inputs are prepended with a seed before the zstd data, so the + output of decodecorpus shouldn't be used directly. + Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and + puts the output in SEED. + DECODECORPUS is the decodecorpus binary, and must already be built. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--number', + '-n', + type=int, + default=100, + help='Number of samples to generate') + parser.add_argument( + '--max-size-log', + type=int, + default=18, + help='Maximum sample size to generate') + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + '--decodecorpus', + type=str, + default=DECODECORPUS, + help="decodecorpus binary (default: $DECODECORPUS='{}')".format( + DECODECORPUS)) + parser.add_argument( + '--zstd', + type=str, + default=ZSTD, + help="zstd binary (default: $ZSTD='{}')".format(ZSTD)) + parser.add_argument( + '--fuzz-rng-seed-size', + type=int, + default=4, + help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)" + ) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.seed: + args.seed = abs_join(CORPORA_DIR, 
'{}-seed'.format(args.TARGET)) + + if not os.path.isfile(args.decodecorpus): + raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'". + format(args.decodecorpus, abs_join(FUZZ_DIR, '..'))) + + return args + + +def gen(args): + try: + args = gen_parser(args) + except Exception as e: + print(e) + return 1 + + seed = create(args.seed) + with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict: + info = TARGET_INFO[args.TARGET] + + if info.input_type == InputType.DICTIONARY_DATA: + number = max(args.number, 1000) + else: + number = args.number + cmd = [ + args.decodecorpus, + '-n{}'.format(args.number), + '-p{}/'.format(compressed), + '-o{}'.format(decompressed), + ] + + if info.frame_type == FrameType.BLOCK: + cmd += [ + '--gen-blocks', + '--max-block-size-log={}'.format(min(args.max_size_log, 17)) + ] + else: + cmd += ['--max-content-size-log={}'.format(args.max_size_log)] + + print(' '.join(cmd)) + subprocess.check_call(cmd) + + if info.input_type == InputType.RAW_DATA: + print('using decompressed data in {}'.format(decompressed)) + samples = decompressed + elif info.input_type == InputType.COMPRESSED_DATA: + print('using compressed data in {}'.format(compressed)) + samples = compressed + else: + assert info.input_type == InputType.DICTIONARY_DATA + print('making dictionary data from {}'.format(decompressed)) + samples = dict + min_dict_size_log = 9 + max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log) + for dict_size_log in range(min_dict_size_log, max_dict_size_log): + dict_size = 1 << dict_size_log + cmd = [ + args.zstd, + '--train', + '-r', decompressed, + '--maxdict={}'.format(dict_size), + '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size)) + ] + print(' '.join(cmd)) + subprocess.check_call(cmd) + + # Copy the samples over and prepend the RNG seeds + for name in os.listdir(samples): + samplename = abs_join(samples, name) + outname = abs_join(seed, name) + with open(samplename, 'rb') as sample: + with open(outname, 
'wb') as out: + CHUNK_SIZE = 131072 + chunk = sample.read(CHUNK_SIZE) + while len(chunk) > 0: + out.write(chunk) + chunk = sample.read(CHUNK_SIZE) + return 0 + + +def minimize(args): + try: + description = """ + Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in + TARGET_seed_corpus. All extra args are passed to libfuzzer. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Merge the corpus + anything else into the seed_corpus + corpus = abs_join(CORPORA_DIR, target) + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + extra_args = [corpus, "-merge=1"] + args.extra + libfuzzer(target, corpora=seed_corpus, extra_args=extra_args) + seeds = set(os.listdir(seed_corpus)) + # Copy all crashes directly into the seed_corpus if not already present + crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + for crash in os.listdir(crashes): + if crash not in seeds: + shutil.copy(abs_join(crashes, crash), seed_corpus) + seeds.add(crash) + + +def zip_cmd(args): + try: + description = """ + Zips up the seed corpus. 
+ """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Zip the seed_corpus + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + zip_file = "{}.zip".format(seed_corpus) + cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."] + print(' '.join(cmd)) + subprocess.check_call(cmd, cwd=seed_corpus) + + +def list_cmd(args): + print("\n".join(TARGETS)) + + +def short_help(args): + name = args[0] + print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name)) + + +def help(args): + short_help(args) + print("\tfuzzing helpers (select a command and pass -h for help)\n") + print("Options:") + print("\t-h, --help\tPrint this message") + print("") + print("Commands:") + print("\tbuild\t\tBuild a fuzzer") + print("\tlibfuzzer\tRun a libFuzzer fuzzer") + print("\tafl\t\tRun an AFL fuzzer") + print("\tregression\tRun a regression test") + print("\tgen\t\tGenerate a seed corpus for a fuzzer") + print("\tminimize\tMinimize the test corpora") + print("\tzip\t\tZip the minimized corpora up") + print("\tlist\t\tList the available targets") + + +def main(): + args = sys.argv + if len(args) < 2: + help(args) + return 1 + if args[1] == '-h' or args[1] == '--help' or args[1] == '-H': + help(args) + return 1 + command = args.pop(1) + args[0] = "{} {}".format(args[0], command) + if command == "build": + return build(args) + if command == "libfuzzer": + return libfuzzer_cmd(args) + if command == "regression": + return regression(args) + if command == "afl": + return afl(args) + if command == "gen": + return gen(args) + if command == "minimize": + return minimize(args) + if command == "zip": + return zip_cmd(args) + if command == "list": + return list_cmd(args) + short_help(args) + print("Error: No such command {} (pass -h for help)".format(command)) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/zstd/tests/fuzz/fuzz_data_producer.c 
b/src/zstd/tests/fuzz/fuzz_data_producer.c new file mode 100644 index 000000000..6518af309 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_data_producer.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "fuzz_data_producer.h" + +struct FUZZ_dataProducer_s{ + const uint8_t *data; + size_t size; +}; + +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) { + FUZZ_dataProducer_t *producer = FUZZ_malloc(sizeof(FUZZ_dataProducer_t)); + + producer->data = data; + producer->size = size; + return producer; +} + +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); } + +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max) { + FUZZ_ASSERT(min <= max); + + uint32_t range = max - min; + uint32_t rolling = range; + uint32_t result = 0; + + while (rolling > 0 && producer->size > 0) { + uint8_t next = *(producer->data + producer->size - 1); + producer->size -= 1; + result = (result << 8) | next; + rolling >>= 8; + } + + if (range == 0xffffffff) { + return result; + } + + return min + result % (range + 1); +} + +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) { + return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff); +} + +int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, + int32_t min, int32_t max) +{ + FUZZ_ASSERT(min <= max); + + if (min < 0) + return (int)FUZZ_dataProducer_uint32Range(producer, 0, max - min) + min; + + return FUZZ_dataProducer_uint32Range(producer, min, max); +} + +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){ + return producer->size; +} + 
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize) +{ + newSize = newSize > producer->size ? producer->size : newSize; + + size_t remaining = producer->size - newSize; + producer->data = producer->data + remaining; + producer->size = newSize; + return remaining; +} + +size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer) +{ + size_t producerSliceSize = FUZZ_dataProducer_uint32Range( + producer, 0, producer->size); + return FUZZ_dataProducer_contract(producer, producerSliceSize); +} diff --git a/src/zstd/tests/fuzz/fuzz_data_producer.h b/src/zstd/tests/fuzz/fuzz_data_producer.h new file mode 100644 index 000000000..41e0b52d5 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_data_producer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * Helper APIs for generating random data from input data stream. + The producer reads bytes from the end of the input and appends them together + to generate a random number in the requested range. If it runs out of input + data, it will keep returning the same value (min) over and over again. + + */ + +#ifndef FUZZ_DATA_PRODUCER_H +#define FUZZ_DATA_PRODUCER_H + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "fuzz_helpers.h" + +/* Struct used for maintaining the state of the data */ +typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t; + +/* Returns a data producer state struct. Use for producer initialization. 
*/ +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size); + +/* Frees the data producer */ +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer); + +/* Returns value between [min, max] */ +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max); + +/* Returns a uint32 value */ +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer); + +/* Returns a signed value between [min, max] */ +int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, + int32_t min, int32_t max); + +/* Returns the size of the remaining bytes of data in the producer */ +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer); + +/* Restricts the producer to only the last newSize bytes of data. +If newSize > current data size, nothing happens. Returns the number of bytes +the producer won't use anymore, after contracting. */ +size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize); + +/* Restricts the producer to use only the last X bytes of data, where X is + a random number in the interval [0, data_size]. Returns the size of the + remaining data the producer won't use anymore (the prefix). */ +size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer); +#endif // FUZZ_DATA_PRODUCER_H diff --git a/src/zstd/tests/fuzz/fuzz_helpers.c b/src/zstd/tests/fuzz/fuzz_helpers.c new file mode 100644 index 000000000..b80dc7571 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_helpers.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#include "fuzz_helpers.h" + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +void* FUZZ_malloc(size_t size) +{ + if (size > 0) { + void* const mem = malloc(size); + FUZZ_ASSERT(mem); + return mem; + } + return NULL; +} + +int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size) +{ + if (size == 0) { + return 0; + } + return memcmp(lhs, rhs, size); +}
\ No newline at end of file diff --git a/src/zstd/tests/fuzz/fuzz_helpers.h b/src/zstd/tests/fuzz/fuzz_helpers.h new file mode 100644 index 000000000..cde2c4ea7 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_helpers.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * Helper functions for fuzzing. + */ + +#ifndef FUZZ_HELPERS_H +#define FUZZ_HELPERS_H + +#include "debug.h" +#include "fuzz.h" +#include "xxhash.h" +#include "zstd.h" +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define FUZZ_QUOTE_IMPL(str) #str +#define FUZZ_QUOTE(str) FUZZ_QUOTE_IMPL(str) + +/** + * Asserts for fuzzing that are always enabled. + */ +#define FUZZ_ASSERT_MSG(cond, msg) \ + ((cond) ? (void)0 \ + : (fprintf(stderr, "%s: %u: Assertion: `%s' failed. %s\n", __FILE__, \ + __LINE__, FUZZ_QUOTE(cond), (msg)), \ + abort())) +#define FUZZ_ASSERT(cond) FUZZ_ASSERT_MSG((cond), ""); +#define FUZZ_ZASSERT(code) \ + FUZZ_ASSERT_MSG(!ZSTD_isError(code), ZSTD_getErrorName(code)) + +#if defined(__GNUC__) +#define FUZZ_STATIC static __inline __attribute__((unused)) +#elif defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#define FUZZ_STATIC static inline +#elif defined(_MSC_VER) +#define FUZZ_STATIC static __inline +#else +#define FUZZ_STATIC static +#endif + +/** + * malloc except return NULL for zero sized data and FUZZ_ASSERT + * that malloc doesn't fail. + */ +void* FUZZ_malloc(size_t size); + +/** + * memcmp but accepts NULL. 
+ */ +int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/zstd/tests/fuzz/raw_dictionary_round_trip.c b/src/zstd/tests/fuzz/raw_dictionary_round_trip.c new file mode 100644 index 000000000..08e5fd9ed --- /dev/null +++ b/src/zstd/tests/fuzz/raw_dictionary_round_trip.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress) with + * a raw content dictionary, compares the result with the original, and calls + * abort() on corruption. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + const void *dict, size_t dictSize, + FUZZ_dataProducer_t *producer) +{ + ZSTD_dictContentType_e const dictContentType = ZSTD_dct_rawContent; + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + size_t cSize; + + FUZZ_setRandomParameters(cctx, srcSize, producer); + /* Disable checksum so we can use sizes smaller than compress bound. 
*/ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict, dictSize, + ZSTD_dct_rawContent)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + ZSTD_dct_rawContent)); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(cSize); + + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict, dictSize, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict, dictSize, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + { + size_t const ret = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, cSize); + return ret; + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + uint8_t const* const srcBuf = src; + size_t const srcSize = FUZZ_dataProducer_uint32Range(producer, 0, size); + uint8_t const* const dictBuf = srcBuf + srcSize; + size_t const dictSize = size - srcSize; + size_t const decompSize = srcSize; + void* const decompBuf = FUZZ_malloc(decompSize); + size_t compSize = ZSTD_compressBound(srcSize); + void* compBuf; + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we force the checksum to be disabled, + * giving us 4 bytes of overhead. 
+ */ + compSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + compBuf = FUZZ_malloc(compSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(decompBuf, decompSize, compBuf, compSize, srcBuf, srcSize, dictBuf, dictSize, producer); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == srcSize, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, decompBuf, srcSize), "Corruption!"); + } + free(decompBuf); + free(compBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/regression_driver.c b/src/zstd/tests/fuzz/regression_driver.c new file mode 100644 index 000000000..8180ca822 --- /dev/null +++ b/src/zstd/tests/fuzz/regression_driver.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "fuzz.h" +#include "fuzz_helpers.h" +#include "util.h" +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char const **argv) { + size_t const kMaxFileSize = (size_t)1 << 27; + int const kFollowLinks = 1; + FileNamesTable* files; + const char** const fnTable = argv + 1; + uint8_t *buffer = NULL; + size_t bufferSize = 0; + unsigned i; + unsigned numFilesTested = 0; + int ret = 0; + + { + unsigned const numFiles = (unsigned)(argc - 1); +#ifdef UTIL_HAS_CREATEFILELIST + files = UTIL_createExpandedFNT(fnTable, numFiles, kFollowLinks); +#else + files = UTIL_createFNT_fromROTable(fnTable, numFiles); + assert(numFiles == files->tableSize); +#endif + } + if (!files) { + fprintf(stderr, "ERROR: Failed to create file names table\n"); + return 1; + } + if (files->tableSize == 0) + fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); + for (i = 0; i < files->tableSize; ++i) { + char const *fileName = files->fileNames[i]; + DEBUGLOG(3, "Running %s", fileName); + size_t const fileSize = UTIL_getFileSize(fileName); + size_t readSize; + FILE *file; + + /* Check that it is a regular file, and that the fileSize is valid. + * If it is not a regular file, then it may have been deleted since we + * constructed the list, so just skip it, but return an error exit code. 
+ */ + if (!UTIL_isRegularFile(fileName)) { + ret = 1; + continue; + } + FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName); + /* Ensure we have a large enough buffer allocated */ + if (fileSize > bufferSize) { + free(buffer); + buffer = (uint8_t *)malloc(fileSize); + FUZZ_ASSERT_MSG(buffer, fileName); + bufferSize = fileSize; + } + /* Open the file */ + file = fopen(fileName, "rb"); + FUZZ_ASSERT_MSG(file, fileName); + /* Read the file */ + readSize = fread(buffer, 1, fileSize, file); + FUZZ_ASSERT_MSG(readSize == fileSize, fileName); + /* Close the file */ + fclose(file); + /* Run the fuzz target */ + LLVMFuzzerTestOneInput(buffer, fileSize); + ++numFilesTested; + } + fprintf(stderr, "Tested %u files: ", numFilesTested); + if (ret == 0) { + fprintf(stderr, "Success!\n"); + } else { + fprintf(stderr, "Failure!\n"); + } + free(buffer); + UTIL_freeFileNamesTable(files); + return ret; +} diff --git a/src/zstd/tests/fuzz/simple_compress.c b/src/zstd/tests/fuzz/simple_compress.c new file mode 100644 index 000000000..b64f373eb --- /dev/null +++ b/src/zstd/tests/fuzz/simple_compress.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to compress the fuzzed data with the simple + * compression function with an output buffer that may be too small to + * ensure that the compressor never crashes. 
+ */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + size_t const maxSize = ZSTD_compressBound(size); + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, maxSize); + + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + + void *rBuf = FUZZ_malloc(bufSize); + ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel); + free(rBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/simple_decompress.c b/src/zstd/tests/fuzz/simple_decompress.c new file mode 100644 index 000000000..c3903ce8b --- /dev/null +++ b/src/zstd/tests/fuzz/simple_decompress.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. 
+ */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "fuzz_data_producer.h" + +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void *rBuf = FUZZ_malloc(bufSize); + + ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + free(rBuf); + + FUZZ_dataProducer_free(producer); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/simple_round_trip.c b/src/zstd/tests/fuzz/simple_round_trip.c new file mode 100644 index 000000000..2f008d06a --- /dev/null +++ b/src/zstd/tests/fuzz/simple_round_trip.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. 
+ */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + FUZZ_dataProducer_t *producer) +{ + size_t cSize; + size_t dSize; + int targetCBlockSize = 0; + if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) { + FUZZ_setRandomParameters(cctx, srcSize, producer); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize)); + } else { + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + cSize = ZSTD_compressCCtx( + cctx, compressed, compressedCapacity, src, srcSize, cLevel); + } + FUZZ_ZASSERT(cSize); + dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); + FUZZ_ZASSERT(dSize); + /* When superblock is enabled make sure we don't expand the block more than expected. 
*/ + if (targetCBlockSize != 0) { + size_t normalCSize; + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0)); + normalCSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(normalCSize); + { + size_t const bytesPerBlock = 3 /* block header */ + + 5 /* Literal header */ + + 6 /* Huffman jump table */ + + 3 /* number of sequences */ + + 1 /* symbol compression modes */; + size_t const expectedExpansion = bytesPerBlock * (1 + (normalCSize / MAX(1, targetCBlockSize))); + size_t const allowedExpansion = (srcSize >> 3) + 5 * expectedExpansion + 10; + FUZZ_ASSERT(cSize <= normalCSize + allowedExpansion); + } + } + return dSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const rBufSize = size; + void* rBuf = FUZZ_malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void* cBuf; + + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we don't use a dictionary, so the dictID + * field is empty, giving us 4 bytes of overhead. 
+ */ + cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + + cBuf = FUZZ_malloc(cBufSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + free(rBuf); + free(cBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/stream_decompress.c b/src/zstd/tests/fuzz/stream_decompress.c new file mode 100644 index 000000000..25901b1eb --- /dev/null +++ b/src/zstd/tests/fuzz/stream_decompress.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the streaming + * decompression API to ensure the decompressor never crashes.
+ */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "fuzz_data_producer.h" + +static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX; + +static ZSTD_DStream *dstream = NULL; +static void* buf = NULL; +uint32_t seed; + +static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer, uint32_t min) +{ + ZSTD_outBuffer buffer = { buf, 0, 0 }; + + buffer.size = (FUZZ_dataProducer_uint32Range(producer, min, kBufSize)); + FUZZ_ASSERT(buffer.size <= kBufSize); + + if (buffer.size == 0) { + buffer.dst = NULL; + } + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer, + uint32_t min) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, min, *size)); + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + if (buffer.size == 0) { + buffer.src = NULL; + } + + return buffer; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + /* Guarantee forward progress by refusing to generate 2 zero sized + * buffers in a row. 
*/ + int prevInWasZero = 0; + int prevOutWasZero = 0; + int stableOutBuffer; + ZSTD_outBuffer out; + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + /* Allocate all buffers and contexts if not already allocated */ + if (!buf) { + buf = FUZZ_malloc(kBufSize); + } + + if (!dstream) { + dstream = ZSTD_createDStream(); + FUZZ_ASSERT(dstream); + } else { + FUZZ_ZASSERT(ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only)); + } + + stableOutBuffer = FUZZ_dataProducer_uint32Range(producer, 0, 10) == 5; + if (stableOutBuffer) { + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dstream, ZSTD_d_stableOutBuffer, 1)); + out.dst = buf; + out.size = kBufSize; + out.pos = 0; + } + + while (size > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &size, producer, prevInWasZero ? 1 : 0); + prevInWasZero = in.size == 0; + while (in.pos != in.size) { + if (!stableOutBuffer || prevOutWasZero || FUZZ_dataProducer_uint32Range(producer, 0, 100) == 55) { + out = makeOutBuffer(producer, prevOutWasZero ? 1 : 0); + } + prevOutWasZero = out.size == 0; + size_t const rc = ZSTD_decompressStream(dstream, &out, &in); + if (ZSTD_isError(rc)) goto error; + } + } + +error: +#ifndef STATEFUL_FUZZING + ZSTD_freeDStream(dstream); dstream = NULL; +#endif + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/src/zstd/tests/fuzz/stream_round_trip.c b/src/zstd/tests/fuzz/stream_round_trip.c new file mode 100644 index 000000000..286d3871b --- /dev/null +++ b/src/zstd/tests/fuzz/stream_round_trip.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static uint8_t* cBuf = NULL; +static uint8_t* rBuf = NULL; +static size_t bufSize = 0; + +static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity, + FUZZ_dataProducer_t *producer) +{ + ZSTD_outBuffer buffer = { dst, 0, 0 }; + + FUZZ_ASSERT(capacity > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity)); + FUZZ_ASSERT(buffer.size <= capacity); + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size)); + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + return buffer; +} + +static size_t compress(uint8_t *dst, size_t capacity, + const uint8_t *src, size_t srcSize, + FUZZ_dataProducer_t *producer) +{ + size_t dstSize = 0; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + FUZZ_setRandomParameters(cctx, srcSize, producer); + + while (srcSize > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer); + /* Mode controls the action. If mode == -1 we pick a new mode */ + int mode = -1; + while (in.pos < in.size || mode != -1) { + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + /* Previous action finished, pick a new mode. 
*/ + if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9); + switch (mode) { + case 0: /* fall-through */ + case 1: /* fall-through */ + case 2: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush); + FUZZ_ZASSERT(ret); + if (ret == 0) + mode = -1; + break; + } + case 3: { + size_t ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + /* Reset the compressor when the frame is finished */ + if (ret == 0) { + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) { + size_t const remaining = in.size - in.pos; + FUZZ_setRandomParameters(cctx, remaining, producer); + } + mode = -1; + } + break; + } + case 4: { + ZSTD_inBuffer nullIn = { NULL, 0, 0 }; + ZSTD_outBuffer nullOut = { NULL, 0, 0 }; + size_t const ret = ZSTD_compressStream2(cctx, &nullOut, &nullIn, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + } + /* fall-through */ + default: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + mode = -1; + } + } + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + } + } + for (;;) { + ZSTD_inBuffer in = {NULL, 0, 0}; + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + if (ret == 0) + break; + } + return dstSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + /* Give a random portion of src data to the producer, to use for + parameter generation. 
The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + neededBufSize = ZSTD_compressBound(size) * 15; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + rBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const cSize = compress(cBuf, neededBufSize, src, size, producer); + size_t const rSize = + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); + FUZZ_ZASSERT(rSize); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/zstd_frame_info.c b/src/zstd/tests/fuzz/zstd_frame_info.c new file mode 100644 index 000000000..876a74e9a --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_frame_info.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target fuzzes all of the helper functions that consume compressed + * input. 
+ */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + ZSTD_frameHeader zfh; + if (size == 0) { + src = NULL; + } + /* You can fuzz any helper functions here that are fast, and take zstd + * compressed data as input. E.g. don't expect the input to be a dictionary, + * so don't fuzz ZSTD_getDictID_fromDict(). + */ + ZSTD_getFrameContentSize(src, size); + ZSTD_getDecompressedSize(src, size); + ZSTD_findFrameCompressedSize(src, size); + ZSTD_getDictID_fromFrame(src, size); + ZSTD_findDecompressedSize(src, size); + ZSTD_decompressBound(src, size); + ZSTD_frameHeaderSize(src, size); + ZSTD_isFrame(src, size); + ZSTD_getFrameHeader(&zfh, src, size); + ZSTD_getFrameHeader_advanced(&zfh, src, size, ZSTD_f_zstd1); + return 0; +} diff --git a/src/zstd/tests/fuzz/zstd_helpers.c b/src/zstd/tests/fuzz/zstd_helpers.c new file mode 100644 index 000000000..5680bd628 --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_helpers.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#define ZSTD_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY + +#include <string.h> + +#include "zstd_helpers.h" +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zdict.h" + +const int kMinClevel = -3; +const int kMaxClevel = 19; + +static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) +{ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); +} + +static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, + unsigned max, FUZZ_dataProducer_t *producer) { + unsigned const value = FUZZ_dataProducer_uint32Range(producer, min, max); + set(cctx, param, value); +} + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) +{ + /* Select compression parameters */ + ZSTD_compressionParameters cParams; + cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); + cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); + cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); + cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); + cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, + ZSTD_MINMATCH_MAX); + cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); + cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); + return ZSTD_adjustCParams(cParams, srcSize, 0); +} + +ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) +{ + /* Select frame parameters */ + ZSTD_frameParameters fParams; + fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + return fParams; +} + +ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) +{ + ZSTD_parameters params; + params.cParams = 
FUZZ_randomCParams(srcSize, producer); + params.fParams = FUZZ_randomFParams(producer); + return params; +} + +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) +{ + ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); + set(cctx, ZSTD_c_windowLog, cParams.windowLog); + set(cctx, ZSTD_c_hashLog, cParams.hashLog); + set(cctx, ZSTD_c_chainLog, cParams.chainLog); + set(cctx, ZSTD_c_searchLog, cParams.searchLog); + set(cctx, ZSTD_c_minMatch, cParams.minMatch); + set(cctx, ZSTD_c_targetLength, cParams.targetLength); + set(cctx, ZSTD_c_strategy, cParams.strategy); + /* Select frame parameters */ + setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); + setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); + setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); + /* Select long distance matching parameters */ + setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, producer); + setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); + setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, + ZSTD_LDM_MINMATCH_MAX, producer); + setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, + producer); + setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, + ZSTD_LDM_HASHRATELOG_MAX, producer); + /* Set misc parameters */ + setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); + setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); + setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); + setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); + setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); + } + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer); + } +} + +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, 
FUZZ_dataProducer_t *producer) +{ + size_t const dictSize = MAX(srcSize / 8, 1024); + size_t const totalSampleSize = dictSize * 11; + FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize }; + char* const samples = (char*)FUZZ_malloc(totalSampleSize); + unsigned nbSamples = 100; + size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples); + size_t pos = 0; + size_t sample = 0; + ZDICT_fastCover_params_t params; + + for (sample = 0; sample < nbSamples; ++sample) { + size_t const remaining = totalSampleSize - pos; + size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); + size_t const limit = MIN(srcSize - offset, remaining); + size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); + memcpy(samples + pos, src + offset, toCopy); + pos += toCopy; + samplesSizes[sample] = toCopy; + } + memset(samples + pos, 0, totalSampleSize - pos); + + memset(&params, 0, sizeof(params)); + params.accel = 5; + params.k = 40; + params.d = 8; + params.f = 14; + params.zParams.compressionLevel = 1; + dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, + samples, samplesSizes, nbSamples, params); + if (ZSTD_isError(dict.size)) { + free(dict.buff); + memset(&dict, 0, sizeof(dict)); + } + + free(samplesSizes); + free(samples); + + return dict; +} diff --git a/src/zstd/tests/fuzz/zstd_helpers.h b/src/zstd/tests/fuzz/zstd_helpers.h new file mode 100644 index 000000000..6a4e340d3 --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_helpers.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/** + * Helper functions for fuzzing.
+ */ + +#ifndef ZSTD_HELPERS_H +#define ZSTD_HELPERS_H + +#define ZSTD_STATIC_LINKING_ONLY + +#include "zstd.h" +#include "fuzz_data_producer.h" +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern const int kMinClevel; +extern const int kMaxClevel; + +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer); + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer); +ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer); +ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer); + +typedef struct { + void* buff; + size_t size; +} FUZZ_dict_t; + +/* Quickly train a dictionary from a source for fuzzing. + * NOTE: Don't use this to train production dictionaries, it is only optimized + * for speed, and doesn't care about dictionary quality. + */ +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer); + +#ifdef __cplusplus +} +#endif + +#endif /* ZSTD_HELPERS_H */ diff --git a/src/zstd/tests/fuzzer.c b/src/zstd/tests/fuzzer.c new file mode 100644 index 000000000..8ac2864f3 --- /dev/null +++ b/src/zstd/tests/fuzzer.c @@ -0,0 +1,3422 @@ +/* + * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <string.h> /* strcmp */ +#undef NDEBUG +#include <assert.h> +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */ +#include "debug.h" /* DEBUG_STATIC_ASSERT */ +#include "fse.h" +#include "zstd.h" /* ZSTD_VERSION_STRING */ +#include "zstd_errors.h" /* ZSTD_getErrorCode */ +#include "zstdmt_compress.h" +#define ZDICT_STATIC_LINKING_ONLY +#include "zdict.h" /* ZDICT_trainFromBuffer */ +#include "mem.h" +#include "datagen.h" /* RDG_genBuffer */ +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH64 */ +#include "util.h" +#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */ +/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */ +#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */ + + +/*-************************************ +* Constants +**************************************/ +#define GB *(1U<<30) + +static const int FUZ_compressibility_default = 50; +static const int nbTestsDefault = 30000; + + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +static const U64 g_refreshRate = SEC_TO_MICRO / 6; +static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define DISPLAYUPDATE(l, ...) \ + if (g_displayLevel>=l) { \ + if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } \ + } + + +/*-******************************************************* +* Compile time test +*********************************************************/ +#undef MIN +#undef MAX +/* Declaring the function, to avoid -Wmissing-prototype */ +void FUZ_bug976(void); +void FUZ_bug976(void) +{ /* these constants shall not depend on MIN() macro */ + assert(ZSTD_HASHLOG_MAX < 31); + assert(ZSTD_CHAINLOG_MAX < 31); +} + + +/*-******************************************************* +* Internal functions +*********************************************************/ +#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX(a,b) ((a)>(b)?(a):(b)) + +#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static U32 FUZ_rand(U32* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +static U32 FUZ_highbit32(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + while (v32) v32 >>= 1, nbBits++; + return nbBits; +} + + +/*============================================= +* Test macros +=============================================*/ +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + exit(1); \ +} } + +#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); goto _output_error; } +#define CHECK_NEWV(var, fn) size_t 
const CHECK_VAR(var, fn) +#define CHECK(fn) { CHECK_NEWV(err, fn); } +#define CHECKPLUS(var, fn, more) { CHECK_NEWV(var, fn); more; } + +#define CHECK_OP(op, lhs, rhs) { \ + if (!((lhs) op (rhs))) { \ + DISPLAY("Error L%u => FAILED %s %s %s ", __LINE__, #lhs, #op, #rhs); \ + goto _output_error; \ + } \ +} +#define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs) +#define CHECK_LT(lhs, rhs) CHECK_OP(<, lhs, rhs) + + +/*============================================= +* Memory Tests +=============================================*/ +#if defined(__APPLE__) && defined(__MACH__) + +#include <malloc/malloc.h> /* malloc_size */ + +typedef struct { + unsigned long long totalMalloc; + size_t currentMalloc; + size_t peakMalloc; + unsigned nbMalloc; + unsigned nbFree; +} mallocCounter_t; + +static const mallocCounter_t INIT_MALLOC_COUNTER = { 0, 0, 0, 0, 0 }; + +static void* FUZ_mallocDebug(void* counter, size_t size) +{ + mallocCounter_t* const mcPtr = (mallocCounter_t*)counter; + void* const ptr = malloc(size); + if (ptr==NULL) return NULL; + DISPLAYLEVEL(4, "allocating %u KB => effectively %u KB \n", + (unsigned)(size >> 10), (unsigned)(malloc_size(ptr) >> 10)); /* OS-X specific */ + mcPtr->totalMalloc += size; + mcPtr->currentMalloc += size; + if (mcPtr->currentMalloc > mcPtr->peakMalloc) + mcPtr->peakMalloc = mcPtr->currentMalloc; + mcPtr->nbMalloc += 1; + return ptr; +} + +static void FUZ_freeDebug(void* counter, void* address) +{ + mallocCounter_t* const mcPtr = (mallocCounter_t*)counter; + DISPLAYLEVEL(4, "freeing %u KB \n", (unsigned)(malloc_size(address) >> 10)); + mcPtr->nbFree += 1; + mcPtr->currentMalloc -= malloc_size(address); /* OS-X specific */ + free(address); +} + +static void FUZ_displayMallocStats(mallocCounter_t count) +{ + DISPLAYLEVEL(3, "peak:%6u KB, nbMallocs:%2u, total:%6u KB \n", + (unsigned)(count.peakMalloc >> 10), + count.nbMalloc, + (unsigned)(count.totalMalloc >> 10)); +} + +static int FUZ_mallocTests_internal(unsigned seed, double compressibility, 
unsigned part, + void* inBuffer, size_t inSize, void* outBuffer, size_t outSize) +{ + /* test only played in verbose mode, as they are long */ + if (g_displayLevel<3) return 0; + + /* Create compressible noise */ + if (!inBuffer || !outBuffer) { + DISPLAY("Not enough memory, aborting\n"); + exit(1); + } + RDG_genBuffer(inBuffer, inSize, compressibility, 0. /*auto*/, seed); + + /* simple compression tests */ + if (part <= 1) + { int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem); + CHECK_Z( ZSTD_compressCCtx(cctx, outBuffer, outSize, inBuffer, inSize, compressionLevel) ); + ZSTD_freeCCtx(cctx); + DISPLAYLEVEL(3, "compressCCtx level %i : ", compressionLevel); + FUZ_displayMallocStats(malcount); + } } + + /* streaming compression tests */ + if (part <= 2) + { int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cstream = ZSTD_createCStream_advanced(cMem); + ZSTD_outBuffer out = { outBuffer, outSize, 0 }; + ZSTD_inBuffer in = { inBuffer, inSize, 0 }; + CHECK_Z( ZSTD_initCStream(cstream, compressionLevel) ); + CHECK_Z( ZSTD_compressStream(cstream, &out, &in) ); + CHECK_Z( ZSTD_endStream(cstream, &out) ); + ZSTD_freeCStream(cstream); + DISPLAYLEVEL(3, "compressStream level %i : ", compressionLevel); + FUZ_displayMallocStats(malcount); + } } + + /* advanced MT API test */ + if (part <= 3) + { int nbThreads; + for (nbThreads=1; nbThreads<=4; nbThreads++) { + int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount 
}; + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) ); + CHECK_Z( ZSTD_compress2(cctx, outBuffer, outSize, inBuffer, inSize) ); + ZSTD_freeCCtx(cctx); + DISPLAYLEVEL(3, "compress_generic,-T%i,end level %i : ", + nbThreads, compressionLevel); + FUZ_displayMallocStats(malcount); + } } } + + /* advanced MT streaming API test */ + if (part <= 4) + { int nbThreads; + for (nbThreads=1; nbThreads<=4; nbThreads++) { + int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem); + ZSTD_outBuffer out = { outBuffer, outSize, 0 }; + ZSTD_inBuffer in = { inBuffer, inSize, 0 }; + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) ); + CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) ); + while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {} + ZSTD_freeCCtx(cctx); + DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ", + nbThreads, compressionLevel); + FUZ_displayMallocStats(malcount); + } } } + + return 0; +} + +static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) +{ + size_t const inSize = 64 MB + 16 MB + 4 MB + 1 MB + 256 KB + 64 KB; /* 85.3 MB */ + size_t const outSize = ZSTD_compressBound(inSize); + void* const inBuffer = malloc(inSize); + void* const outBuffer = malloc(outSize); + int result; + + /* Create compressible noise */ + if (!inBuffer || !outBuffer) { + DISPLAY("Not enough memory, aborting \n"); + exit(1); + } + + result = FUZ_mallocTests_internal(seed, compressibility, part, + inBuffer, inSize, outBuffer, outSize); + + 
free(inBuffer); + free(outBuffer); + return result; +} + +#else + +static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) +{ + (void)seed; (void)compressibility; (void)part; + return 0; +} + +#endif + +static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size) +{ + size_t i; + size_t j; + for(i = 0; i < seqsSize - 1; ++i) { + assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size); + assert(src + seqs[i].litLength + seqs[i].matchLength < src + size); + + memcpy(dst, src, seqs[i].litLength); + dst += seqs[i].litLength; + src += seqs[i].litLength; + size -= seqs[i].litLength; + + for (j = 0; j < seqs[i].matchLength; ++j) + dst[j] = dst[j - seqs[i].offset]; + dst += seqs[i].matchLength; + src += seqs[i].matchLength; + size -= seqs[i].matchLength; + } + memcpy(dst, src, size); +} + +/*============================================= +* Unit tests +=============================================*/ + +static int basicUnitTests(U32 const seed, double compressibility) +{ + size_t const CNBuffSize = 5 MB; + void* const CNBuffer = malloc(CNBuffSize); + size_t const compressedBufferSize = ZSTD_compressBound(CNBuffSize); + void* const compressedBuffer = malloc(compressedBufferSize); + void* const decodedBuffer = malloc(CNBuffSize); + int testResult = 0; + unsigned testNb=0; + size_t cSize; + + /* Create compressible noise */ + if (!CNBuffer || !compressedBuffer || !decodedBuffer) { + DISPLAY("Not enough memory, aborting\n"); + testResult = 1; + goto _end; + } + RDG_genBuffer(CNBuffer, CNBuffSize, compressibility, 0., seed); + + /* Basic tests */ + DISPLAYLEVEL(3, "test%3u : ZSTD_getErrorName : ", testNb++); + { const char* errorString = ZSTD_getErrorName(0); + DISPLAYLEVEL(3, "OK : %s \n", errorString); + } + + DISPLAYLEVEL(3, "test%3u : ZSTD_getErrorName with wrong value : ", testNb++); + { const char* errorString = ZSTD_getErrorName(499); + DISPLAYLEVEL(3, "OK : %s \n", errorString); + } + + 
DISPLAYLEVEL(3, "test%3u : min compression level : ", testNb++); + { int const mcl = ZSTD_minCLevel(); + DISPLAYLEVEL(3, "%i (OK) \n", mcl); + } + + DISPLAYLEVEL(3, "test%3u : ZSTD_versionNumber : ", testNb++); + { unsigned const vn = ZSTD_versionNumber(); + DISPLAYLEVEL(3, "%u (OK) \n", vn); + } + + DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (cctx==NULL) goto _output_error; + CHECK_VAR(cSize, ZSTD_compressCCtx(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, 1) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : size of cctx for level 1 : ", testNb++); + { size_t const cctxSize = ZSTD_sizeof_CCtx(cctx); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cctxSize); + } + ZSTD_freeCCtx(cctx); + } + + DISPLAYLEVEL(3, "test%3i : decompress skippable frame -8 size : ", testNb++); + { + char const skippable8[] = "\x50\x2a\x4d\x18\xf8\xff\xff\xff"; + size_t const size = ZSTD_decompress(NULL, 0, skippable8, 8); + if (!ZSTD_isError(size)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_getFrameContentSize test : ", testNb++); + { unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_getDecompressedSize test : ", testNb++); + { unsigned long long const rSize = ZSTD_getDecompressedSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_findDecompressedSize test : ", testNb++); + { unsigned long long const rSize = ZSTD_findDecompressedSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : tight ZSTD_decompressBound test : ", testNb++); + { + 
unsigned long long bound = ZSTD_decompressBound(compressedBuffer, cSize); + if (bound != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with invalid srcSize : ", testNb++); + { + unsigned long long bound = ZSTD_decompressBound(compressedBuffer, cSize - 1); + if (bound != ZSTD_CONTENTSIZE_ERROR) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + { size_t u; + for (u=0; u<CNBuffSize; u++) { + if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_checkCParams : ", testNb++); + { + ZSTD_parameters params = ZSTD_getParams(3, 0, 0); + assert(!ZSTD_checkCParams(params.cParams)); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_createDCtx_advanced and ZSTD_sizeof_DCtx: ", testNb++); + { + ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(ZSTD_defaultCMem); + assert(dctx != NULL); + assert(ZSTD_sizeof_DCtx(dctx) != 0); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : misc unaccounted for zstd symbols : ", testNb++); + { + /* %p takes a void*. In ISO C, it's illegal to cast a function pointer + * to a data pointer. (Although in POSIX you're required to be allowed + * to do it...) So we have to fall back to our trusty friend memcpy. 
*/ + unsigned (* const funcptr_getDictID)(const ZSTD_DDict* ddict) = + ZSTD_getDictID_fromDDict; + ZSTD_DStream* (* const funcptr_createDStream)( + ZSTD_customMem customMem) = ZSTD_createDStream_advanced; + void (* const funcptr_copyDCtx)( + ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx) = ZSTD_copyDCtx; + ZSTD_nextInputType_e (* const funcptr_nextInputType)(ZSTD_DCtx* dctx) = + ZSTD_nextInputType; + const void *voidptr_getDictID; + const void *voidptr_createDStream; + const void *voidptr_copyDCtx; + const void *voidptr_nextInputType; + DEBUG_STATIC_ASSERT(sizeof(funcptr_getDictID) == sizeof(voidptr_getDictID)); + memcpy( + (void*)&voidptr_getDictID, + (const void*)&funcptr_getDictID, + sizeof(void*)); + memcpy( + (void*)&voidptr_createDStream, + (const void*)&funcptr_createDStream, + sizeof(void*)); + memcpy( + (void*)&voidptr_copyDCtx, + (const void*)&funcptr_copyDCtx, + sizeof(void*)); + memcpy( + (void*)&voidptr_nextInputType, + (const void*)&funcptr_nextInputType, + sizeof(void*)); + DISPLAYLEVEL(3, "%p ", voidptr_getDictID); + DISPLAYLEVEL(3, "%p ", voidptr_createDStream); + DISPLAYLEVEL(3, "%p ", voidptr_copyDCtx); + DISPLAYLEVEL(3, "%p ", voidptr_nextInputType); + } + DISPLAYLEVEL(3, ": OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress with null dict : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL); + { size_t const r = ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + NULL, 0); + if (r != CNBuffSize) goto _output_error; + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress with null DDict : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL); + { size_t const r = ZSTD_decompress_usingDDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + NULL); + if (r != CNBuffSize) goto _output_error; + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress with 
1 missing byte : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize-1); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode((size_t)r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress with 1 too much byte : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize+1); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress too large input : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, compressedBufferSize); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress into NULL buffer : ", testNb++); + { size_t const r = ZSTD_decompress(NULL, 0, compressedBuffer, compressedBufferSize); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_dstSize_tooSmall) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with content size missing : ", testNb++); + { /* create compressed buffer with content size missing */ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) ); + CHECK_VAR(cSize, ZSTD_compress2(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize) ); + ZSTD_freeCCtx(cctx); + } + { /* ensure frame content size is missing */ + ZSTD_frameHeader zfh; + size_t const ret = ZSTD_getFrameHeader(&zfh, compressedBuffer, compressedBufferSize); + if (ret != 0 || zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error; + } + { /* ensure CNBuffSize <= decompressBound */ + unsigned long long const bound = 
ZSTD_decompressBound(compressedBuffer, compressedBufferSize); + if (CNBuffSize > bound) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d: check DCtx size is reduced after many oversized calls : ", testNb++); + { + size_t const largeFrameSrcSize = 200; + size_t const smallFrameSrcSize = 10; + size_t const nbFrames = 256; + + size_t i = 0, consumed = 0, produced = 0, prevDCtxSize = 0; + int sizeReduced = 0; + + BYTE* const dst = (BYTE*)compressedBuffer; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + + /* create a large frame and then a bunch of small frames */ + size_t srcSize = ZSTD_compress((void*)dst, + compressedBufferSize, CNBuffer, largeFrameSrcSize, 3); + for (i = 0; i < nbFrames; i++) + srcSize += ZSTD_compress((void*)(dst + srcSize), + compressedBufferSize - srcSize, CNBuffer, + smallFrameSrcSize, 3); + + /* decompressStream and make sure that dctx size was reduced at least once */ + while (consumed < srcSize) { + ZSTD_inBuffer in = {(void*)(dst + consumed), MIN(1, srcSize - consumed), 0}; + ZSTD_outBuffer out = {(BYTE*)CNBuffer + produced, CNBuffSize - produced, 0}; + ZSTD_decompressStream(dctx, &out, &in); + consumed += in.pos; + produced += out.pos; + + /* success! 
size was reduced from the previous frame */ + if (prevDCtxSize > ZSTD_sizeof_DCtx(dctx)) + sizeReduced = 1; + + prevDCtxSize = ZSTD_sizeof_DCtx(dctx); + } + + assert(sizeReduced); + + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++); + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + + size_t const size = (1U << 10); + size_t const dstCapacity = ZSTD_compressBound(size); + void* dict = (void*)malloc(size); + void* src = (void*)malloc(size); + void* dst = (void*)malloc(dstCapacity); + + RDG_genBuffer(dict, size, 0.5, 0.5, seed); + RDG_genBuffer(src, size, 0.5, 0.5, seed); + + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + assert(!ZSTD_isError(ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, size, dict, size, 3))); + + ZSTD_freeCCtx(cctx); + free(dict); + free(src); + free(dst); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : testing dict compression with enableLdm and forceMaxWindow : ", testNb++); + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + void* dict = (void*)malloc(CNBuffSize); + int nbWorkers; + + for (nbWorkers = 0; nbWorkers < 3; ++nbWorkers) { + RDG_genBuffer(dict, CNBuffSize, 0.5, 0.5, seed); + RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed); + + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbWorkers)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, CNBuffSize)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(cSize); + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, CNBuffSize)); + } + + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + free(dict); + } + 
DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : testing ldm dictionary gets invalidated : ", testNb++); + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + void* dict = (void*)malloc(CNBuffSize); + size_t const kWindowLog = 10; + size_t const kWindowSize = (size_t)1 << kWindowLog; + size_t const dictSize = kWindowSize * 10; + size_t const srcSize1 = kWindowSize / 2; + size_t const srcSize2 = kWindowSize * 10; + + if (CNBuffSize < dictSize) goto _output_error; + + RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed); + RDG_genBuffer(CNBuffer, srcSize1 + srcSize2, 0.5, 0.5, seed); + + /* Enable checksum to verify round trip. */ + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + /* Disable content size to skip single-pass decompression. */ + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)kWindowLog)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmMinMatch, 32)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashRateLog, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashLog, 16)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmBucketSizeLog, 3)); + + /* Round trip once with a dictionary. */ + CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize1); + CHECK_Z(cSize); + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize2); + /* Streaming decompression to catch out of bounds offsets. 
*/ + { + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0}; + size_t const dSize = ZSTD_decompressStream(dctx, &out, &in); + CHECK_Z(dSize); + if (dSize != 0) goto _output_error; + } + + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2)); + /* Round trip once with a dictionary. */ + CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize)); + { + ZSTD_inBuffer in = {CNBuffer, srcSize1, 0}; + ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0}; + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + cSize = out.pos; + } + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize)); + { + ZSTD_inBuffer in = {CNBuffer, srcSize2, 0}; + ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0}; + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + cSize = out.pos; + } + /* Streaming decompression to catch out of bounds offsets. 
*/ + { + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0}; + size_t const dSize = ZSTD_decompressStream(dctx, &out, &in); + CHECK_Z(dSize); + if (dSize != 0) goto _output_error; + } + + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + free(dict); + } + DISPLAYLEVEL(3, "OK \n"); + + /* Note: this test takes 0.5 seconds to run */ + DISPLAYLEVEL(3, "test%3i : testing refPrefx vs refPrefx + ldm (size comparison) : ", testNb++); + { + /* test a big buffer so that ldm can take effect */ + size_t const size = 100 MB; + int const windowLog = 27; + size_t const dstSize = ZSTD_compressBound(size); + + void* dict = (void*)malloc(size); + void* src = (void*)malloc(size); + void* dst = (void*)malloc(dstSize); + void* recon = (void*)malloc(size); + + size_t refPrefixCompressedSize = 0; + size_t refPrefixLdmComrpessedSize = 0; + size_t reconSize = 0; + + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + + /* make dict and src the same uncompressible data */ + RDG_genBuffer(src, size, 0, 0, seed); + memcpy(dict, src, size); + assert(!memcmp(dict, src, size)); + + /* set level 1 and windowLog to cover src */ + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, windowLog)); + + /* compress on level 1 using just refPrefix and no ldm */ + ZSTD_CCtx_refPrefix(cctx, dict, size); + refPrefixCompressedSize = ZSTD_compress2(cctx, dst, dstSize, src, size); + assert(!ZSTD_isError(refPrefixCompressedSize)); + + /* test round trip just refPrefix */ + ZSTD_DCtx_refPrefix(dctx, dict, size); + reconSize = ZSTD_decompressDCtx(dctx, recon, size, dst, refPrefixCompressedSize); + assert(!ZSTD_isError(reconSize)); + assert(reconSize == size); + assert(!memcmp(recon, src, size)); + + /* compress on level 1 using refPrefix and ldm */ + ZSTD_CCtx_refPrefix(cctx, dict, size);; + CHECK_Z(ZSTD_CCtx_setParameter(cctx, 
ZSTD_c_enableLongDistanceMatching, 1)) + refPrefixLdmComrpessedSize = ZSTD_compress2(cctx, dst, dstSize, src, size); + assert(!ZSTD_isError(refPrefixLdmComrpessedSize)); + + /* test round trip refPrefix + ldm*/ + ZSTD_DCtx_refPrefix(dctx, dict, size); + reconSize = ZSTD_decompressDCtx(dctx, recon, size, dst, refPrefixLdmComrpessedSize); + assert(!ZSTD_isError(reconSize)); + assert(reconSize == size); + assert(!memcmp(recon, src, size)); + + /* make sure that refPrefixCompressedSize is significantly greater */ + assert(refPrefixCompressedSize > 10 * refPrefixLdmComrpessedSize); + /* make sure the ldm comrpessed size is less than 1% of original */ + assert((double)refPrefixLdmComrpessedSize / (double)size < 0.01); + + ZSTD_freeDCtx(dctx); + ZSTD_freeCCtx(cctx); + free(recon); + free(dict); + free(src); + free(dst); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d: superblock uncompressible data, too many nocompress superblocks : ", testNb++); + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + const BYTE* src = (BYTE*)CNBuffer; BYTE* dst = (BYTE*)compressedBuffer; + size_t srcSize = 321656; size_t dstCapacity = ZSTD_compressBound(srcSize); + + /* This is the number of bytes to stream before ending. This value + * was obtained by trial and error :/. */ + + const size_t streamCompressThreshold = 161792; + const size_t streamCompressDelta = 1024; + + /* The first 1/5 of the buffer is compressible and the last 4/5 is + * uncompressible. This is an approximation of the type of data + * the fuzzer generated to catch this bug. Streams like this were making + * zstd generate noCompress superblocks (which are larger than the src + * they come from). Do this enough times, and we'll run out of room + * and throw a dstSize_tooSmall error. 
*/ + + const size_t compressiblePartSize = srcSize/5; + const size_t uncompressiblePartSize = srcSize-compressiblePartSize; + RDG_genBuffer(CNBuffer, compressiblePartSize, 0.5, 0.5, seed); + RDG_genBuffer((BYTE*)CNBuffer+compressiblePartSize, uncompressiblePartSize, 0, 0, seed); + + /* Setting target block size so that superblock is used */ + + assert(cctx != NULL); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 81); + + { size_t read; + for (read = 0; read < streamCompressThreshold; read += streamCompressDelta) { + ZSTD_inBuffer in = {src, streamCompressDelta, 0}; + ZSTD_outBuffer out = {dst, dstCapacity, 0}; + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + src += streamCompressDelta; srcSize -= streamCompressDelta; + dst += out.pos; dstCapacity -= out.pos;}} + + /* This is trying to catch a dstSize_tooSmall error */ + + { ZSTD_inBuffer in = {src, srcSize, 0}; + ZSTD_outBuffer out = {dst, dstCapacity, 0}; + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));} + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d: superblock with no literals : ", testNb++); + /* Generate the same data 20 times over */ + { + size_t const avgChunkSize = CNBuffSize / 20; + size_t b; + for (b = 0; b < CNBuffSize; b += avgChunkSize) { + size_t const chunkSize = MIN(CNBuffSize - b, avgChunkSize); + RDG_genBuffer((char*)CNBuffer + b, chunkSize, compressibility, 0. 
/* auto */, seed); + } + } + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const normalCSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + size_t const allowedExpansion = (CNBuffSize * 3 / 1000); + size_t superCSize; + CHECK_Z(normalCSize); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1000); + superCSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(superCSize); + if (superCSize > normalCSize + allowedExpansion) { + DISPLAYLEVEL(1, "Superblock too big: %u > %u + %u \n", (U32)superCSize, (U32)normalCSize, (U32)allowedExpansion); + goto _output_error; + } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + RDG_genBuffer(CNBuffer, CNBuffSize, compressibility, 0. /*auto*/, seed); + DISPLAYLEVEL(3, "test%3d: superblock enough room for checksum : ", testNb++) + { + /* This tests whether or not we leave enough room for the checksum at the end + * of the dst buffer. 
The bug that motivated this test was found by the + * stream_round_trip fuzzer but this crashes for the same reason and is + * far more compact than re-creating the stream_round_trip fuzzer's code path */ + ZSTD_CCtx *cctx = ZSTD_createCCtx(); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 64); + assert(!ZSTD_isError(ZSTD_compress2(cctx, compressedBuffer, 1339, CNBuffer, 1278))); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress a NULL input with each level : ", testNb++); + { int level = -1; + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + if (!cctx) goto _output_error; + for (level = -1; level <= ZSTD_maxCLevel(); ++level) { + CHECK_Z( ZSTD_compress(compressedBuffer, compressedBufferSize, NULL, 0, level) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, NULL, 0) ); + } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19); + if (ZSTD_isError(r)) goto _output_error; + if (ZSTD_sizeof_CCtx(cctx) > (1U << 20)) goto _output_error; + ZSTD_freeCCtx(cctx); + cSize = r; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : decompress empty frame into NULL : ", testNb++); + { size_t const r = ZSTD_decompress(NULL, 0, compressedBuffer, cSize); + if (ZSTD_isError(r)) goto _output_error; + if (r != 0) goto _output_error; + } + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_outBuffer output; + if (cctx==NULL) goto _output_error; + output.dst = compressedBuffer; + output.size = compressedBufferSize; + output.pos = 0; + CHECK_Z( ZSTD_initCStream(cctx, 1) ); /* content size unknown */ + CHECK_Z( ZSTD_flushStream(cctx, &output) ); /* ensure no possibility to "concatenate" and determine the 
content size */ + CHECK_Z( ZSTD_endStream(cctx, &output) ); + ZSTD_freeCCtx(cctx); + /* single scan decompression */ + { size_t const r = ZSTD_decompress(NULL, 0, compressedBuffer, output.pos); + if (ZSTD_isError(r)) goto _output_error; + if (r != 0) goto _output_error; + } + /* streaming decompression */ + { ZSTD_DCtx* const dstream = ZSTD_createDStream(); + ZSTD_inBuffer dinput; + ZSTD_outBuffer doutput; + size_t ipos; + if (dstream==NULL) goto _output_error; + dinput.src = compressedBuffer; + dinput.size = 0; + dinput.pos = 0; + doutput.dst = NULL; + doutput.size = 0; + doutput.pos = 0; + CHECK_Z ( ZSTD_initDStream(dstream) ); + for (ipos=1; ipos<=output.pos; ipos++) { + dinput.size = ipos; + CHECK_Z ( ZSTD_decompressStream(dstream, &doutput, &dinput) ); + } + if (doutput.pos != 0) goto _output_error; + ZSTD_freeDStream(dstream); + } + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : re-use CCtx with expanding block size : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_parameters const params = ZSTD_getParams(1, ZSTD_CONTENTSIZE_UNKNOWN, 0); + assert(params.fParams.contentSizeFlag == 1); /* block size will be adapted if pledgedSrcSize is enabled */ + CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, 1 /*pledgedSrcSize*/) ); + CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, 1) ); /* creates a block size of 1 */ + + CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */ + { size_t const inSize = 2* 128 KB; + size_t const outSize = ZSTD_compressBound(inSize); + CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, outSize, CNBuffer, inSize) ); + /* will fail if blockSize is not resized */ + } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : re-using a CCtx should compress the same : ", testNb++); + { size_t const sampleSize = 30; + int i; + for (i=0; i<20; i++) + ((char*)CNBuffer)[i] = (char)i; 
/* ensure no match during initial section */ + memcpy((char*)CNBuffer + 20, CNBuffer, 10); /* create one match, starting from beginning of sample, which is the difficult case (see #1241) */ + for (i=1; i<=19; i++) { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t size1, size2; + DISPLAYLEVEL(5, "l%i ", i); + size1 = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, CNBuffer, sampleSize, i); + CHECK_Z(size1); + + size2 = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, CNBuffer, sampleSize, i); + CHECK_Z(size2); + CHECK_EQ(size1, size2); + + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, i) ); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, sampleSize); + CHECK_Z(size2); + CHECK_EQ(size1, size2); + + size2 = ZSTD_compress2(cctx, compressedBuffer, ZSTD_compressBound(sampleSize) - 1, CNBuffer, sampleSize); /* force streaming, as output buffer is not large enough to guarantee success */ + CHECK_Z(size2); + CHECK_EQ(size1, size2); + + { ZSTD_inBuffer inb; + ZSTD_outBuffer outb; + inb.src = CNBuffer; + inb.pos = 0; + inb.size = sampleSize; + outb.dst = compressedBuffer; + outb.pos = 0; + outb.size = ZSTD_compressBound(sampleSize) - 1; /* force streaming, as output buffer is not large enough to guarantee success */ + CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_end) ); + assert(inb.pos == inb.size); + CHECK_EQ(size1, outb.pos); + } + + ZSTD_freeCCtx(cctx); + } + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : btultra2 & 1st block : ", testNb++); + { size_t const sampleSize = 1024; + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_inBuffer inb; + ZSTD_outBuffer outb; + inb.src = CNBuffer; + inb.pos = 0; + inb.size = 0; + outb.dst = compressedBuffer; + outb.pos = 0; + outb.size = compressedBufferSize; + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, ZSTD_maxCLevel()) ); + + inb.size = sampleSize; /* start with something, so that context is already 
used */ + CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_end) ); /* will break internal assert if stats_init is not disabled */ + assert(inb.pos == inb.size); + outb.pos = 0; /* cancel output */ + + CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(cctx, sampleSize) ); + inb.size = 4; /* too small size : compression will be skipped */ + inb.pos = 0; + CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_flush) ); + assert(inb.pos == inb.size); + + inb.size += 5; /* too small size : compression will be skipped */ + CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_flush) ); + assert(inb.pos == inb.size); + + inb.size += 11; /* small enough to attempt compression */ + CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_flush) ); + assert(inb.pos == inb.size); + + assert(inb.pos < sampleSize); + inb.size = sampleSize; /* large enough to trigger stats_init, but no longer at beginning */ + CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_end) ); /* will break internal assert if stats_init is not disabled */ + assert(inb.pos == inb.size); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : ZSTD_CCtx_getParameter() : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_outBuffer out = {NULL, 0, 0}; + ZSTD_inBuffer in = {NULL, 0, 0}; + int value; + + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value)); + CHECK_EQ(value, 3); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); + CHECK_EQ(value, 0); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, ZSTD_HASHLOG_MIN)); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value)); + CHECK_EQ(value, 3); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); + CHECK_EQ(value, ZSTD_HASHLOG_MIN); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7)); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value)); + CHECK_EQ(value, 7); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, 
ZSTD_c_hashLog, &value)); + CHECK_EQ(value, ZSTD_HASHLOG_MIN); + /* Start a compression job */ + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value)); + CHECK_EQ(value, 7); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); + CHECK_EQ(value, ZSTD_HASHLOG_MIN); + /* Reset the CCtx */ + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value)); + CHECK_EQ(value, 7); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); + CHECK_EQ(value, ZSTD_HASHLOG_MIN); + /* Reset the parameters */ + ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value)); + CHECK_EQ(value, 3); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); + CHECK_EQ(value, 0); + + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + /* this test is really too long, and should be made faster */ + DISPLAYLEVEL(3, "test%3d : overflow protection with large windowLog : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_parameters params = ZSTD_getParams(-999, ZSTD_CONTENTSIZE_UNKNOWN, 0); + size_t const nbCompressions = ((1U << 31) / CNBuffSize) + 2; /* ensure U32 overflow protection is triggered */ + size_t cnb; + assert(cctx != NULL); + params.fParams.contentSizeFlag = 0; + params.cParams.windowLog = ZSTD_WINDOWLOG_MAX; + for (cnb = 0; cnb < nbCompressions; ++cnb) { + DISPLAYLEVEL(6, "run %zu / %zu \n", cnb, nbCompressions); + CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */ + CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize) ); + } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3d : size down context : ", testNb++); + { ZSTD_CCtx* const largeCCtx = ZSTD_createCCtx(); + assert(largeCCtx != NULL); + CHECK_Z( 
ZSTD_compressBegin(largeCCtx, 19) ); /* streaming implies ZSTD_CONTENTSIZE_UNKNOWN, which maximizes memory usage */ + CHECK_Z( ZSTD_compressEnd(largeCCtx, compressedBuffer, compressedBufferSize, CNBuffer, 1) ); + { size_t const largeCCtxSize = ZSTD_sizeof_CCtx(largeCCtx); /* size of context must be measured after compression */ + { ZSTD_CCtx* const smallCCtx = ZSTD_createCCtx(); + assert(smallCCtx != NULL); + CHECK_Z(ZSTD_compressCCtx(smallCCtx, compressedBuffer, compressedBufferSize, CNBuffer, 1, 1)); + { size_t const smallCCtxSize = ZSTD_sizeof_CCtx(smallCCtx); + DISPLAYLEVEL(5, "(large) %zuKB > 32*%zuKB (small) : ", + largeCCtxSize>>10, smallCCtxSize>>10); + assert(largeCCtxSize > 32* smallCCtxSize); /* note : "too large" definition is handled within zstd_compress.c . + * make this test case extreme, so that it doesn't depend on a possibly fluctuating definition */ + } + ZSTD_freeCCtx(smallCCtx); + } + { U32 const maxNbAttempts = 1100; /* nb of usages before triggering size down is handled within zstd_compress.c. + * currently defined as 128x, but could be adjusted in the future. 
+ * make this test long enough so that it's not too much tied to the current definition within zstd_compress.c */ + unsigned u; + for (u=0; u<maxNbAttempts; u++) { + CHECK_Z(ZSTD_compressCCtx(largeCCtx, compressedBuffer, compressedBufferSize, CNBuffer, 1, 1)); + if (ZSTD_sizeof_CCtx(largeCCtx) < largeCCtxSize) break; /* sized down */ + } + DISPLAYLEVEL(5, "size down after %u attempts : ", u); + if (u==maxNbAttempts) goto _output_error; /* no sizedown happened */ + } + } + ZSTD_freeCCtx(largeCCtx); + } + DISPLAYLEVEL(3, "OK \n"); + + /* Static CCtx tests */ +#define STATIC_CCTX_LEVEL 4 + DISPLAYLEVEL(3, "test%3i : create static CCtx for level %u : ", testNb++, STATIC_CCTX_LEVEL); + { size_t const staticCStreamSize = ZSTD_estimateCStreamSize(STATIC_CCTX_LEVEL); + void* const staticCCtxBuffer = malloc(staticCStreamSize); + size_t const staticDCtxSize = ZSTD_estimateDCtxSize(); + void* const staticDCtxBuffer = malloc(staticDCtxSize); + DISPLAYLEVEL(4, "CStream size = %u, ", (U32)staticCStreamSize); + if (staticCCtxBuffer==NULL || staticDCtxBuffer==NULL) { + free(staticCCtxBuffer); + free(staticDCtxBuffer); + DISPLAY("Not enough memory, aborting\n"); + testResult = 1; + goto _end; + } + { size_t const smallInSize = 32 KB; + ZSTD_compressionParameters const cparams_small = ZSTD_getCParams(STATIC_CCTX_LEVEL, smallInSize, 0); + size_t const smallCCtxSize = ZSTD_estimateCCtxSize_usingCParams(cparams_small); + size_t const staticCCtxSize = ZSTD_estimateCCtxSize(STATIC_CCTX_LEVEL); + ZSTD_CCtx* staticCCtx = ZSTD_initStaticCCtx(staticCCtxBuffer, smallCCtxSize); + ZSTD_DCtx* const staticDCtx = ZSTD_initStaticDCtx(staticDCtxBuffer, staticDCtxSize); + DISPLAYLEVEL(4, "Full CCtx size = %u, ", (U32)staticCCtxSize); + DISPLAYLEVEL(4, "CCtx for 32 KB = %u, ", (U32)smallCCtxSize); + if ((staticCCtx==NULL) || (staticDCtx==NULL)) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress small input with small static CCtx : ", testNb++); + CHECK_VAR(cSize, 
ZSTD_compressCCtx(staticCCtx, + compressedBuffer, compressedBufferSize, + CNBuffer, smallInSize, STATIC_CCTX_LEVEL) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", + (unsigned)cSize, (double)cSize/smallInSize*100); + + DISPLAYLEVEL(3, "test%3i : compress large input with small static CCtx (must fail) : ", testNb++); + { size_t const r = ZSTD_compressCCtx(staticCCtx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL); + if (ZSTD_getErrorCode((size_t)r) != ZSTD_error_memory_allocation) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : resize context to full CCtx size : ", testNb++); + staticCCtx = ZSTD_initStaticCStream(staticCCtxBuffer, staticCCtxSize); + DISPLAYLEVEL(4, "staticCCtxBuffer = %p, staticCCtx = %p , ", staticCCtxBuffer, staticCCtx); + if (staticCCtx == NULL) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress large input with static CCtx : ", testNb++); + CHECK_VAR(cSize, ZSTD_compressCCtx(staticCCtx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", + (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : compress small input often enough to trigger context reduce : ", testNb++); + { int nbc; + assert(staticCCtxSize > smallCCtxSize * ZSTD_WORKSPACETOOLARGE_FACTOR); /* ensure size down scenario */ + assert(CNBuffSize > smallInSize + ZSTD_WORKSPACETOOLARGE_MAXDURATION + 3); + for (nbc=0; nbc<ZSTD_WORKSPACETOOLARGE_MAXDURATION+2; nbc++) { + CHECK_Z(ZSTD_compressCCtx(staticCCtx, + compressedBuffer, compressedBufferSize, + (char*)CNBuffer + nbc, smallInSize, + STATIC_CCTX_LEVEL) ); + } } + DISPLAYLEVEL(3, "OK \n") + + DISPLAYLEVEL(3, "test%3i : init CCtx for level %u : ", testNb++, STATIC_CCTX_LEVEL); + CHECK_Z( ZSTD_compressBegin(staticCCtx, STATIC_CCTX_LEVEL) ); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compression again with 
static CCtx : ", testNb++); + CHECK_VAR(cSize, ZSTD_compressCCtx(staticCCtx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", + (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : simple decompression test with static DCtx : ", testNb++); + { size_t const r = ZSTD_decompressDCtx(staticDCtx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + if (memcmp(decodedBuffer, CNBuffer, CNBuffSize)) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : init CCtx for too large level (must fail) : ", testNb++); + { size_t const r = ZSTD_compressBegin(staticCCtx, ZSTD_maxCLevel()); + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1); + CHECK( ZSTD_compressBegin(staticCCtx, 1) ); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : use CStream on CCtx-sized static context (should fail) : ", testNb++); + CHECK_Z( ZSTD_initCStream(staticCCtx, STATIC_CCTX_LEVEL) ); /* note : doesn't allocate */ + { ZSTD_outBuffer output = { compressedBuffer, compressedBufferSize, 0 }; + ZSTD_inBuffer input = { CNBuffer, CNBuffSize, 0 }; + size_t const r = ZSTD_compressStream(staticCCtx, &output, &input); /* now allocates, should fail */ + if (!ZSTD_isError(r)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : resize context to CStream size, then stream compress : ", testNb++); + staticCCtx = ZSTD_initStaticCStream(staticCCtxBuffer, staticCStreamSize); + assert(staticCCtx != NULL); + CHECK_Z( ZSTD_initCStream(staticCCtx, STATIC_CCTX_LEVEL) ); /* note : doesn't allocate */ + { ZSTD_outBuffer output = { compressedBuffer, compressedBufferSize, 0 }; + 
ZSTD_inBuffer input = { CNBuffer, CNBuffSize, 0 }; + CHECK_Z( ZSTD_compressStream(staticCCtx, &output, &input) ); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : CStream for small level %u : ", testNb++, 1); + CHECK_Z( ZSTD_initCStream(staticCCtx, 1) ); /* note : doesn't allocate */ + { ZSTD_outBuffer output = { compressedBuffer, compressedBufferSize, 0 }; + ZSTD_inBuffer input = { CNBuffer, CNBuffSize, 0 }; + CHECK_Z( ZSTD_compressStream(staticCCtx, &output, &input) ); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : init static CStream with dictionary (should fail) : ", testNb++); + { size_t const r = ZSTD_initCStream_usingDict(staticCCtx, CNBuffer, 64 KB, 1); + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : use DStream on DCtx-sized static context (should fail) : ", testNb++); + CHECK_Z( ZSTD_initDStream(staticDCtx) ); + { ZSTD_outBuffer output = { decodedBuffer, CNBuffSize, 0 }; + ZSTD_inBuffer input = { compressedBuffer, ZSTD_FRAMEHEADERSIZE_MAX+1, 0 }; + size_t const r = ZSTD_decompressStream(staticDCtx, &output, &input); + if (!ZSTD_isError(r)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + } + free(staticCCtxBuffer); + free(staticDCtxBuffer); + } + + DISPLAYLEVEL(3, "test%3i : Static context sizes for negative levels : ", testNb++); + { size_t const cctxSizeN1 = ZSTD_estimateCCtxSize(-1); + size_t const cctxSizeP1 = ZSTD_estimateCCtxSize(1); + size_t const cstreamSizeN1 = ZSTD_estimateCStreamSize(-1); + size_t const cstreamSizeP1 = ZSTD_estimateCStreamSize(1); + + if (!(0 < cctxSizeN1 && cctxSizeN1 <= cctxSizeP1)) goto _output_error; + if (!(0 < cstreamSizeN1 && cstreamSizeN1 <= cstreamSizeP1)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + + /* ZSTDMT simple MT compression test */ + DISPLAYLEVEL(3, "test%3i : create ZSTDMT CCtx : ", testNb++); + { ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(2); + if (mtctx==NULL) { + DISPLAY("mtctx : not enough memory, 
aborting \n"); + testResult = 1; + goto _end; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3u : compress %u bytes with 2 threads : ", testNb++, (unsigned)CNBuffSize); + CHECK_VAR(cSize, ZSTDMT_compressCCtx(mtctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + 1) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : decompressed size test : ", testNb++); + { unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) { + DISPLAY("ZSTD_getFrameContentSize incorrect : %u != %u \n", (unsigned)rSize, (unsigned)CNBuffSize); + goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + { size_t u; + for (u=0; u<CNBuffSize; u++) { + if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress -T2 with checksum : ", testNb++); + { ZSTD_parameters params = ZSTD_getParams(1, CNBuffSize, 0); + params.fParams.checksumFlag = 1; + params.fParams.contentSizeFlag = 1; + CHECK_VAR(cSize, ZSTDMT_compress_advanced(mtctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + NULL, params, 3 /*overlapRLog*/) ); + } + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + ZSTDMT_freeCCtx(mtctx); + } + + DISPLAYLEVEL(3, "test%3u : 
compress empty string and decompress with small window log : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + char out[32]; + if (cctx == NULL || dctx == NULL) goto _output_error; + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) ); + CHECK_VAR(cSize, ZSTD_compress2(cctx, out, sizeof(out), NULL, 0) ); + DISPLAYLEVEL(3, "OK (%u bytes)\n", (unsigned)cSize); + + CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) ); + { char const* outPtr = out; + ZSTD_inBuffer inBuffer = { outPtr, cSize, 0 }; + ZSTD_outBuffer outBuffer = { NULL, 0, 0 }; + size_t dSize; + CHECK_VAR(dSize, ZSTD_decompressStream(dctx, &outBuffer, &inBuffer) ); + if (dSize != 0) goto _output_error; + } + + ZSTD_freeDCtx(dctx); + ZSTD_freeCCtx(cctx); + } + + DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++) + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + size_t cSize1, cSize2; + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) ); + cSize1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK(cSize1); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_lcm_uncompressed) ); + cSize2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK(cSize2); + CHECK_LT(cSize1, cSize2); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Multithreaded ZSTD_compress2() with rsyncable : ", testNb++) + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + /* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so + * ZSTDMT is forced to not take the shortcut. 
+ */ + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) ); + CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) ); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++) + { ZSTD_CCtx_params* params = ZSTD_createCCtxParams(); + int value; + /* Check that the overlap log and job size are unset. */ + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_EQ(value, 0); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_EQ(value, 0); + /* Set and check the overlap log and job size. */ + CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) ); + CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, 2 MB) ); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_EQ(value, 5); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_EQ(value, 2 MB); + /* Set the number of workers and check the overlap log and job size. 
*/ + CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) ); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_EQ(value, 5); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_EQ(value, 2 MB); + ZSTD_freeCCtxParams(params); + + } + DISPLAYLEVEL(3, "OK \n"); + + /* Simple API multiframe test */ + DISPLAYLEVEL(3, "test%3i : compress multiple frames : ", testNb++); + { size_t off = 0; + int i; + int const segs = 4; + /* only use the first half so we don't push against size limit of compressedBuffer */ + size_t const segSize = (CNBuffSize / 2) / segs; + for (i = 0; i < segs; i++) { + CHECK_NEWV(r, ZSTD_compress( + (BYTE*)compressedBuffer + off, CNBuffSize - off, + (BYTE*)CNBuffer + segSize * (size_t)i, segSize, + 5) ); + off += r; + if (i == segs/2) { + /* insert skippable frame */ + const U32 skipLen = 129 KB; + MEM_writeLE32((BYTE*)compressedBuffer + off, ZSTD_MAGIC_SKIPPABLE_START); + MEM_writeLE32((BYTE*)compressedBuffer + off + 4, skipLen); + off += skipLen + ZSTD_SKIPPABLEHEADERSIZE; + } + } + cSize = off; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : get decompressed size of multiple frames : ", testNb++); + { unsigned long long const r = ZSTD_findDecompressedSize(compressedBuffer, cSize); + if (r != CNBuffSize / 2) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : get tight decompressed bound of multiple frames : ", testNb++); + { unsigned long long const bound = ZSTD_decompressBound(compressedBuffer, cSize); + if (bound != CNBuffSize / 2) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress multiple frames : ", testNb++); + { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize)); + if (r != CNBuffSize / 2) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + if (memcmp(decodedBuffer, CNBuffer, 
CNBuffSize / 2) != 0) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + /* Dictionary and CCtx Duplication tests */ + { ZSTD_CCtx* const ctxOrig = ZSTD_createCCtx(); + ZSTD_CCtx* const ctxDuplicated = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + static const size_t dictSize = 551; + assert(dctx != NULL); assert(ctxOrig != NULL); assert(ctxDuplicated != NULL); + + DISPLAYLEVEL(3, "test%3i : copy context too soon : ", testNb++); + { size_t const copyResult = ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0); + if (!ZSTD_isError(copyResult)) goto _output_error; } /* error must be detected */ + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : load dictionary into context : ", testNb++); + CHECK( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) ); + CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */ + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress with flat dictionary : ", testNb++); + cSize = 0; + CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, + compressedBuffer, compressedBufferSize, + (const char*)CNBuffer + dictSize, CNBuffSize - dictSize), + cSize += r); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : frame built with flat dictionary should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + CNBuffer, dictSize), + if (r != CNBuffSize - dictSize) goto _output_error); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress with duplicated context : ", testNb++); + { size_t const cSizeOrig = cSize; + cSize = 0; + CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, + compressedBuffer, compressedBufferSize, + (const char*)CNBuffer + dictSize, CNBuffSize - dictSize), + cSize += r); + if (cSize != cSizeOrig) goto _output_error; /* should be identical ==> same size */ + } + DISPLAYLEVEL(3, "OK (%u bytes 
: %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : frame built with duplicated context should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + CNBuffer, dictSize), + if (r != CNBuffSize - dictSize) goto _output_error); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress with DDict : ", testNb++); + { ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, dictSize); + size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict); + if (r != CNBuffSize - dictSize) goto _output_error; + DISPLAYLEVEL(3, "OK (size of DDict : %u) \n", (unsigned)ZSTD_sizeof_DDict(ddict)); + ZSTD_freeDDict(ddict); + } + + DISPLAYLEVEL(3, "test%3i : decompress with static DDict : ", testNb++); + { size_t const ddictBufferSize = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy); + void* const ddictBuffer = malloc(ddictBufferSize); + if (ddictBuffer == NULL) goto _output_error; + { const ZSTD_DDict* const ddict = ZSTD_initStaticDDict(ddictBuffer, ddictBufferSize, CNBuffer, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); + size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict); + if (r != CNBuffSize - dictSize) goto _output_error; + } + free(ddictBuffer); + DISPLAYLEVEL(3, "OK (size of static DDict : %u) \n", (unsigned)ddictBufferSize); + } + + DISPLAYLEVEL(3, "test%3i : check content size on duplicated context : ", testNb++); + { size_t const testSize = CNBuffSize / 3; + { ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize); + p.fParams.contentSizeFlag = 1; + CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) ); + } + CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) ); + + CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize), + (const char*)CNBuffer + dictSize, testSize) 
); + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, compressedBuffer, cSize)) goto _output_error; + if ((zfh.frameContentSize != testSize) && (zfh.frameContentSize != 0)) goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + if ((int)(compressibility * 100 + 0.1) == FUZ_compressibility_default) { /* test only valid with known input */ + size_t const flatdictSize = 22 KB; + size_t const contentSize = 9 KB; + const void* const dict = (const char*)CNBuffer; + const void* const contentStart = (const char*)dict + flatdictSize; + size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770, + 3770, 3770, 3770, 3750, 3750, + 3740, 3670, 3670, 3660, 3660, + 3660, 3660, 3660, 3660, 3660, + 3660, 3660, 3660 }; + size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940, + 2950, 2950, 2920, 2900, 2890, + 2910, 2910, 2910, 2770, 2760, + 2750, 2750, 2750, 2750, 2750, + 2750, 2750, 2750 }; + int l = 1; + int const maxLevel = ZSTD_maxCLevel(); + + DISPLAYLEVEL(3, "test%3i : flat-dictionary efficiency test : \n", testNb++); + assert(maxLevel == 22); + RDG_genBuffer(CNBuffer, flatdictSize + contentSize, compressibility, 0., seed); + DISPLAYLEVEL(4, "content hash : %016llx; dict hash : %016llx \n", XXH64(contentStart, contentSize, 0), XXH64(dict, flatdictSize, 0)); + + for ( ; l <= maxLevel; l++) { + size_t const nodict_cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, + contentStart, contentSize, l); + if (nodict_cSize > target_nodict_cSize[l]) { + DISPLAYLEVEL(1, "error : compression at level %i worse than expected (%u > %u) \n", + l, (unsigned)nodict_cSize, (unsigned)target_nodict_cSize[l]); + goto _output_error; + } + DISPLAYLEVEL(4, "level %i : max expected %u >= reached %u \n", + l, (unsigned)target_nodict_cSize[l], (unsigned)nodict_cSize); + } + for ( l=1 ; l <= maxLevel; l++) { + size_t const wdict_cSize = ZSTD_compress_usingDict(ctxOrig, + compressedBuffer, compressedBufferSize, + contentStart, contentSize, + dict, 
flatdictSize, + l); + if (wdict_cSize > target_wdict_cSize[l]) { + DISPLAYLEVEL(1, "error : compression with dictionary at level %i worse than expected (%u > %u) \n", + l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]); + goto _output_error; + } + DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n", + l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize); + } + + DISPLAYLEVEL(4, "compression efficiency tests OK \n"); + } + + ZSTD_freeCCtx(ctxOrig); + ZSTD_freeCCtx(ctxDuplicated); + ZSTD_freeDCtx(dctx); + } + + /* Dictionary and dictBuilder tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const dictBufferCapacity = 16 KB; + void* const dictBuffer = malloc(dictBufferCapacity); + size_t const totalSampleSize = 1 MB; + size_t const sampleUnitSize = 8 KB; + U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); + size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + size_t dictSize; + U32 dictID; + size_t dictHeaderSize; + + if (dictBuffer==NULL || samplesSizes==NULL) { + free(dictBuffer); + free(samplesSizes); + goto _output_error; + } + + DISPLAYLEVEL(3, "test%3i : dictBuilder on cyclic data : ", testNb++); + assert(compressedBufferSize >= totalSampleSize); + { U32 u; for (u=0; u<totalSampleSize; u++) ((BYTE*)decodedBuffer)[u] = (BYTE)u; } + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + { size_t const sDictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity, + decodedBuffer, samplesSizes, nbSamples); + if (ZDICT_isError(sDictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)sDictSize); + } + + DISPLAYLEVEL(3, "test%3i : dictBuilder : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + dictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity, + CNBuffer, samplesSizes, nbSamples); + if (ZDICT_isError(dictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK, 
created dictionary of size %u \n", (unsigned)dictSize); + + DISPLAYLEVEL(3, "test%3i : Multithreaded COVER dictBuilder : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + { ZDICT_cover_params_t coverParams; + memset(&coverParams, 0, sizeof(coverParams)); + coverParams.steps = 8; + coverParams.nbThreads = 4; + dictSize = ZDICT_optimizeTrainFromBuffer_cover( + dictBuffer, dictBufferCapacity, + CNBuffer, samplesSizes, nbSamples/8, /* less samples for faster tests */ + &coverParams); + if (ZDICT_isError(dictSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize); + + DISPLAYLEVEL(3, "test%3i : COVER dictBuilder with shrinkDict: ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + { ZDICT_cover_params_t coverParams; + memset(&coverParams, 0, sizeof(coverParams)); + coverParams.steps = 8; + coverParams.nbThreads = 4; + coverParams.shrinkDict = 1; + coverParams.shrinkDictMaxRegression = 1; + dictSize = ZDICT_optimizeTrainFromBuffer_cover( + dictBuffer, dictBufferCapacity, + CNBuffer, samplesSizes, nbSamples/8, /* less samples for faster tests */ + &coverParams); + if (ZDICT_isError(dictSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize); + + DISPLAYLEVEL(3, "test%3i : Multithreaded FASTCOVER dictBuilder : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + { ZDICT_fastCover_params_t fastCoverParams; + memset(&fastCoverParams, 0, sizeof(fastCoverParams)); + fastCoverParams.steps = 8; + fastCoverParams.nbThreads = 4; + dictSize = ZDICT_optimizeTrainFromBuffer_fastCover( + dictBuffer, dictBufferCapacity, + CNBuffer, samplesSizes, nbSamples, + &fastCoverParams); + if (ZDICT_isError(dictSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize); + + DISPLAYLEVEL(3, "test%3i : FASTCOVER dictBuilder with 
shrinkDict: ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + { ZDICT_fastCover_params_t fastCoverParams; + memset(&fastCoverParams, 0, sizeof(fastCoverParams)); + fastCoverParams.steps = 8; + fastCoverParams.nbThreads = 4; + fastCoverParams.shrinkDict = 1; + fastCoverParams.shrinkDictMaxRegression = 1; + dictSize = ZDICT_optimizeTrainFromBuffer_fastCover( + dictBuffer, dictBufferCapacity, + CNBuffer, samplesSizes, nbSamples, + &fastCoverParams); + if (ZDICT_isError(dictSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize); + + DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++); + dictID = ZDICT_getDictID(dictBuffer, dictSize); + if (dictID==0) goto _output_error; + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID); + + DISPLAYLEVEL(3, "test%3i : check dict header size no error : ", testNb++); + dictHeaderSize = ZDICT_getDictHeaderSize(dictBuffer, dictSize); + if (dictHeaderSize==0) goto _output_error; + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); + + DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++); + { unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f, + 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01, + 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18, + 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c, + 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61, + 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 
0x72, 0x20, 0x65, + 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 }; + dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144); + if (dictHeaderSize != 115) goto _output_error; + } + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); + + DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++); + cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + dictBuffer, dictSize, 4); + if (ZSTD_isError(cSize)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : retrieve dictID from dictionary : ", testNb++); + { U32 const did = ZSTD_getDictID_fromDict(dictBuffer, dictSize); + if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */ + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : retrieve dictID from frame : ", testNb++); + { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */ + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : frame built with dictionary should be decompressible : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : estimate CDict size : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + size_t const estimatedSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byRef); + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)estimatedSize); + } + + DISPLAYLEVEL(3, "test%3i : compress with CDict ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, 
CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + assert(cdict != NULL); + DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict)); + cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, cdict); + ZSTD_freeCDict(cdict); + if (ZSTD_isError(cSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : retrieve dictID from frame : ", testNb++); + { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */ + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : frame built with dictionary should be decompressible : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress with static CDict : ", testNb++); + { int const maxLevel = ZSTD_maxCLevel(); + int level; + for (level = 1; level <= maxLevel; ++level) { + ZSTD_compressionParameters const cParams = ZSTD_getCParams(level, CNBuffSize, dictSize); + size_t const cdictSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); + void* const cdictBuffer = malloc(cdictSize); + if (cdictBuffer==NULL) goto _output_error; + { const ZSTD_CDict* const cdict = ZSTD_initStaticCDict( + cdictBuffer, cdictSize, + dictBuffer, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams); + if (cdict == NULL) { + DISPLAY("ZSTD_initStaticCDict failed "); + goto _output_error; + } + cSize = ZSTD_compress_usingCDict(cctx, + compressedBuffer, compressedBufferSize, + 
CNBuffer, MIN(10 KB, CNBuffSize), cdict); + if (ZSTD_isError(cSize)) { + DISPLAY("ZSTD_compress_usingCDict failed "); + goto _output_error; + } } + free(cdictBuffer); + } } + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingCDict_advanced, no contentSize, no dictID : ", testNb++); + { ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ }; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); + assert(cdict != NULL); + cSize = ZSTD_compress_usingCDict_advanced(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + cdict, fParams); + ZSTD_freeCDict(cdict); + if (ZSTD_isError(cSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : try retrieving contentSize from frame : ", testNb++); + { U64 const contentSize = ZSTD_getFrameContentSize(compressedBuffer, cSize); + if (contentSize != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error; + } + DISPLAYLEVEL(3, "OK (unknown)\n"); + + DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + assert(dctx != NULL); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compress_advanced, no dictID : ", testNb++); + { ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize); + p.fParams.noDictIDFlag = 1; + cSize = ZSTD_compress_advanced(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + dictBuffer, dictSize, 
p); + if (ZSTD_isError(cSize)) goto _output_error; + } + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : dictionary containing only header should return error : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + assert(dctx != NULL); + { const size_t ret = ZSTD_decompress_usingDict( + dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, + "\x37\xa4\x30\xec\x11\x22\x33\x44", 8); + if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted) + goto _output_error; + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dct_fullDict on a good dictionary : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem); + if (cdict==NULL) goto _output_error; + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dct_fullDict on a rawContent (must fail) : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced((const char*)dictBuffer+1, dictSize-1, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem); + if (cdict!=NULL) goto _output_error; + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail 
: ", testNb++); + { + size_t ret; + MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); + /* Either operation is allowed to fail, but one must fail. */ + ret = ZSTD_CCtx_loadDictionary_advanced( + cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto); + if (!ZSTD_isError(ret)) { + ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); + if (!ZSTD_isError(ret)) goto _output_error; + } + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++); + { + size_t ret; + MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); + ret = ZSTD_CCtx_loadDictionary_advanced( + cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent); + if (ZSTD_isError(ret)) goto _output_error; + ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); + if (ZSTD_isError(ret)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 )); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 )); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading dictionary before setting parameters is the same as loading after : ", testNb++); + { + size_t size1, size2; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7) ); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + size1 = ZSTD_compress2(cctx, compressedBuffer, 
compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size1)) goto _output_error; + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7) ); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size2)) goto _output_error; + + if (size1 != size2) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a dictionary clears the prefix : ", testNb++); + { + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a dictionary clears the cdict : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a cdict clears the prefix : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a cdict clears the dictionary : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( 
ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a prefix clears the dictionary : ", testNb++); + { + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a prefix clears the cdict : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loaded dictionary persists across reset session : ", testNb++); + { + size_t size1, size2; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size1)) goto _output_error; + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size2)) goto _output_error; + + if (size1 != size2) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loaded dictionary is cleared after resetting parameters : ", testNb++); + { + size_t size1, size2; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + 
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size1)) goto _output_error; + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size2)) goto _output_error; + + if (size1 == size2) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize) ); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + CHECK_Z(cSize); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with dictionary : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t ret; + /* We should fail to decompress without a dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + /* We should succeed to decompress with the dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictSize) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* The dictionary should presist across calls. */ + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* When we reset the context the dictionary is cleared. 
*/ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with ddict : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_DDict* ddict = ZSTD_createDDict(dictBuffer, dictSize); + size_t ret; + /* We should succeed to decompress with the ddict. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddict) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* The ddict should presist across calls. */ + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* When we reset the context the ddict is cleared. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + ZSTD_freeDCtx(dctx); + ZSTD_freeDDict(ddict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with prefix : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t ret; + /* We should succeed to decompress with the prefix. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_refPrefix_advanced(dctx, dictBuffer, dictSize, ZSTD_dct_auto) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* The prefix should be cleared after the first compression. 
*/ + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize, + CNBuffer, samplesSizes, nbSamples); + if (ZDICT_isError(dictSize)) goto _output_error; + /* Set all the repcodes to non-default */ + { + BYTE* dictPtr = (BYTE*)dictBuffer; + BYTE* dictLimit = dictPtr + dictSize - 12; + /* Find the repcodes */ + while (dictPtr < dictLimit && + (MEM_readLE32(dictPtr) != 1 || MEM_readLE32(dictPtr + 4) != 4 || + MEM_readLE32(dictPtr + 8) != 8)) { + ++dictPtr; + } + if (dictPtr >= dictLimit) goto _output_error; + MEM_writeLE32(dictPtr + 0, 10); + MEM_writeLE32(dictPtr + 4, 10); + MEM_writeLE32(dictPtr + 8, 10); + /* Set the last 8 bytes to 'x' */ + memset((BYTE*)dictBuffer + dictSize - 8, 'x', 8); + } + /* The optimal parser checks all the repcodes. + * Make sure at least one is a match >= targetLength so that it is + * immediately chosen. This will make sure that the compressor and + * decompressor agree on at least one of the repcodes. 
+ */ + { size_t dSize; + BYTE data[1024]; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + ZSTD_compressionParameters const cParams = ZSTD_getCParams(19, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + assert(dctx != NULL); assert(cdict != NULL); + memset(data, 'x', sizeof(data)); + cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, + data, sizeof(data), cdict); + ZSTD_freeCDict(cdict); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(5, "Compression error %s : ", ZSTD_getErrorName(cSize)); goto _output_error; } + dSize = ZSTD_decompress_usingDict(dctx, decodedBuffer, sizeof(data), compressedBuffer, cSize, dictBuffer, dictSize); + if (ZSTD_isError(dSize)) { DISPLAYLEVEL(5, "Decompression error %s : ", ZSTD_getErrorName(dSize)); goto _output_error; } + if (memcmp(data, decodedBuffer, sizeof(data))) { DISPLAYLEVEL(5, "Data corruption : "); goto _output_error; } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_freeCCtx(cctx); + free(dictBuffer); + free(samplesSizes); + } + + /* COVER dictionary builder tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t dictSize = 16 KB; + size_t optDictSize = dictSize; + void* dictBuffer = malloc(dictSize); + size_t const totalSampleSize = 1 MB; + size_t const sampleUnitSize = 8 KB; + U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); + size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + U32 seed32 = seed; + ZDICT_cover_params_t params; + U32 dictID; + + if (dictBuffer==NULL || samplesSizes==NULL) { + free(dictBuffer); + free(samplesSizes); + goto _output_error; + } + + DISPLAYLEVEL(3, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + memset(&params, 0, sizeof(params)); + params.d = 1 + (FUZ_rand(&seed32) % 16); + params.k = params.d + (FUZ_rand(&seed32) % 
256); + dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize, + CNBuffer, samplesSizes, nbSamples, + params); + if (ZDICT_isError(dictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize); + + DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++); + dictID = ZDICT_getDictID(dictBuffer, dictSize); + if (dictID==0) goto _output_error; + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID); + + DISPLAYLEVEL(3, "test%3i : ZDICT_optimizeTrainFromBuffer_cover : ", testNb++); + memset(&params, 0, sizeof(params)); + params.steps = 4; + optDictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, optDictSize, + CNBuffer, samplesSizes, + nbSamples / 4, &params); + if (ZDICT_isError(optDictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)optDictSize); + + DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++); + dictID = ZDICT_getDictID(dictBuffer, optDictSize); + if (dictID==0) goto _output_error; + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID); + + ZSTD_freeCCtx(cctx); + free(dictBuffer); + free(samplesSizes); + } + + /* Decompression defense tests */ + DISPLAYLEVEL(3, "test%3i : Check input length for magic number : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 3); /* too small input */ + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Check magic Number : ", testNb++); + ((char*)(CNBuffer))[0] = 1; + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 4); + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* content size verification test */ + DISPLAYLEVEL(3, "test%3i : Content size verification : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const srcSize = 5000; + size_t const wrongSrcSize = (srcSize + 1000); + ZSTD_parameters params = 
ZSTD_getParams(1, wrongSrcSize, 0); + params.fParams.contentSizeFlag = 1; + CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) ); + { size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize); + if (!ZSTD_isError(result)) goto _output_error; + if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error; + DISPLAYLEVEL(3, "OK : %s \n", ZSTD_getErrorName(result)); + } + ZSTD_freeCCtx(cctx); + } + + /* negative compression level test : ensure simple API and advanced API produce same result */ + DISPLAYLEVEL(3, "test%3i : negative compression level : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const srcSize = CNBuffSize / 5; + int const compressionLevel = -1; + + assert(cctx != NULL); + { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0); + size_t const cSize_1pass = ZSTD_compress_advanced(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, srcSize, + NULL, 0, + params); + if (ZSTD_isError(cSize_1pass)) goto _output_error; + + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) ); + { size_t const compressionResult = ZSTD_compress2(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, srcSize); + DISPLAYLEVEL(5, "simple=%zu vs %zu=advanced : ", cSize_1pass, compressionResult); + if (ZSTD_isError(compressionResult)) goto _output_error; + if (compressionResult != cSize_1pass) goto _output_error; + } } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + /* parameters order test */ + { size_t const inputSize = CNBuffSize / 2; + U64 xxh64; + + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + DISPLAYLEVEL(3, "test%3i : parameters in order : ", testNb++); + assert(cctx != NULL); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); + { size_t const 
compressedSize = ZSTD_compress2(cctx, + compressedBuffer, ZSTD_compressBound(inputSize), + CNBuffer, inputSize); + CHECK(compressedSize); + cSize = compressedSize; + xxh64 = XXH64(compressedBuffer, compressedSize, 0); + } + DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)cSize); + ZSTD_freeCCtx(cctx); + } + + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + DISPLAYLEVEL(3, "test%3i : parameters disordered : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); + { size_t const result = ZSTD_compress2(cctx, + compressedBuffer, ZSTD_compressBound(inputSize), + CNBuffer, inputSize); + CHECK(result); + if (result != cSize) goto _output_error; /* must result in same compressed result, hence same size */ + if (XXH64(compressedBuffer, result, 0) != xxh64) goto _output_error; /* must result in exactly same content, hence same hash */ + DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)result); + } + ZSTD_freeCCtx(cctx); + } + } + + /* advanced parameters for decompression */ + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + assert(dctx != NULL); + + DISPLAYLEVEL(3, "test%3i : get dParameter bounds ", testNb++); + { ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + CHECK(bounds.error); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : wrong dParameter : ", testNb++); + { size_t const sr = ZSTD_DCtx_setParameter(dctx, (ZSTD_dParameter)999999, 0); + if (!ZSTD_isError(sr)) goto _output_error; + } + { ZSTD_bounds const bounds = ZSTD_dParam_getBounds((ZSTD_dParameter)999998); + if (!ZSTD_isError(bounds.error)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : out of bound dParameter : ", testNb++); + { size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 9999); + if 
(!ZSTD_isError(sr)) goto _output_error; + } + { size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (ZSTD_format_e)888); + if (!ZSTD_isError(sr)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_freeDCtx(dctx); + } + + + /* custom formats tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + size_t const inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */ + assert(dctx != NULL); assert(cctx != NULL); + + /* basic block compression */ + DISPLAYLEVEL(3, "test%3i : magic-less format test : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); + { ZSTD_inBuffer in = { CNBuffer, inputSize, 0 }; + ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 }; + size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + cSize = out.pos; + } + DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)cSize); + + DISPLAYLEVEL(3, "test%3i : decompress normally (should fail) : ", testNb++); + { size_t const decodeResult = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (ZSTD_getErrorCode(decodeResult) != ZSTD_error_prefix_unknown) goto _output_error; + DISPLAYLEVEL(3, "OK : %s \n", ZSTD_getErrorName(decodeResult)); + } + + DISPLAYLEVEL(3, "test%3i : decompress of magic-less frame : ", testNb++); + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) ); + { ZSTD_frameHeader zfh; + size_t const zfhrt = ZSTD_getFrameHeader_advanced(&zfh, compressedBuffer, cSize, ZSTD_f_zstd1_magicless); + if (zfhrt != 0) goto _output_error; + } + /* one shot */ + { size_t const result = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (result != inputSize) goto _output_error; + 
DISPLAYLEVEL(3, "one-shot OK, "); + } + /* streaming */ + { ZSTD_inBuffer in = { compressedBuffer, cSize, 0 }; + ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 }; + size_t const result = ZSTD_decompressStream(dctx, &out, &in); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + if (out.pos != inputSize) goto _output_error; + DISPLAYLEVEL(3, "streaming OK : regenerated %u bytes \n", (unsigned)out.pos); + } + + /* basic block compression */ + DISPLAYLEVEL(3, "test%3i : empty magic-less format test : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); + { ZSTD_inBuffer in = { CNBuffer, 0, 0 }; + ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(0), 0 }; + size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + cSize = out.pos; + } + DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)0, (unsigned)cSize); + + DISPLAYLEVEL(3, "test%3i : decompress of empty magic-less frame : ", testNb++); + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) ); + /* one shot */ + { size_t const result = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (result != 0) goto _output_error; + DISPLAYLEVEL(3, "one-shot OK, "); + } + /* streaming */ + { ZSTD_inBuffer in = { compressedBuffer, cSize, 0 }; + ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 }; + size_t const result = ZSTD_decompressStream(dctx, &out, &in); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + if (out.pos != 0) goto _output_error; + DISPLAYLEVEL(3, "streaming OK : regenerated %u bytes \n", (unsigned)out.pos); + } + + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + } + + /* block API tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = 
ZSTD_createDCtx(); + static const size_t dictSize = 65 KB; + static const size_t blockSize = 100 KB; /* won't cause pb with small dict size */ + size_t cSize2; + assert(cctx != NULL); assert(dctx != NULL); + + /* basic block compression */ + DISPLAYLEVEL(3, "test%3i : Block compression test : ", testNb++); + CHECK( ZSTD_compressBegin(cctx, 5) ); + CHECK( ZSTD_getBlockSize(cctx) >= blockSize); + CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize) ); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Block decompression test : ", testNb++); + CHECK( ZSTD_decompressBegin(dctx) ); + { CHECK_NEWV(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + if (r != blockSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* very long stream of block compression */ + DISPLAYLEVEL(3, "test%3i : Huge block streaming compression test : ", testNb++); + CHECK( ZSTD_compressBegin(cctx, -199) ); /* we just want to quickly overflow internal U32 index */ + CHECK( ZSTD_getBlockSize(cctx) >= blockSize); + { U64 const toCompress = 5000000000ULL; /* > 4 GB */ + U64 compressed = 0; + while (compressed < toCompress) { + size_t const blockCSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize); + assert(blockCSize != 0); + if (ZSTD_isError(blockCSize)) goto _output_error; + compressed += blockCSize; + } } + DISPLAYLEVEL(3, "OK \n"); + + /* dictionary block compression */ + DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++); + CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) ); + CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize)); + RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed); /* create a non-compressible second block */ + { CHECK_NEWV(r, ZSTD_compressBlock(cctx, 
(char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* for cctx history consistency */ + assert(r == 0); /* non-compressible block */ } + memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* send non-compressed block (without header) */ + CHECK_VAR(cSize2, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize), + (char*)CNBuffer+dictSize+2*blockSize, blockSize)); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Dictionary Block decompression test : ", testNb++); + CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) ); + { CHECK_NEWV( r, ZSTD_decompressBlock(dctx, decodedBuffer, blockSize, compressedBuffer, cSize) ); + if (r != blockSize) { + DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize); + goto _output_error; + } } + memcpy((char*)decodedBuffer+blockSize, (char*)compressedBuffer+cSize, blockSize); + ZSTD_insertBlock(dctx, (char*)decodedBuffer+blockSize, blockSize); /* insert non-compressed block into dctx history */ + { CHECK_NEWV( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+2*blockSize, blockSize, (char*)compressedBuffer+cSize+blockSize, cSize2) ); + if (r != blockSize) { + DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() and after insertBlock() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize); + goto _output_error; + } } + assert(memcpy((char*)CNBuffer+dictSize, decodedBuffer, blockSize*3)); /* ensure regenerated content is identical to origin */ + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 3); + if (cdict==NULL) goto _output_error; + CHECK( ZSTD_compressBegin_usingCDict(cctx, cdict) ); + CHECK( ZSTD_compressBlock(cctx, compressedBuffer, 
ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + } + + /* long rle test */ + { size_t sampleSize = 0; + size_t expectedCompressedSize = 39; /* block 1, 2: compressed, block 3: RLE, zstd 1.4.4 */ + DISPLAYLEVEL(3, "test%3i : Long RLE test : ", testNb++); + memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1); + sampleSize += 256 KB - 1; + memset((char*)CNBuffer+sampleSize, 'A', 96 KB); + sampleSize += 96 KB; + cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1); + if (ZSTD_isError(cSize) || cSize > expectedCompressedSize) goto _output_error; + { CHECK_NEWV(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize)); + if (regenSize!=sampleSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + } + + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++); + { + size_t srcSize = 100 KB; + BYTE* src = (BYTE*)CNBuffer; + BYTE* decoded = (BYTE*)compressedBuffer; + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence)); + size_t seqsSize; + + if (seqs == NULL) goto _output_error; + assert(cctx != NULL); + + /* Populate src with random data */ + RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed); + + /* get the sequences */ + seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize); + + /* "decode" and compare the sequences */ + FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize); + assert(!memcmp(CNBuffer, compressedBuffer, srcSize)); + + ZSTD_freeCCtx(cctx); + free(seqs); + } + + /* Multiple blocks of zeros test */ + #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */ + DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH); + memset(CNBuffer, 0, LONGZEROSLENGTH); + CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, 
ZSTD_compressBound(LONGZEROSLENGTH), CNBuffer, LONGZEROSLENGTH, 1) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/LONGZEROSLENGTH*100); + + DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, LONGZEROSLENGTH); + { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, LONGZEROSLENGTH, compressedBuffer, cSize) ); + if (r != LONGZEROSLENGTH) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* All zeroes test (test bug #137) */ + #define ZEROESLENGTH 100 + DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); + memset(CNBuffer, 0, ZEROESLENGTH); + CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/ZEROESLENGTH*100); + + DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); + { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) ); + if (r != ZEROESLENGTH) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* nbSeq limit test */ + #define _3BYTESTESTLENGTH 131000 + #define NB3BYTESSEQLOG 9 + #define NB3BYTESSEQ (1 << NB3BYTESSEQLOG) + #define NB3BYTESSEQMASK (NB3BYTESSEQ-1) + /* creates a buffer full of 3-bytes sequences */ + { BYTE _3BytesSeqs[NB3BYTESSEQ][3]; + U32 rSeed = 1; + + /* create batch of 3-bytes sequences */ + { int i; + for (i=0; i < NB3BYTESSEQ; i++) { + _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255); + _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255); + _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255); + } } + + /* randomly fills CNBuffer with prepared 3-bytes sequences */ + { int i; + for (i=0; i < _3BYTESTESTLENGTH; i += 3) { /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */ + U32 const id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK; + ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0]; + ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1]; + ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2]; + } } } + DISPLAYLEVEL(3, 
"test%3i : growing nbSeq : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const maxNbSeq = _3BYTESTESTLENGTH / 3; + size_t const bound = ZSTD_compressBound(_3BYTESTESTLENGTH); + size_t nbSeq = 1; + while (nbSeq <= maxNbSeq) { + CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19)); + /* Check every sequence for the first 100, then skip more rapidly. */ + if (nbSeq < 100) { + ++nbSeq; + } else { + nbSeq += (nbSeq >> 2); + } + } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : compress lots 3-bytes sequences : ", testNb++); + CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), + CNBuffer, _3BYTESTESTLENGTH, 19) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/_3BYTESTESTLENGTH*100); + + DISPLAYLEVEL(3, "test%3i : decompress lots 3-bytes sequence : ", testNb++); + { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) ); + if (r != _3BYTESTESTLENGTH) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + + DISPLAYLEVEL(3, "test%3i : growing literals buffer : ", testNb++); + RDG_genBuffer(CNBuffer, CNBuffSize, 0.0, 0.1, seed); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const bound = ZSTD_compressBound(CNBuffSize); + size_t size = 1; + while (size <= CNBuffSize) { + CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3)); + /* Check every size for the first 100, then skip more rapidly. 
*/ + if (size < 100) { + ++size; + } else { + size += (size >> 2); + } + } + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : incompressible data and ill suited dictionary : ", testNb++); + { /* Train a dictionary on low characters */ + size_t dictSize = 16 KB; + void* const dictBuffer = malloc(dictSize); + size_t const totalSampleSize = 1 MB; + size_t const sampleUnitSize = 8 KB; + U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); + size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + if (!dictBuffer || !samplesSizes) goto _output_error; + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize, CNBuffer, samplesSizes, nbSamples); + if (ZDICT_isError(dictSize)) goto _output_error; + /* Reverse the characters to make the dictionary ill suited */ + { U32 u; + for (u = 0; u < CNBuffSize; ++u) { + ((BYTE*)CNBuffer)[u] = 255 - ((BYTE*)CNBuffer)[u]; + } + } + { /* Compress the data */ + size_t const inputSize = 500; + size_t const outputSize = ZSTD_compressBound(inputSize); + void* const outputBuffer = malloc(outputSize); + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (!outputBuffer || !cctx) goto _output_error; + CHECK(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1)); + free(outputBuffer); + ZSTD_freeCCtx(cctx); + } + + free(dictBuffer); + free(samplesSizes); + } + DISPLAYLEVEL(3, "OK \n"); + + + /* findFrameCompressedSize on skippable frames */ + DISPLAYLEVEL(3, "test%3i : frame compressed size of skippable frame : ", testNb++); + { const char* frame = "\x50\x2a\x4d\x18\x05\x0\x0\0abcde"; + size_t const frameSrcSize = 13; + if (ZSTD_findFrameCompressedSize(frame, frameSrcSize) != frameSrcSize) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* error string tests */ + DISPLAYLEVEL(3, "test%3i : testing ZSTD error code strings : ", testNb++); + if (strcmp("No error 
detected", ZSTD_getErrorName((ZSTD_ErrorCode)(0-ZSTD_error_no_error))) != 0) goto _output_error; + if (strcmp("No error detected", ZSTD_getErrorString(ZSTD_error_no_error)) != 0) goto _output_error; + if (strcmp("Unspecified error code", ZSTD_getErrorString((ZSTD_ErrorCode)(0-ZSTD_error_GENERIC))) != 0) goto _output_error; + if (strcmp("Error (generic)", ZSTD_getErrorName((size_t)0-ZSTD_error_GENERIC)) != 0) goto _output_error; + if (strcmp("Error (generic)", ZSTD_getErrorString(ZSTD_error_GENERIC)) != 0) goto _output_error; + if (strcmp("No error detected", ZSTD_getErrorName(ZSTD_error_GENERIC)) != 0) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : testing ZSTD dictionary sizes : ", testNb++); + RDG_genBuffer(CNBuffer, CNBuffSize, compressibility, 0., seed); + { + size_t const size = MIN(128 KB, CNBuffSize); + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_CDict* const lgCDict = ZSTD_createCDict(CNBuffer, size, 1); + ZSTD_CDict* const smCDict = ZSTD_createCDict(CNBuffer, 1 KB, 1); + ZSTD_frameHeader lgHeader; + ZSTD_frameHeader smHeader; + + CHECK_Z(ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, CNBuffer, size, lgCDict)); + CHECK_Z(ZSTD_getFrameHeader(&lgHeader, compressedBuffer, compressedBufferSize)); + CHECK_Z(ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, CNBuffer, size, smCDict)); + CHECK_Z(ZSTD_getFrameHeader(&smHeader, compressedBuffer, compressedBufferSize)); + + if (lgHeader.windowSize != smHeader.windowSize) goto _output_error; + + ZSTD_freeCDict(smCDict); + ZSTD_freeCDict(lgCDict); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : testing FSE_normalizeCount() PR#1255: ", testNb++); + { + short norm[32]; + unsigned count[32]; + unsigned const tableLog = 5; + size_t const nbSeq = 32; + unsigned const maxSymbolValue = 31; + size_t i; + + for (i = 0; i < 32; ++i) + count[i] = 1; + /* Calling FSE_normalizeCount() on a uniform distribution 
should not + * cause a division by zero. + */ + FSE_normalizeCount(norm, tableLog, count, nbSeq, maxSymbolValue); + } + DISPLAYLEVEL(3, "OK \n"); +#ifdef ZSTD_MULTITHREAD + DISPLAYLEVEL(3, "test%3i : passing wrong full dict should fail on compressStream2 refPrefix ", testNb++); + { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + /* A little more than ZSTDMT_JOBSIZE_MIN */ + size_t const srcSize = 1 MB + 5; + size_t const dstSize = ZSTD_compressBound(srcSize); + void* const src = CNBuffer; + void* const dst = compressedBuffer; + void* dict = (void*)malloc(srcSize); + + RDG_genBuffer(src, srcSize, compressibility, 0.5, seed); + RDG_genBuffer(dict, srcSize, compressibility, 0., seed); + + /* Make sure there is no ZSTD_MAGIC_NUMBER */ + memset(dict, 0, sizeof(U32)); + + /* something more than 1 */ + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2)); + /* lie and claim this is a full dict */ + CHECK_Z(ZSTD_CCtx_refPrefix_advanced(cctx, dict, srcSize, ZSTD_dct_fullDict)); + + { + ZSTD_outBuffer out = {dst, dstSize, 0}; + ZSTD_inBuffer in = {src, srcSize, 0}; + + /* should fail because its not a full dict like we said it was */ + assert(ZSTD_isError(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush))); + } + + ZSTD_freeCCtx(cctx); + free(dict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : small dictionary with multithreading and LDM ", testNb++); + { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + /* A little more than ZSTDMT_JOBSIZE_MIN */ + size_t const srcSize = 1 MB + 5; + size_t const dictSize = 10; + size_t const dstSize = ZSTD_compressBound(srcSize); + void* const src = CNBuffer; + void* const dst = compressedBuffer; + void* dict = (void*)malloc(dictSize); + + RDG_genBuffer(src, srcSize, compressibility, 0.5, seed); + RDG_genBuffer(dict, dictSize, compressibility, 0., seed); + + /* Make sure there is no ZSTD_MAGIC_NUMBER */ + memset(dict, 0, sizeof(U32)); + + /* Enable MT, LDM, and use refPrefix() for a small dict */ + 
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize)); + + CHECK_Z(ZSTD_compress2(cctx, dst, dstSize, src, srcSize)); + + ZSTD_freeCCtx(cctx); + free(dict); + } + DISPLAYLEVEL(3, "OK \n"); +#endif + + /* note : this test is rather long, it would be great to find a way to speed up its execution */ + DISPLAYLEVEL(3, "test%3i : table cleanliness through index reduction : ", testNb++); + { + int cLevel; + size_t approxIndex = 0; + size_t maxIndex = ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)); /* ZSTD_CURRENT_MAX from zstd_compress_internal.h */ + + /* Provision enough space in a static context so that we can do all + * this without ever reallocating, which would reset the indices. */ + size_t const staticCCtxSize = ZSTD_estimateCStreamSize(22); + void* const staticCCtxBuffer = malloc(staticCCtxSize); + ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(staticCCtxBuffer, staticCCtxSize); + + /* bump the indices so the following compressions happen at high + * indices. 
*/ + { ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 }; + ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 }; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, -500)); + while (approxIndex <= (maxIndex / 4) * 3) { + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); + approxIndex += in.pos; + CHECK(in.pos == in.size); + in.pos = 0; + out.pos = 0; + } + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + } + + /* spew a bunch of stuff into the table area */ + for (cLevel = 1; cLevel <= 22; cLevel++) { + ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize / (unsigned)cLevel, 0 }; + ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 }; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + approxIndex += in.pos; + } + + /* now crank the indices so we overflow */ + { ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 }; + ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 }; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, -500)); + while (approxIndex <= maxIndex) { + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); + approxIndex += in.pos; + CHECK(in.pos == in.size); + in.pos = 0; + out.pos = 0; + } + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + } + + /* do a bunch of compressions again in low indices and ensure we don't + * hit untracked invalid indices */ + for (cLevel = 1; cLevel <= 22; cLevel++) { + ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize / (unsigned)cLevel, 0 }; + ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 }; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, 
ZSTD_c_compressionLevel, cLevel)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); + CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); + approxIndex += in.pos; + } + + free(staticCCtxBuffer); + } + DISPLAYLEVEL(3, "OK \n"); + +_end: + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + return testResult; + +_output_error: + testResult = 1; + DISPLAY("Error detected in Unit tests ! \n"); + goto _end; +} + + +static size_t findDiff(const void* buf1, const void* buf2, size_t max) +{ + const BYTE* b1 = (const BYTE*)buf1; + const BYTE* b2 = (const BYTE*)buf2; + size_t u; + for (u=0; u<max; u++) { + if (b1[u] != b2[u]) break; + } + return u; +} + + +static ZSTD_parameters FUZ_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams) +{ + ZSTD_parameters params; + params.cParams = cParams; + params.fParams = fParams; + return params; +} + +static size_t FUZ_rLogLength(U32* seed, U32 logLength) +{ + size_t const lengthMask = ((size_t)1 << logLength) - 1; + return (lengthMask+1) + (FUZ_rand(seed) & lengthMask); +} + +static size_t FUZ_randomLength(U32* seed, U32 maxLog) +{ + U32 const logLength = FUZ_rand(seed) % maxLog; + return FUZ_rLogLength(seed, logLength); +} + +#undef CHECK +#define CHECK(cond, ...) 
{ \ + if (cond) { \ + DISPLAY("Error => "); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", (unsigned)seed, testNb); \ + goto _output_error; \ +} } + +#undef CHECK_Z +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + DISPLAY(" (seed %u, test nb %u) \n", (unsigned)seed, testNb); \ + goto _output_error; \ +} } + + +static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const maxDurationS, double compressibility, int bigTests) +{ + static const U32 maxSrcLog = 23; + static const U32 maxSampleLog = 22; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + size_t const dstBufferSize = (size_t)1<<maxSampleLog; + size_t const cBufferSize = ZSTD_compressBound(dstBufferSize); + BYTE* cNoiseBuffer[5]; + BYTE* const cBuffer = (BYTE*) malloc (cBufferSize); + BYTE* const dstBuffer = (BYTE*) malloc (dstBufferSize); + BYTE* const mirrorBuffer = (BYTE*) malloc (dstBufferSize); + ZSTD_CCtx* const refCtx = ZSTD_createCCtx(); + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + U32 result = 0; + unsigned testNb = 0; + U32 coreSeed = seed; + UTIL_time_t const startClock = UTIL_getTime(); + U64 const maxClockSpan = maxDurationS * SEC_TO_MICRO; + int const cLevelLimiter = bigTests ? 
3 : 2; + + /* allocation */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] + || !dstBuffer || !mirrorBuffer || !cBuffer || !refCtx || !ctx || !dctx, + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) FUZ_rand(&coreSeed); + + /* main test loop */ + for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < maxClockSpan); testNb++ ) { + BYTE* srcBuffer; /* jumping pointer */ + U32 lseed; + size_t sampleSize, maxTestSize, totalTestSize; + size_t cSize, totalCSize, totalGenSize; + U64 crcOrig; + BYTE* sampleBuffer; + const BYTE* dict; + size_t dictSize; + + /* notification */ + if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + else { DISPLAYUPDATE(2, "\r%6u ", testNb); } + + FUZ_rand(&coreSeed); + { U32 const prime1 = 2654435761U; lseed = coreSeed ^ prime1; } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not 
compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* select src segment */ + sampleSize = FUZ_randomLength(&lseed, maxSampleLog); + + /* create sample buffer (to catch read error with valgrind & sanitizers) */ + sampleBuffer = (BYTE*)malloc(sampleSize); + CHECK(sampleBuffer==NULL, "not enough memory for sample buffer"); + { size_t const sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize); + memcpy(sampleBuffer, srcBuffer + sampleStart, sampleSize); } + crcOrig = XXH64(sampleBuffer, sampleSize, 0); + + /* compression tests */ + { int const cLevelPositive = (int) + ( FUZ_rand(&lseed) % + ((U32)ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / (U32)cLevelLimiter)) ) + + 1; + int const cLevel = ((FUZ_rand(&lseed) & 15) == 3) ? + - (int)((FUZ_rand(&lseed) & 7) + 1) : /* test negative cLevel */ + cLevelPositive; + DISPLAYLEVEL(5, "fuzzer t%u: Simple compression test (level %i) \n", testNb, cLevel); + cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); + CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize)); + + /* compression failure test : too small dest buffer */ + assert(cSize > 3); + { const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; + const size_t tooSmallSize = cSize - missing; + const unsigned endMark = 0x4DC2B1A9; + memcpy(dstBuffer+tooSmallSize, &endMark, sizeof(endMark)); + DISPLAYLEVEL(5, "fuzzer t%u: compress into too small buffer of size %u (missing %u bytes) \n", + testNb, (unsigned)tooSmallSize, (unsigned)missing); + { size_t const errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); + CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! 
(buffer too small : %u < %u)", (unsigned)tooSmallSize, (unsigned)cSize); } + { unsigned endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, sizeof(endCheck)); + CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow (check.%08X != %08X.mark)", endCheck, endMark); } + } } + + /* frame header decompression test */ + { ZSTD_frameHeader zfh; + CHECK_Z( ZSTD_getFrameHeader(&zfh, cBuffer, cSize) ); + CHECK(zfh.frameContentSize != sampleSize, "Frame content size incorrect"); + } + + /* Decompressed size test */ + { unsigned long long const rSize = ZSTD_findDecompressedSize(cBuffer, cSize); + CHECK(rSize != sampleSize, "decompressed size incorrect"); + } + + /* successful decompression test */ + DISPLAYLEVEL(5, "fuzzer t%u: simple decompression test \n", testNb); + { size_t const margin = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1; + size_t const dSize = ZSTD_decompress(dstBuffer, sampleSize + margin, cBuffer, cSize); + CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (unsigned)sampleSize, (unsigned)cSize); + { U64 const crcDest = XXH64(dstBuffer, sampleSize, 0); + CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (unsigned)findDiff(sampleBuffer, dstBuffer, sampleSize), (unsigned)sampleSize); + } } + + free(sampleBuffer); /* no longer useful after this point */ + + /* truncated src decompression test */ + DISPLAYLEVEL(5, "fuzzer t%u: decompression of truncated source \n", testNb); + { size_t const missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ + size_t const tooSmallSize = cSize - missing; + void* cBufferTooSmall = malloc(tooSmallSize); /* valgrind will catch read overflows */ + CHECK(cBufferTooSmall == NULL, "not enough memory !"); + memcpy(cBufferTooSmall, cBuffer, tooSmallSize); + { size_t const errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize); + 
CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)"); } + free(cBufferTooSmall); + } + + /* too small dst decompression test */ + DISPLAYLEVEL(5, "fuzzer t%u: decompress into too small dst buffer \n", testNb); + if (sampleSize > 3) { + size_t const missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ + size_t const tooSmallSize = sampleSize - missing; + static const BYTE token = 0xA9; + dstBuffer[tooSmallSize] = token; + { size_t const errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize); + CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (unsigned)errorCode, (unsigned)tooSmallSize); } + CHECK(dstBuffer[tooSmallSize] != token, "ZSTD_decompress : dst buffer overflow"); + } + + /* noisy src decompression test */ + if (cSize > 6) { + /* insert noise into src */ + { U32 const maxNbBits = FUZ_highbit32((U32)(cSize-4)); + size_t pos = 4; /* preserve magic number (too easy to detect) */ + for (;;) { + /* keep some original src */ + { U32 const nbBits = FUZ_rand(&lseed) % maxNbBits; + size_t const mask = (1<<nbBits) - 1; + size_t const skipLength = FUZ_rand(&lseed) & mask; + pos += skipLength; + } + if (pos >= cSize) break; + /* add noise */ + { U32 const nbBitsCodes = FUZ_rand(&lseed) % maxNbBits; + U32 const nbBits = nbBitsCodes ? 
nbBitsCodes-1 : 0; + size_t const mask = (1<<nbBits) - 1; + size_t const rNoiseLength = (FUZ_rand(&lseed) & mask) + 1; + size_t const noiseLength = MIN(rNoiseLength, cSize-pos); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength); + memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength); + pos += noiseLength; + } } } + + /* decompress noisy source */ + DISPLAYLEVEL(5, "fuzzer t%u: decompress noisy source \n", testNb); + { U32 const endMark = 0xA9B1C3D6; + memcpy(dstBuffer+sampleSize, &endMark, 4); + { size_t const decompressResult = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize); + /* result *may* be an unlikely success, but even then, it must strictly respect dst buffer boundaries */ + CHECK((!ZSTD_isError(decompressResult)) && (decompressResult>sampleSize), + "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (unsigned)decompressResult, (unsigned)sampleSize); + } + { U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4); + CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow"); + } } } /* noisy src decompression test */ + + /*===== Bufferless streaming compression test, scattered segments and dictionary =====*/ + DISPLAYLEVEL(5, "fuzzer t%u: Bufferless streaming compression test \n", testNb); + { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + int const cLevel = (FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / cLevelLimiter))) + + 1; + maxTestSize = FUZ_rLogLength(&lseed, testLog); + if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1; + + dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */ + dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize)); + + DISPLAYLEVEL(6, "fuzzer t%u: Compressing up to <=%u bytes at level %i with dictionary size %u \n", + testNb, (unsigned)maxTestSize, cLevel, (unsigned)dictSize); + + if (FUZ_rand(&lseed) & 0xF) { + 
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) ); + } else { + ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_frameParameters const fPar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */, + !(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/, + 0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */ + ZSTD_parameters const p = FUZ_makeParams(cPar, fPar); + CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) ); + } + CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) ); + } + + { U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2; + U32 n; + XXH64_state_t xxhState; + XXH64_reset(&xxhState, 0); + for (totalTestSize=0, cSize=0, n=0 ; n<nbChunks ; n++) { + size_t const segmentSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const segmentStart = FUZ_rand(&lseed) % (srcBufferSize - segmentSize); + + if (cBufferSize-cSize < ZSTD_compressBound(segmentSize)) break; /* avoid invalid dstBufferTooSmall */ + if (totalTestSize+segmentSize > maxTestSize) break; + + { size_t const compressResult = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+segmentStart, segmentSize); + CHECK (ZSTD_isError(compressResult), "multi-segments compression error : %s", ZSTD_getErrorName(compressResult)); + cSize += compressResult; + } + XXH64_update(&xxhState, srcBuffer+segmentStart, segmentSize); + memcpy(mirrorBuffer + totalTestSize, srcBuffer+segmentStart, segmentSize); + totalTestSize += segmentSize; + } + + { size_t const flushResult = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize, NULL, 0); + CHECK (ZSTD_isError(flushResult), "multi-segments epilogue error : %s", ZSTD_getErrorName(flushResult)); + cSize += flushResult; + } + crcOrig = XXH64_digest(&xxhState); + } + + /* streaming decompression test */ + DISPLAYLEVEL(5, "fuzzer t%u: Bufferless streaming decompression test \n", testNb); + /* ensure memory requirement is good enough (should always be true) 
*/ + { ZSTD_frameHeader zfh; + CHECK( ZSTD_getFrameHeader(&zfh, cBuffer, ZSTD_FRAMEHEADERSIZE_MAX), + "ZSTD_getFrameHeader(): error retrieving frame information"); + { size_t const roundBuffSize = ZSTD_decodingBufferSize_min(zfh.windowSize, zfh.frameContentSize); + CHECK_Z(roundBuffSize); + CHECK((roundBuffSize > totalTestSize) && (zfh.frameContentSize!=ZSTD_CONTENTSIZE_UNKNOWN), + "ZSTD_decodingBufferSize_min() requires more memory (%u) than necessary (%u)", + (unsigned)roundBuffSize, (unsigned)totalTestSize ); + } } + if (dictSize<8) dictSize=0, dict=NULL; /* disable dictionary */ + CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, dict, dictSize) ); + totalCSize = 0; + totalGenSize = 0; + while (totalCSize < cSize) { + size_t const inSize = ZSTD_nextSrcSizeToDecompress(dctx); + size_t const genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize); + CHECK (ZSTD_isError(genSize), "ZSTD_decompressContinue error : %s", ZSTD_getErrorName(genSize)); + totalGenSize += genSize; + totalCSize += inSize; + } + CHECK (ZSTD_nextSrcSizeToDecompress(dctx) != 0, "frame not fully decoded"); + CHECK (totalGenSize != totalTestSize, "streaming decompressed data : wrong size") + CHECK (totalCSize != cSize, "compressed data should be fully read") + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + CHECK(crcOrig != crcDest, "streaming decompressed data corrupted (pos %u / %u)", + (unsigned)findDiff(mirrorBuffer, dstBuffer, totalTestSize), (unsigned)totalTestSize); + } + } /* for ( ; (testNb <= nbTests) */ + DISPLAY("\r%u fuzzer tests completed \n", testNb-1); + +_cleanup: + ZSTD_freeCCtx(refCtx); + ZSTD_freeCCtx(ctx); + ZSTD_freeDCtx(dctx); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(cBuffer); + free(dstBuffer); + free(mirrorBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + 
+/*_******************************************************* +* Command line +*********************************************************/ +static int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Number of tests (default:%i)\n", nbTestsDefault); + DISPLAY( " -T# : Max duration to run for. Overrides number of tests. (e.g. -T1m or -T60s for one minute)\n"); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_compressibility_default); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + +/*! readU32FromChar() : + @return : unsigned integer value read from input in `char` format + allows and interprets K, KB, KiB, M, MB and MiB suffix. + Will also modify `*stringPtr`, advancing it to position where it stopped reading. + Note : function result can overflow if digit string > MAX_UINT */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + if ((**stringPtr=='K') || (**stringPtr=='M')) { + result <<= 10; + if (**stringPtr=='M') result <<= 10; + (*stringPtr)++ ; + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. 
+ */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + +int main(int argc, const char** argv) +{ + U32 seed = 0; + int seedset = 0; + int argNb; + int nbTests = nbTestsDefault; + int testNb = 0; + int proba = FUZ_compressibility_default; + int result = 0; + U32 mainPause = 0; + U32 maxDuration = 0; + int bigTests = 1; + U32 memTestsOnly = 0; + const char* const programName = argv[0]; + + /* Check command line */ + for (argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + + if (longCommandWArg(&argument, "--memtest=")) { memTestsOnly = readU32FromChar(&argument); continue; } + + if (!strcmp(argument, "--memtest")) { memTestsOnly=1; continue; } + if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; } + + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + + case 'v': + argument++; + g_displayLevel++; + break; + + case 'q': + argument++; + g_displayLevel--; + break; + + case 'p': /* pause at the end */ + argument++; + mainPause = 1; + break; + + case 'i': + argument++; maxDuration = 0; + nbTests = (int)readU32FromChar(&argument); + break; + + case 'T': + argument++; + nbTests = 0; + maxDuration = readU32FromChar(&argument); + if (*argument=='s') argument++; /* seconds */ + if (*argument=='m') maxDuration *= 60, argument++; /* minutes */ + if (*argument=='n') argument++; + break; + + case 's': + argument++; + seedset = 1; + seed = readU32FromChar(&argument); + break; + + case 't': + argument++; + testNb = (int)readU32FromChar(&argument); + break; + + case 'P': /* compressibility % */ + argument++; + proba = (int)readU32FromChar(&argument); + if 
(proba>100) proba = 100; + break; + + default: + return (FUZ_usage(programName), 1); + } } } } /* for (argNb=1; argNb<argc; argNb++) */ + + /* Get Seed */ + DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + + DISPLAY("Seed = %u\n", (unsigned)seed); + if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %i%%\n", proba); + + if (memTestsOnly) { + g_displayLevel = MAX(3, g_displayLevel); + return FUZ_mallocTests(seed, ((double)proba) / 100, memTestsOnly); + } + + if (nbTests < testNb) nbTests = testNb; + + if (testNb==0) + result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */ + if (!result) + result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100, bigTests); + if (mainPause) { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + return result; +} diff --git a/src/zstd/tests/golden-compression/huffman-compressed-larger b/src/zstd/tests/golden-compression/huffman-compressed-larger Binary files differnew file mode 100644 index 000000000..f594f1ae9 --- /dev/null +++ b/src/zstd/tests/golden-compression/huffman-compressed-larger diff --git a/src/zstd/tests/golden-decompression/rle-first-block.zst b/src/zstd/tests/golden-decompression/rle-first-block.zst Binary files differnew file mode 100644 index 000000000..fd067edd7 --- /dev/null +++ b/src/zstd/tests/golden-decompression/rle-first-block.zst diff --git a/src/zstd/tests/gzip/Makefile b/src/zstd/tests/gzip/Makefile new file mode 100644 index 000000000..73f62f0eb --- /dev/null +++ b/src/zstd/tests/gzip/Makefile @@ -0,0 +1,45 @@ +# ################################################################ +# Copyright (c) 2017-2020, Facebook, Inc. +# All rights reserved. 
+# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +PRGDIR = ../../programs +VOID = /dev/null +export PATH := .:$(PATH) + +.PHONY: all +#all: test-gzip-env +all: test-helin-segv test-hufts test-keep test-list test-memcpy-abuse test-mixed +all: test-null-suffix-clobber test-stdin test-trailing-nul test-unpack-invalid +all: test-zdiff test-zgrep-context test-zgrep-f test-zgrep-signal test-znew-k test-z-suffix + @echo Testing completed + +.PHONY: zstd +zstd: + $(MAKE) -C $(PRGDIR) zstd + ln -sf $(PRGDIR)/zstd gzip + @echo PATH=$(PATH) + gzip --version + +.PHONY: clean +clean: + @$(MAKE) -C $(PRGDIR) $@ > $(VOID) + @$(RM) *.trs *.log + @echo Cleaning completed + + +#------------------------------------------------------------------------------ +# validated only for Linux, macOS, Hurd and some BSD targets +#------------------------------------------------------------------------------ +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD DragonFly NetBSD)) + +test-%: zstd + @./test-driver.sh --test-name $* --log-file $*.log --trs-file $*.trs --expect-failure "no" --color-tests "yes" --enable-hard-errors "yes" ./$*.sh + # || echo ignoring error + +endif diff --git a/src/zstd/tests/gzip/gzip-env.sh b/src/zstd/tests/gzip/gzip-env.sh new file mode 100755 index 000000000..120e52d78 --- /dev/null +++ b/src/zstd/tests/gzip/gzip-env.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# Test the obsolescent GZIP environment variable. + +# Copyright 2015-2016 Free Software Foundation, Inc. 
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +#echo PATH=$PATH +#gzip --version + +echo a >exp || framework_failure_ +gzip <exp >in || framework_failure_ + +fail=0 +GZIP=-qv gzip -d <in >out 2>err || fail=1 +compare exp out || fail=1 + +for badopt in -- -c --stdout -d --decompress -f --force -h --help -k --keep \ + -l --list -L --license -r --recursive -Sxxx --suffix=xxx '--suffix xxx' \ + -t --test -V --version +do + GZIP=$badopt gzip -d <in >out 2>err && fail=1 +done + +for goodopt in -n --no-name -N --name -q --quiet -v --verbose \ + -1 --fast -2 -3 -4 -5 -6 -7 -8 -9 --best +do + GZIP=$goodopt gzip -d <in >out 2>err || fail=1 + compare exp out || fail=1 +done + +Exit $fail diff --git a/src/zstd/tests/gzip/helin-segv.sh b/src/zstd/tests/gzip/helin-segv.sh new file mode 100644 index 000000000..f182c8066 --- /dev/null +++ b/src/zstd/tests/gzip/helin-segv.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# Before gzip-1.4, gzip -d would segfault on some inputs. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +# This test case was provided by Aki Helin. +printf '\037\235\220\0\0\0\304' > helin.gz || framework_failure_ +printf '\0\0' > exp || framework_failure_ + +fail=0 + +gzip -dc helin.gz > out || fail=1 +compare exp out || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/help-version.sh b/src/zstd/tests/gzip/help-version.sh new file mode 100644 index 000000000..ee0c19f7d --- /dev/null +++ b/src/zstd/tests/gzip/help-version.sh @@ -0,0 +1,270 @@ +#! /bin/sh +# Make sure all these programs work properly +# when invoked with --help or --version. + +# Copyright (C) 2000-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Ensure that $SHELL is set to *some* value and exported. +# This is required for dircolors, which would fail e.g., when +# invoked via debuild (which removes SHELL from the environment). +test "x$SHELL" = x && SHELL=/bin/sh +export SHELL + +. 
"${srcdir=.}/init.sh"; path_prepend_ . + +expected_failure_status_chroot=125 +expected_failure_status_env=125 +expected_failure_status_nice=125 +expected_failure_status_nohup=125 +expected_failure_status_stdbuf=125 +expected_failure_status_su=125 +expected_failure_status_timeout=125 +expected_failure_status_printenv=2 +expected_failure_status_tty=3 +expected_failure_status_sort=2 +expected_failure_status_expr=3 +expected_failure_status_lbracket=2 +expected_failure_status_dir=2 +expected_failure_status_ls=2 +expected_failure_status_vdir=2 + +expected_failure_status_cmp=2 +expected_failure_status_zcmp=2 +expected_failure_status_sdiff=2 +expected_failure_status_diff3=2 +expected_failure_status_diff=2 +expected_failure_status_zdiff=2 +expected_failure_status_zgrep=2 +expected_failure_status_zegrep=2 +expected_failure_status_zfgrep=2 + +expected_failure_status_grep=2 +expected_failure_status_egrep=2 +expected_failure_status_fgrep=2 + +test "$built_programs" \ + || fail_ "built_programs not specified!?!" + +test "$VERSION" \ + || fail_ "set envvar VERSION; it is required for a PATH sanity-check" + +# Extract version from --version output of the first program +for i in $built_programs; do + v=$(env $i --version | sed -n '1s/.* //p;q') + break +done + +# Ensure that it matches $VERSION. +test "x$v" = "x$VERSION" \ + || fail_ "--version-\$VERSION mismatch" + +for lang in C fr da; do + for i in $built_programs; do + + # Skip `test'; it doesn't accept --help or --version. + test $i = test && continue; + + # false fails even when invoked with --help or --version. + if test $i = false; then + env LC_MESSAGES=$lang $i --help >/dev/null && fail=1 + env LC_MESSAGES=$lang $i --version >/dev/null && fail=1 + continue + fi + + args= + + # The just-built install executable is always named `ginstall'. + test $i = install && i=ginstall + + # Make sure they exit successfully, under normal conditions. 
+ eval "env \$i $args --help > h-\$i " || fail=1 + eval "env \$i $args --version >/dev/null" || fail=1 + + # Make sure they mention the bug-reporting address in --help output. + grep "$PACKAGE_BUGREPORT" h-$i > /dev/null || fail=1 + rm -f h-$i + + # Make sure they fail upon `disk full' error. + if test -w /dev/full && test -c /dev/full; then + eval "env \$i $args --help >/dev/full 2>/dev/null" && fail=1 + eval "env \$i $args --version >/dev/full 2>/dev/null" && fail=1 + status=$? + test $i = [ && prog=lbracket || prog=$i + eval "expected=\$expected_failure_status_$prog" + test x$expected = x && expected=1 + if test $status = $expected; then + : # ok + else + fail=1 + echo "*** $i: bad exit status \`$status' (expected $expected)," 1>&2 + echo " with --help or --version output redirected to /dev/full" 1>&2 + fi + fi + done +done + +bigZ_in=bigZ-in.Z +zin=zin.gz +zin2=zin2.gz + +tmp=tmp-$$ +tmp_in=in-$$ +tmp_in2=in2-$$ +tmp_dir=dir-$$ +tmp_out=out-$$ +mkdir $tmp || fail=1 +cd $tmp || fail=1 + +comm_setup () { args="$tmp_in $tmp_in"; } +csplit_setup () { args="$tmp_in //"; } +cut_setup () { args='-f 1'; } +join_setup () { args="$tmp_in $tmp_in"; } +tr_setup () { args='a a'; } + +chmod_setup () { args="a+x $tmp_in"; } +# Punt on these. +chgrp_setup () { args=--version; } +chown_setup () { args=--version; } +mkfifo_setup () { args=--version; } +mknod_setup () { args=--version; } +# Punt on uptime, since it fails (e.g., failing to get boot time) +# on some systems, and we shouldn't let that stop `make check'. +uptime_setup () { args=--version; } + +# Create a file in the current directory, not in $TMPDIR. +mktemp_setup () { args=mktemp.XXXX; } + +cmp_setup () { args="$tmp_in $tmp_in2"; } + +# Tell dd not to print the line with transfer rate and total. +# The transfer rate would vary between runs. 
# Per-program argument helpers.  The driver loop calls "${prog}_setup"
# when such a function exists, then runs the program as
#   env PROG $args < $tmp_in > $tmp_out
# so each helper only has to leave the wanted arguments in $args.
# They rely on $tmp_in, $tmp_in2, $tmp_dir, $zin, $zin2 and $bigZ_in
# being set by the caller.

# status=noxfer: keep dd from printing its transfer-statistics line.
dd_setup () { args=status=noxfer; }

# gzip-family tools: append the compressed fixtures to whatever is in $args.
zdiff_setup () { args="$args $zin $zin2"; }
zcmp_setup () { zdiff_setup; }
zcat_setup () { args="$args $zin"; }
gunzip_setup () { zcat_setup; }
zmore_setup () { zcat_setup; }
zless_setup () { zcat_setup; }
znew_setup () { args="$args $bigZ_in"; }
zforce_setup () { zcat_setup; }
zgrep_setup () { args="$args z $zin"; }
zegrep_setup () { zgrep_setup; }
zfgrep_setup () { zgrep_setup; }
gzexe_setup () { args="$args $tmp_in"; }

# We know that $tmp_in contains a "0"
grep_setup () { args="0 $tmp_in"; }
egrep_setup () { args="0 $tmp_in"; }
fgrep_setup () { args="0 $tmp_in"; }

diff_setup () { args="$tmp_in $tmp_in2"; }
sdiff_setup () { args="$tmp_in $tmp_in2"; }
diff3_setup () { args="$tmp_in $tmp_in2 $tmp_in2"; }
cp_setup () { args="$tmp_in $tmp_in2"; }
ln_setup () { args="$tmp_in ln-target"; }
ginstall_setup () { args="$tmp_in $tmp_in2"; }
mv_setup () { args="$tmp_in $tmp_in2"; }
mkdir_setup () { args=$tmp_dir/subdir; }
rmdir_setup () { args=$tmp_dir; }
rm_setup () { args=$tmp_in; }
shred_setup () { args=$tmp_in; }
touch_setup () { args=$tmp_in2; }
truncate_setup () { args="--reference=$tmp_in $tmp_in2"; }

basename_setup () { args=$tmp_in; }
dirname_setup () { args=$tmp_in; }
expr_setup () { args=foo; }

# Punt, in case GNU `id' hasn't been installed yet.
groups_setup () { args=--version; }

pathchk_setup () { args=$tmp_in; }
yes_setup () { args=--version; }
logname_setup () { args=--version; }
nohup_setup () { args=--version; }
printf_setup () { args=foo; }
seq_setup () { args=10; }
sleep_setup () { args=0; }
su_setup () { args=--version; }
stdbuf_setup () { args="-oL true"; }
timeout_setup () { args=--version; }

# I'd rather not run sync, since it spins up disks that I've
# deliberately caused to spin down (but not unmounted).
+sync_setup () { args=--version; } + +test_setup () { args=foo; } + +# This is necessary in the unusual event that there is +# no valid entry in /etc/mtab. +df_setup () { args=/; } + +# This is necessary in the unusual event that getpwuid (getuid ()) fails. +id_setup () { args=-u; } + +# Use env to avoid invoking built-in sleep of Solaris 11's /bin/sh. +kill_setup () { + env sleep 10m & + args=$! +} + +link_setup () { args="$tmp_in link-target"; } +unlink_setup () { args=$tmp_in; } + +readlink_setup () { + ln -s . slink + args=slink; +} + +stat_setup () { args=$tmp_in; } +unlink_setup () { args=$tmp_in; } +lbracket_setup () { args=": ]"; } + +# Ensure that each program "works" (exits successfully) when doing +# something more than --help or --version. +for i in $built_programs; do + # Skip these. + case $i in chroot|stty|tty|false|chcon|runcon) continue;; esac + + rm -rf $tmp_in $tmp_in2 $tmp_dir $tmp_out $bigZ_in $zin $zin2 + echo z |gzip > $zin + cp $zin $zin2 + cp $zin $bigZ_in + + # This is sort of kludgey: use numbers so this is valid input for factor, + # and two tokens so it's valid input for tsort. + echo 2147483647 0 > $tmp_in + # Make $tmp_in2 identical. Then, using $tmp_in and $tmp_in2 as arguments + # to the likes of cmp and diff makes them exit successfully. 
+ cp $tmp_in $tmp_in2 + mkdir $tmp_dir + # echo ================== $i + test $i = [ && prog=lbracket || prog=$i + args= + if type ${prog}_setup > /dev/null 2>&1; then + ${prog}_setup + fi + if eval "env \$i $args < \$tmp_in > \$tmp_out"; then + : # ok + else + echo FAIL: $i + fail=1 + fi + rm -rf $tmp_in $tmp_in2 $tmp_out $tmp_dir +done + +Exit $fail diff --git a/src/zstd/tests/gzip/hufts-segv.gz b/src/zstd/tests/gzip/hufts-segv.gz Binary files differnew file mode 100644 index 000000000..32cb2a256 --- /dev/null +++ b/src/zstd/tests/gzip/hufts-segv.gz diff --git a/src/zstd/tests/gzip/hufts.sh b/src/zstd/tests/gzip/hufts.sh new file mode 100644 index 000000000..9b9576ce3 --- /dev/null +++ b/src/zstd/tests/gzip/hufts.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# Exercise a bug whereby an invalid input could make gzip -d misbehave. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf '\n...: invalid compressed data--format violated\n' > exp \ + || framework_failure_ + +fail=0 +gzip -dc "$abs_srcdir/hufts-segv.gz" > out 2> err +test $? 
= 1 || fail=1 + +compare /dev/null out || fail=1 + +sed 's/.*hufts-segv.gz: /...: /' err > k; mv k err || fail=1 +compare exp err || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/init.cfg b/src/zstd/tests/gzip/init.cfg new file mode 100644 index 000000000..901209cea --- /dev/null +++ b/src/zstd/tests/gzip/init.cfg @@ -0,0 +1,5 @@ +# This file is sourced by init.sh, *before* its initialization. + +# This goes hand in hand with the "exec 9>&2;" in Makefile.am's +# TESTS_ENVIRONMENT definition. +stderr_fileno_=9 diff --git a/src/zstd/tests/gzip/init.sh b/src/zstd/tests/gzip/init.sh new file mode 100644 index 000000000..97e4e4ba5 --- /dev/null +++ b/src/zstd/tests/gzip/init.sh @@ -0,0 +1,616 @@ +# source this file; set up for tests + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Using this file in a test +# ========================= +# +# The typical skeleton of a test looks like this: +# +# #!/bin/sh +# . "${srcdir=.}/init.sh"; path_prepend_ . +# Execute some commands. +# Note that these commands are executed in a subdirectory, therefore you +# need to prepend "../" to relative filenames in the build directory. +# Note that the "path_prepend_ ." is useful only if the body of your +# test invokes programs residing in the initial directory. 
# For example, if the programs you want to test are in src/, and this test
# script is named tests/test-1, then you would use "path_prepend_ ../src",
# or perhaps export PATH='$(abs_top_builddir)/src$(PATH_SEPARATOR)'"$$PATH"
# to all tests via automake's TESTS_ENVIRONMENT.
# Set the exit code 0 for success, 77 for skipped, or 1 or other for failure.
# Use the skip_ and fail_ functions to print a diagnostic and then exit
# with the corresponding exit code.
# Exit $?

# Executing a test that uses this file
# ====================================
#
# Running a single test:
#   $ make check TESTS=test-foo.sh
#
# Running a single test, with verbose output:
#   $ make check TESTS=test-foo.sh VERBOSE=yes
#
# Running a single test, with single-stepping:
#   1. Go into a sub-shell:
#        $ bash
#   2. Set relevant environment variables from TESTS_ENVIRONMENT in the
#      Makefile:
#        $ export srcdir=../../tests # this is an example
#   3. Execute the commands from the test, copy&pasting them one by one:
#        $ . "$srcdir/init.sh"; path_prepend_ .
#        ...
#   4. Finally
#        $ exit

# Base name of the running script: everything after the last '/' of "./$0".
ME_=`expr "./$0" : '.*/\(.*\)$'`

# We use a trap below for cleanup.  This requires us to go through
# hoops to get the right exit status transported through the handler.
# So use 'Exit STATUS' instead of 'exit STATUS' inside of the tests.
# Turn off errexit here so that we don't trip the bug with OSF1/Tru64
# sh inside this function.
Exit () { set +e; (exit $1); exit $1; }

# Print warnings (e.g., about skipped and failed tests) to this file number.
# Override by defining to say, 9, in init.cfg, and putting say,
#   export ...ENVVAR_SETTINGS...; $(SHELL) 9>&2
# in the definition of TESTS_ENVIRONMENT in your tests/Makefile.am file.
# This is useful when using automake's parallel tests mode, to print
# the reason for skip/failure to console, rather than to the .log files.
: ${stderr_fileno_=2}

# Note that correct expansion of "$*" depends on IFS starting with ' '.
# Always write the full diagnostic to stderr.
# When stderr_fileno_ is not 2, also emit the first line of the
# diagnostic to that file descriptor.
warn_ ()
{
  # If IFS does not start with ' ', set it and emit the warning in a subshell.
  case $IFS in
    ' '*) printf '%s\n' "$*" >&2
          test $stderr_fileno_ = 2 \
            || { printf '%s\n' "$*" | sed 1q >&$stderr_fileno_ ; } ;;
    *) (IFS=' '; warn_ "$@");;
  esac
}

# Each helper prints a diagnostic prefixed with the script name ($ME_)
# and then leaves through Exit: 1 = failure, 77 = skipped, 99 = hard error.
fail_ () { warn_ "$ME_: failed test: $@"; Exit 1; }
skip_ () { warn_ "$ME_: skipped test: $@"; Exit 77; }
fatal_ () { warn_ "$ME_: hard error: $@"; Exit 99; }
framework_failure_ () { warn_ "$ME_: set-up failure: $@"; Exit 99; }

# This is used to simplify checking of the return value
# which is useful when ensuring a command fails as desired.
# I.e., just doing `command ... &&fail=1` will not catch
# a segfault in command for example.  With this helper you
# instead check an explicit exit code like
#   returns_ 1 command ... || fail
returns_ () {
  # Disable tracing so it doesn't interfere with stderr of the wrapped command
  { set +x; } 2>/dev/null

  local exp_exit="$1"
  shift
  "$@"
  test $? -eq $exp_exit && ret_=0 || ret_=1

  # Re-enable tracing only when it was wanted and is known to be harmless.
  if test "$VERBOSE" = yes && test "$gl_set_x_corrupts_stderr_" = false; then
    set -x
  fi
  { return $ret_; } 2>/dev/null
}

# Sanitize this shell to POSIX mode, if possible.
DUALCASE=1; export DUALCASE
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
  emulate sh
  NULLCMD=:
  alias -g '${1+"$@"}'='"$@"'
  setopt NO_GLOB_SUBST
else
  case `(set -o) 2>/dev/null` in
    *posix*) set -o posix ;;
  esac
fi

# We require $(...) support unconditionally.
# We require a few additional shell features only when $EXEEXT is nonempty,
# in order to support automatic $EXEEXT emulation:
# - hyphen-containing alias names
# - we prefer to use ${var#...} substitution, rather than having
#   to work around lack of support for that feature.
+# The following code attempts to find a shell with support for these features. +# If the current shell passes the test, we're done. Otherwise, test other +# shells until we find one that passes. If one is found, re-exec it. +# If no acceptable shell is found, skip the current test. +# +# The "...set -x; P=1 true 2>err..." test is to disqualify any shell that +# emits "P=1" into err, as /bin/sh from SunOS 5.11 and OpenBSD 4.7 do. +# +# Use "9" to indicate success (rather than 0), in case some shell acts +# like Solaris 10's /bin/sh but exits successfully instead of with status 2. + +# Eval this code in a subshell to determine a shell's suitability. +# 10 - passes all tests; ok to use +# 9 - ok, but enabling "set -x" corrupts app stderr; prefer higher score +# ? - not ok +gl_shell_test_script_=' +test $(echo y) = y || exit 1 +f_local_() { local v=1; }; f_local_ || exit 1 +score_=10 +if test "$VERBOSE" = yes; then + test -n "$( (exec 3>&1; set -x; P=1 true 2>&3) 2> /dev/null)" && score_=9 +fi +test -z "$EXEEXT" && exit $score_ +shopt -s expand_aliases +alias a-b="echo zoo" +v=abx + test ${v%x} = ab \ + && test ${v#a} = bx \ + && test $(a-b) = zoo \ + && exit $score_ +' + +if test "x$1" = "x--no-reexec"; then + shift +else + # Assume a working shell. Export to subshells (setup_ needs this). + gl_set_x_corrupts_stderr_=false + export gl_set_x_corrupts_stderr_ + + # Record the first marginally acceptable shell. + marginal_= + + # Search for a shell that meets our requirements. + for re_shell_ in __current__ "${CONFIG_SHELL:-no_shell}" \ + /bin/sh bash dash zsh pdksh fail + do + test "$re_shell_" = no_shell && continue + + # If we've made it all the way to the sentinel, "fail" without + # finding even a marginal shell, skip this test. + if test "$re_shell_" = fail; then + test -z "$marginal_" && skip_ failed to find an adequate shell + re_shell_=$marginal_ + break + fi + + # When testing the current shell, simply "eval" the test code. 
+ # Otherwise, run it via $re_shell_ -c ... + if test "$re_shell_" = __current__; then + # 'eval'ing this code makes Solaris 10's /bin/sh exit with + # $? set to 2. It does not evaluate any of the code after the + # "unexpected" first '('. Thus, we must run it in a subshell. + ( eval "$gl_shell_test_script_" ) > /dev/null 2>&1 + else + "$re_shell_" -c "$gl_shell_test_script_" 2>/dev/null + fi + + st_=$? + + # $re_shell_ works just fine. Use it. + if test $st_ = 10; then + gl_set_x_corrupts_stderr_=false + break + fi + + # If this is our first marginally acceptable shell, remember it. + if test "$st_:$marginal_" = 9: ; then + marginal_="$re_shell_" + gl_set_x_corrupts_stderr_=true + fi + done + + if test "$re_shell_" != __current__; then + # Found a usable shell. Preserve -v and -x. + case $- in + *v*x* | *x*v*) opts_=-vx ;; + *v*) opts_=-v ;; + *x*) opts_=-x ;; + *) opts_= ;; + esac + re_shell=$re_shell_ + export re_shell + exec "$re_shell_" $opts_ "$0" --no-reexec "$@" + echo "$ME_: exec failed" 1>&2 + exit 127 + fi +fi + +# If this is bash, turn off all aliases. +test -n "$BASH_VERSION" && unalias -a + +# Note that when supporting $EXEEXT (transparently mapping from PROG_NAME to +# PROG_NAME.exe), we want to support hyphen-containing names like test-acos. +# That is part of the shell-selection test above. Why use aliases rather +# than functions? Because support for hyphen-containing aliases is more +# widespread than that for hyphen-containing function names. +test -n "$EXEEXT" && shopt -s expand_aliases + +# Enable glibc's malloc-perturbing option. +# This is useful for exposing code that depends on the fact that +# malloc-related functions often return memory that is mostly zeroed. +# If you have the time and cycles, use valgrind to do an even better job. +: ${MALLOC_PERTURB_=87} +export MALLOC_PERTURB_ + +# This is a stub function that is run upon trap (upon regular exit and +# interrupt). 
Override it with a per-test function, e.g., to unmount +# a partition, or to undo any other global state changes. +cleanup_ () { :; } + +# Emit a header similar to that from diff -u; Print the simulated "diff" +# command so that the order of arguments is clear. Don't bother with @@ lines. +emit_diff_u_header_ () +{ + printf '%s\n' "diff -u $*" \ + "--- $1 1970-01-01" \ + "+++ $2 1970-01-01" +} + +# Arrange not to let diff or cmp operate on /dev/null, +# since on some systems (at least OSF/1 5.1), that doesn't work. +# When there are not two arguments, or no argument is /dev/null, return 2. +# When one argument is /dev/null and the other is not empty, +# cat the nonempty file to stderr and return 1. +# Otherwise, return 0. +compare_dev_null_ () +{ + test $# = 2 || return 2 + + if test "x$1" = x/dev/null; then + test -s "$2" || return 0 + emit_diff_u_header_ "$@"; sed 's/^/+/' "$2" + return 1 + fi + + if test "x$2" = x/dev/null; then + test -s "$1" || return 0 + emit_diff_u_header_ "$@"; sed 's/^/-/' "$1" + return 1 + fi + + return 2 +} + +if diff_out_=`exec 2>/dev/null; diff -u "$0" "$0" < /dev/null` \ + && diff -u Makefile "$0" 2>/dev/null | grep '^[+]#!' >/dev/null; then + # diff accepts the -u option and does not (like AIX 7 'diff') produce an + # extra space on column 1 of every content line. + if test -z "$diff_out_"; then + compare_ () { diff -u "$@"; } + else + compare_ () + { + if diff -u "$@" > diff.out; then + # No differences were found, but Solaris 'diff' produces output + # "No differences encountered". Hide this output. 
+ rm -f diff.out + true + else + cat diff.out + rm -f diff.out + false + fi + } + fi +elif + for diff_opt_ in -U3 -c '' no; do + test "$diff_opt_" = no && break + diff_out_=`exec 2>/dev/null; diff $diff_opt_ "$0" "$0" </dev/null` && break + done + test "$diff_opt_" != no +then + if test -z "$diff_out_"; then + compare_ () { diff $diff_opt_ "$@"; } + else + compare_ () + { + if diff $diff_opt_ "$@" > diff.out; then + # No differences were found, but AIX and HP-UX 'diff' produce output + # "No differences encountered" or "There are no differences between the + # files.". Hide this output. + rm -f diff.out + true + else + cat diff.out + rm -f diff.out + false + fi + } + fi +elif cmp -s /dev/null /dev/null 2>/dev/null; then + compare_ () { cmp -s "$@"; } +else + compare_ () { cmp "$@"; } +fi + +# Usage: compare EXPECTED ACTUAL +# +# Given compare_dev_null_'s preprocessing, defer to compare_ if 2 or more. +# Otherwise, propagate $? to caller: any diffs have already been printed. +compare () +{ + # This looks like it can be factored to use a simple "case $?" + # after unchecked compare_dev_null_ invocation, but that would + # fail in a "set -e" environment. + if compare_dev_null_ "$@"; then + return 0 + else + case $? in + 1) return 1;; + *) compare_ "$@";; + esac + fi +} + +# An arbitrary prefix to help distinguish test directories. +testdir_prefix_ () { printf gt; } + +# Run the user-overridable cleanup_ function, remove the temporary +# directory and exit with the incoming value of $?. +remove_tmp_ () +{ + __st=$? + cleanup_ + # cd out of the directory we're about to remove + cd "$initial_cwd_" || cd / || cd /tmp + chmod -R u+rwx "$test_dir_" + # If removal fails and exit status was to be 0, then change it to 1. 
+ rm -rf "$test_dir_" || { test $__st = 0 && __st=1; } + exit $__st +} + +# Given a directory name, DIR, if every entry in it that matches *.exe +# contains only the specified bytes (see the case stmt below), then print +# a space-separated list of those names and return 0. Otherwise, don't +# print anything and return 1. Naming constraints apply also to DIR. +find_exe_basenames_ () +{ + feb_dir_=$1 + feb_fail_=0 + feb_result_= + feb_sp_= + for feb_file_ in $feb_dir_/*.exe; do + # If there was no *.exe file, or there existed a file named "*.exe" that + # was deleted between the above glob expansion and the existence test + # below, just skip it. + test "x$feb_file_" = "x$feb_dir_/*.exe" && test ! -f "$feb_file_" \ + && continue + # Exempt [.exe, since we can't create a function by that name, yet + # we can't invoke [ by PATH search anyways due to shell builtins. + test "x$feb_file_" = "x$feb_dir_/[.exe" && continue + case $feb_file_ in + *[!-a-zA-Z/0-9_.+]*) feb_fail_=1; break;; + *) # Remove leading file name components as well as the .exe suffix. + feb_file_=${feb_file_##*/} + feb_file_=${feb_file_%.exe} + feb_result_="$feb_result_$feb_sp_$feb_file_";; + esac + feb_sp_=' ' + done + test $feb_fail_ = 0 && printf %s "$feb_result_" + return $feb_fail_ +} + +# Consider the files in directory, $1. +# For each file name of the form PROG.exe, create an alias named +# PROG that simply invokes PROG.exe, then return 0. If any selected +# file name or the directory name, $1, contains an unexpected character, +# define no alias and return 1. 
+create_exe_shims_ () +{ + case $EXEEXT in + '') return 0 ;; + .exe) ;; + *) echo "$0: unexpected \$EXEEXT value: $EXEEXT" 1>&2; return 1 ;; + esac + + base_names_=`find_exe_basenames_ $1` \ + || { echo "$0 (exe_shim): skipping directory: $1" 1>&2; return 0; } + + if test -n "$base_names_"; then + for base_ in $base_names_; do + alias "$base_"="$base_$EXEEXT" + done + fi + + return 0 +} + +# Use this function to prepend to PATH an absolute name for each +# specified, possibly-$initial_cwd_-relative, directory. +path_prepend_ () +{ + while test $# != 0; do + path_dir_=$1 + case $path_dir_ in + '') fail_ "invalid path dir: '$1'";; + /*) abs_path_dir_=$path_dir_;; + *) abs_path_dir_=$initial_cwd_/$path_dir_;; + esac + case $abs_path_dir_ in + *:*) fail_ "invalid path dir: '$abs_path_dir_'";; + esac + PATH="$abs_path_dir_:$PATH" + + # Create an alias, FOO, for each FOO.exe in this directory. + create_exe_shims_ "$abs_path_dir_" \ + || fail_ "something failed (above): $abs_path_dir_" + shift + done + export PATH +} + +setup_ () +{ + if test "$VERBOSE" = yes; then + # Test whether set -x may cause the selected shell to corrupt an + # application's stderr. Many do, including zsh-4.3.10 and the /bin/sh + # from SunOS 5.11, OpenBSD 4.7 and Irix 5.x and 6.5. + # If enabling verbose output this way would cause trouble, simply + # issue a warning and refrain. + if $gl_set_x_corrupts_stderr_; then + warn_ "using SHELL=$SHELL with 'set -x' corrupts stderr" + else + set -x + fi + fi + + initial_cwd_=$PWD + + pfx_=`testdir_prefix_` + test_dir_=`mktempd_ "$initial_cwd_" "$pfx_-$ME_.XXXX"` \ + || fail_ "failed to create temporary directory in $initial_cwd_" + cd "$test_dir_" || fail_ "failed to cd to temporary directory" + + # As autoconf-generated configure scripts do, ensure that IFS + # is defined initially, so that saving and restoring $IFS works. 
+ gl_init_sh_nl_=' +' + IFS=" "" $gl_init_sh_nl_" + + # This trap statement, along with a trap on 0 below, ensure that the + # temporary directory, $test_dir_, is removed upon exit as well as + # upon receipt of any of the listed signals. + for sig_ in 1 2 3 13 15; do + eval "trap 'Exit $(expr $sig_ + 128)' $sig_" + done +} + +# Create a temporary directory, much like mktemp -d does. +# Written by Jim Meyering. +# +# Usage: mktempd_ /tmp phoey.XXXXXXXXXX +# +# First, try to use the mktemp program. +# Failing that, we'll roll our own mktemp-like function: +# - try to get random bytes from /dev/urandom +# - failing that, generate output from a combination of quickly-varying +# sources and gzip. Ignore non-varying gzip header, and extract +# "random" bits from there. +# - given those bits, map to file-name bytes using tr, and try to create +# the desired directory. +# - make only $MAX_TRIES_ attempts + +# Helper function. Print $N pseudo-random bytes from a-zA-Z0-9. +rand_bytes_ () +{ + n_=$1 + + # Maybe try openssl rand -base64 $n_prime_|tr '+/=\012' abcd first? + # But if they have openssl, they probably have mktemp, too. + + chars_=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 + dev_rand_=/dev/urandom + if test -r "$dev_rand_"; then + # Note: 256-length($chars_) == 194; 3 copies of $chars_ is 186 + 8 = 194. 
+ dd ibs=$n_ count=1 if=$dev_rand_ 2>/dev/null \ + | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_ + return + fi + + n_plus_50_=`expr $n_ + 50` + cmds_='date; date +%N; free; who -a; w; ps auxww; ps ef; netstat -n' + data_=` (eval "$cmds_") 2>&1 | gzip ` + + # Ensure that $data_ has length at least 50+$n_ + while :; do + len_=`echo "$data_"|wc -c` + test $n_plus_50_ -le $len_ && break; + data_=` (echo "$data_"; eval "$cmds_") 2>&1 | gzip ` + done + + echo "$data_" \ + | dd bs=1 skip=50 count=$n_ 2>/dev/null \ + | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_ +} + +mktempd_ () +{ + case $# in + 2);; + *) fail_ "Usage: mktempd_ DIR TEMPLATE";; + esac + + destdir_=$1 + template_=$2 + + MAX_TRIES_=4 + + # Disallow any trailing slash on specified destdir: + # it would subvert the post-mktemp "case"-based destdir test. + case $destdir_ in + / | //) destdir_slash_=$destdir_;; + */) fail_ "invalid destination dir: remove trailing slash(es)";; + *) destdir_slash_=$destdir_/;; + esac + + case $template_ in + *XXXX) ;; + *) fail_ \ + "invalid template: $template_ (must have a suffix of at least 4 X's)";; + esac + + # First, try to use mktemp. + d=`unset TMPDIR; { mktemp -d -t -p "$destdir_" "$template_"; } 2>/dev/null` && + + # The resulting name must be in the specified directory. + case $d in "$destdir_slash_"*) :;; *) false;; esac && + + # It must have created the directory. + test -d "$d" && + + # It must have 0700 permissions. Handle sticky "S" bits. + perms=`ls -dgo "$d" 2>/dev/null` && + case $perms in drwx--[-S]---*) :;; *) false;; esac && { + echo "$d" + return + } + + # If we reach this point, we'll have to create a directory manually. + + # Get a copy of the template without its suffix of X's. + base_template_=`echo "$template_"|sed 's/XX*$//'` + + # Calculate how many X's we've just removed.
+ template_length_=`echo "$template_" | wc -c` + nx_=`echo "$base_template_" | wc -c` + nx_=`expr $template_length_ - $nx_` + + err_= + i_=1 + while :; do + X_=`rand_bytes_ $nx_` + candidate_dir_="$destdir_slash_$base_template_$X_" + err_=`mkdir -m 0700 "$candidate_dir_" 2>&1` \ + && { echo "$candidate_dir_"; return; } + test $MAX_TRIES_ -le $i_ && break; + i_=`expr $i_ + 1` + done + fail_ "$err_" +} + +# If you want to override the testdir_prefix_ function, +# or to add more utility functions, use this file. +test -f "$srcdir/init.cfg" \ + && . "$srcdir/init.cfg" + +setup_ "$@" +# This trap is here, rather than in the setup_ function, because some +# shells run the exit trap at shell function exit, rather than script exit. +trap remove_tmp_ 0 diff --git a/src/zstd/tests/gzip/keep.sh b/src/zstd/tests/gzip/keep.sh new file mode 100644 index 000000000..ab9a21811 --- /dev/null +++ b/src/zstd/tests/gzip/keep.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# Exercise the --keep option. + +# Copyright (C) 2013-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo fooooooooo > in || framework_failure_ +cp in orig || framework_failure_ + +fail=0 + +# Compress and decompress both with and without --keep. 
+for k in --keep ''; do + # With --keep, the source must be retained, otherwise, it must be removed. + case $k in --keep) op='||' ;; *) op='&&' ;; esac + + gzip $k in || fail=1 + eval "test -f in $op fail=1" + test -f in.gz || fail=1 + rm -f in || fail=1 + + gzip -d $k in.gz || fail=1 + eval "test -f in.gz $op fail=1" + test -f in || fail=1 + compare in orig || fail=1 + rm -f in.gz || fail=1 +done + +cp orig in || framework_failure_ +log=$(gzip -kv in 2>&1) || fail=1 +case $log in + *'created in.gz'*) ;; + *) fail=1;; +esac + +Exit $fail diff --git a/src/zstd/tests/gzip/list.sh b/src/zstd/tests/gzip/list.sh new file mode 100644 index 000000000..75912e1e2 --- /dev/null +++ b/src/zstd/tests/gzip/list.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# Exercise the --list option. + +# Copyright 2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . 
+ +echo zoology zucchini > in || framework_failure_ +cp in orig || framework_failure_ +fail=0 +gzip -l in && fail=1 +gzip -9 in || fail=1 +gzip -l in.gz >out1 || fail=1 +gzip -l in.gz | cat >out2 || fail=1 +compare out1 out2 || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/memcpy-abuse.sh b/src/zstd/tests/gzip/memcpy-abuse.sh new file mode 100644 index 000000000..7d5c056de --- /dev/null +++ b/src/zstd/tests/gzip/memcpy-abuse.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# Before gzip-1.4, the use of memcpy in inflate_codes could +# mistakenly operate on overlapping regions. Exercise that code. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +# The input must be larger than 32KiB and slightly +# less uniform than e.g., all zeros. +printf wxy%032767d 0 | tee in | gzip > in.gz || framework_failure_ + +fail=0 + +# Before the fix, this would call memcpy with overlapping regions.
+gzip -dc in.gz > out || fail=1 + +compare in out || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/mixed.sh b/src/zstd/tests/gzip/mixed.sh new file mode 100644 index 000000000..383a54f5e --- /dev/null +++ b/src/zstd/tests/gzip/mixed.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# Ensure that gzip -cdf handles mixed compressed/not-compressed data +# Before gzip-1.5, it would produce invalid output. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf 'xxx\nyyy\n' > exp2 || framework_failure_ +printf 'aaa\nbbb\nccc\n' > exp3 || framework_failure_ + +fail=0 + +(echo xxx; echo yyy) > in || fail=1 +gzip -cdf < in > out || fail=1 +compare exp2 out || fail=1 + +# Uncompressed input, followed by compressed data. +# Currently fails, so skip it. +# (echo xxx; echo yyy|gzip) > in || fail=1 +# gzip -cdf < in > out || fail=1 +# compare exp2 out || fail=1 + +# Compressed input, followed by regular (not-compressed) data. 
+(echo xxx|gzip; echo yyy) > in || fail=1 +gzip -cdf < in > out || fail=1 +compare exp2 out || fail=1 + +(echo xxx|gzip; echo yyy|gzip) > in || fail=1 +gzip -cdf < in > out || fail=1 +compare exp2 out || fail=1 + +in_str=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-+=% +for i in 0 1 2 3 4 5 6 7 8 9 a; do in_str="$in_str$in_str" ;done + +# Start with some small sizes. $(seq 64) +sizes=$(i=0; while :; do echo $i; test $i = 64 && break; i=$(expr $i + 1); done) + +# gzip's internal buffer size is 32KiB + 64 bytes: +sizes="$sizes 32831 32832 32833" + +# 128KiB, +/- 1 +sizes="$sizes 131071 131072 131073" + +# Ensure that "gzip -cdf" acts like cat, for a range of small input files. +i=0 +for i in $sizes; do + echo $i + printf %$i.${i}s $in_str > in + gzip -cdf < in > out + compare in out || fail=1 +done + +Exit $fail diff --git a/src/zstd/tests/gzip/null-suffix-clobber.sh b/src/zstd/tests/gzip/null-suffix-clobber.sh new file mode 100644 index 000000000..0efd0e344 --- /dev/null +++ b/src/zstd/tests/gzip/null-suffix-clobber.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# Before gzip-1.5, gzip -d -S '' k.gz would delete F.gz and not create "F" + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . 
+ +printf anything | gzip > F.gz || framework_failure_ +echo y > yes || framework_failure_ +echo "gzip: invalid suffix ''" > expected-err || framework_failure_ + +fail=0 + +gzip ---presume-input-tty -d -S '' F.gz < yes > out 2>err && fail=1 + +compare /dev/null out || fail=1 +compare expected-err err || fail=1 + +test -f F.gz || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/stdin.sh b/src/zstd/tests/gzip/stdin.sh new file mode 100644 index 000000000..eef4cd8b1 --- /dev/null +++ b/src/zstd/tests/gzip/stdin.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# Ensure that gzip interprets "-" as stdin. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf a | gzip > in || framework_failure_ +printf aaa > exp || framework_failure_ + +fail=0 +gzip -dc in - in < in > out 2>err || fail=1 + +compare exp out || fail=1 +compare /dev/null err || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/test-driver.sh b/src/zstd/tests/gzip/test-driver.sh new file mode 100644 index 000000000..649c084e4 --- /dev/null +++ b/src/zstd/tests/gzip/test-driver.sh @@ -0,0 +1,150 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2016-01-11.22; # UTC + +# Copyright (C) 2011-2015 Free Software Foundation, Inc. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <<END +Usage: + test-driver --test-name=NAME --log-file=PATH --trs-file=PATH + [--expect-failure={yes|no}] [--color-tests={yes|no}] + [--enable-hard-errors={yes|no}] [--] + TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS] +The '--test-name', '--log-file' and '--trs-file' options are mandatory. +END +} + +test_name= # Used for reporting. +log_file= # Where to save the output of the test script. +trs_file= # Where to save the metadata of the test run. 
+expect_failure=no +color_tests=no +enable_hard_errors=yes +while test $# -gt 0; do + case $1 in + --help) print_usage; exit $?;; + --version) echo "test-driver $scriptversion"; exit $?;; + --test-name) test_name=$2; shift;; + --log-file) log_file=$2; shift;; + --trs-file) trs_file=$2; shift;; + --color-tests) color_tests=$2; shift;; + --expect-failure) expect_failure=$2; shift;; + --enable-hard-errors) enable_hard_errors=$2; shift;; + --) shift; break;; + -*) usage_error "invalid option: '$1'";; + *) break;; + esac + shift +done + +missing_opts= +test x"$test_name" = x && missing_opts="$missing_opts --test-name" +test x"$log_file" = x && missing_opts="$missing_opts --log-file" +test x"$trs_file" = x && missing_opts="$missing_opts --trs-file" +if test x"$missing_opts" != x; then + usage_error "the following mandatory options are missing:$missing_opts" +fi + +if test $# -eq 0; then + usage_error "missing argument" +fi + +if test $color_tests = yes; then + # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'. + red='[0;31m' # Red. + grn='[0;32m' # Green. + lgn='[1;32m' # Light green. + blu='[1;34m' # Blue. + mgn='[0;35m' # Magenta. + std='[m' # No color. +else + red= grn= lgn= blu= mgn= std= +fi + +do_exit='rm -f $log_file $trs_file; (exit $st); exit $st' +trap "st=129; $do_exit" 1 +trap "st=130; $do_exit" 2 +trap "st=141; $do_exit" 13 +trap "st=143; $do_exit" 15 + +# Test script is run here. +"$@" >$log_file 2>&1 +estatus=$? 
+ +if test $enable_hard_errors = no && test $estatus -eq 99; then + tweaked_estatus=1 +else + tweaked_estatus=$estatus +fi + +case $tweaked_estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peeking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>$log_file + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. +echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: + +exit $tweaked_estatus diff --git a/src/zstd/tests/gzip/trailing-nul.sh b/src/zstd/tests/gzip/trailing-nul.sh new file mode 100644 index 000000000..7b15d5e55 --- /dev/null +++ b/src/zstd/tests/gzip/trailing-nul.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# gzip accepts trailing NUL bytes; don't fail if there is exactly one. +# Before gzip-1.4, this would fail. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +(echo 0 | gzip; printf '\0') > 0.gz || framework_failure_ +(echo 00 | gzip; printf '\0\0') > 00.gz || framework_failure_ +(echo 1 | gzip; printf '\1') > 1.gz || framework_failure_ + +fail=0 + +for i in 0 00 1; do + gzip -d $i.gz; ret=$? + test $ret -eq $i || fail=1 + test $ret = 1 && continue + echo $i > exp || fail=1 + compare exp $i || fail=1 +done + +Exit $fail diff --git a/src/zstd/tests/gzip/unpack-invalid.sh b/src/zstd/tests/gzip/unpack-invalid.sh new file mode 100644 index 000000000..fe8384d73 --- /dev/null +++ b/src/zstd/tests/gzip/unpack-invalid.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# gzip should report invalid 'unpack' input when uncompressing. +# With gzip-1.5, it would output invalid data instead. + +# Copyright (C) 2012-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +for input in \ + '\037\036\000\000\037\213\010\000\000\000\000\000\002\003\036\000\000\000\002\003\037\213\010\000\000\000\000\000\002\003\355\301\001\015\000\000\000\302\240\037\000\302\240\037\213\010\000\000\000\000\000\002\003\355\301' \ + '\037\213\010\000\000\000\000\000\002\003\355\301\001\015\000\000\000\302\240\076\366\017\370\036\016\030\000\000\000\000\000\000\000\000\000\034\010\105\140\104\025\020\047\000\000\037\036\016\030\000\000\000'; do + + printf "$input" >in || framework_failure_ + + if gzip -d <in >out 2>err; then + fail=1 + else + : "${fail:=0}" # default to success, but keep a failure recorded for an earlier input + fi +done + +Exit $fail diff --git a/src/zstd/tests/gzip/z-suffix.sh b/src/zstd/tests/gzip/z-suffix.sh new file mode 100644 index 000000000..a870a5408 --- /dev/null +++ b/src/zstd/tests/gzip/z-suffix.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# Check that -Sz works. + +# Copyright 2014-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . +fail=0 +printf anything > F && cp F G || framework_failure_ +gzip -Sz F || fail=1 +test ! 
-f F || fail=1 +test -f Fz || fail=1 +gzip -dSz F || fail=1 +test ! -f Fz || fail=1 +compare F G || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zdiff.sh b/src/zstd/tests/gzip/zdiff.sh new file mode 100644 index 000000000..d62a84606 --- /dev/null +++ b/src/zstd/tests/gzip/zdiff.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# Exercise zdiff with two compressed inputs. +# Before gzip-1.4, this would fail. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo a > a || framework_failure_ +echo b > b || framework_failure_ +gzip a b || framework_failure_ + +cat <<EOF > exp +1c1 +< a +--- +> b +EOF + +fail=0 +zdiff a.gz b.gz > out 2>&1 +test $? = 1 || fail=1 + +compare exp out || fail=1 + +rm -f out +# expect success, for equal files +zdiff a.gz a.gz > out 2> err || fail=1 +# expect no output +test -s out && fail=1 +# expect no stderr +test -s err && fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zgrep-context.sh b/src/zstd/tests/gzip/zgrep-context.sh new file mode 100644 index 000000000..c8648b7e4 --- /dev/null +++ b/src/zstd/tests/gzip/zgrep-context.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# Ensure that zgrep -15 works. Before gzip-1.5, it would fail. + +# Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +# A limited replacement for seq: handle 1 or 2 args; increment must be 1 +seq() +{ + case $# in + 1) start=1 final=$1;; + 2) start=$1 final=$2;; + *) echo you lose 1>&2; exit 1;; + esac + awk 'BEGIN{for(i='$start';i<='$final';i++) print i}' < /dev/null +} + +seq 40 > in || framework_failure_ +gzip < in > in.gz || framework_failure_ +seq 2 32 > exp || framework_failure_ + +: ${GREP=grep} +$GREP -15 17 - < in > out && compare exp out || { + echo >&2 "$0: $GREP does not support context options; skipping this test" + exit 77 +} + +fail=0 +zgrep -15 17 - < in.gz > out || fail=1 +compare exp out || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zgrep-f.sh b/src/zstd/tests/gzip/zgrep-f.sh new file mode 100644 index 000000000..d0cf27f7e --- /dev/null +++ b/src/zstd/tests/gzip/zgrep-f.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# Ensure that zgrep -f - works like grep -f - +# Before gzip-1.4, it would fail. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf 'needle\nn2\n' > n || framework_failure_ +cp n haystack || framework_failure_ +gzip haystack || framework_failure_ + +fail=0 +zgrep -f - haystack.gz < n > out 2>&1 || fail=1 + +compare out n || fail=1 + +if ${BASH_VERSION+:} false; then + set +o posix + # This failed with gzip 1.6. + cat n n >nn || framework_failure_ + eval 'zgrep -h -f <(cat n) haystack.gz haystack.gz' >out || fail=1 + compare out nn || fail=1 +fi + +# This failed with gzip 1.4. +echo a-b | zgrep -e - > /dev/null || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zgrep-signal.sh b/src/zstd/tests/gzip/zgrep-signal.sh new file mode 100644 index 000000000..a8c53881a --- /dev/null +++ b/src/zstd/tests/gzip/zgrep-signal.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# Check that zgrep is terminated gracefully by signal when +# its grep/sed pipeline is terminated by a signal. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo a | gzip -c > f.gz || framework_failure_ + +test "x$PERL" = x && PERL=perl +("$PERL" -e 'use POSIX qw(dup2)') >/dev/null 2>&1 || + skip_ "no suitable perl found" + +# Run the arguments as a command, in a process where stdout is a +# dangling pipe and SIGPIPE has the default signal-handling action. +# This can't be done portably in the shell, because if SIGPIPE is +# ignored when the shell is entered, the shell might refuse to trap +# it. Fall back on Perl+POSIX, if available. Take care to close the +# pipe's read end before running the program; the equivalent of the +# shell's "command | :" has a race condition in that COMMAND could +# write before ":" exits. +write_to_dangling_pipe () { + program=${1?} + shift + args= + for arg; do + args="$args, '$arg'" + done + "$PERL" -e ' + use POSIX qw(dup2); + $SIG{PIPE} = "DEFAULT"; + pipe my ($read_end, $write_end) or die "pipe: $!\n"; + dup2 fileno $write_end, 1 or die "dup2: $!\n"; + close $read_end or die "close: $!\n"; + exec '"'$program'$args"'; + ' +} + +write_to_dangling_pipe cat f.gz f.gz +signal_status=$? +test 128 -lt $signal_status || + framework_failure_ 'signal handling busted on this host' + +fail=0 + +write_to_dangling_pipe zgrep a f.gz f.gz +test $? -eq $signal_status || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/znew-k.sh b/src/zstd/tests/gzip/znew-k.sh new file mode 100644 index 000000000..6c239e28e --- /dev/null +++ b/src/zstd/tests/gzip/znew-k.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# Check that znew -K works without compress(1). + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +cat <<'EOF' >compress || framework_failure_ +#!/bin/sh +echo >&2 'compress has been invoked' +exit 1 +EOF +chmod +x compress || framework_failure_ + +# Note that the basename must have a length of 6 or greater. +# Otherwise, "test -f $name" below would fail. +name=123456.Z + +printf '%1012977s' ' ' | gzip -c > $name || framework_failure_ + +fail=0 + +znew -K $name || fail=1 +test -f $name || fail=1 + +Exit $fail diff --git a/src/zstd/tests/invalidDictionaries.c b/src/zstd/tests/invalidDictionaries.c new file mode 100644 index 000000000..23e93fd54 --- /dev/null +++ b/src/zstd/tests/invalidDictionaries.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include <stddef.h> +#include "zstd.h" + +static const char invalidRepCode[] = { + 0x37, 0xa4, 0x30, 0xec, 0x2a, 0x00, 0x00, 0x00, 0x39, 0x10, 0xc0, 0xc2, + 0xa6, 0x00, 0x0c, 0x30, 0xc0, 0x00, 0x03, 0x0c, 0x30, 0x20, 0x72, 0xf8, + 0xb4, 0x6d, 0x4b, 0x9f, 0xfc, 0x97, 0x29, 0x49, 0xb2, 0xdf, 0x4b, 0x29, + 0x7d, 0x4a, 0xfc, 0x83, 0x18, 0x22, 0x75, 0x23, 0x24, 0x44, 0x4d, 0x02, + 0xb7, 0x97, 0x96, 0xf6, 0xcb, 0xd1, 0xcf, 0xe8, 0x22, 0xea, 0x27, 0x36, + 0xb7, 0x2c, 0x40, 0x46, 0x01, 0x08, 0x23, 0x01, 0x00, 0x00, 0x06, 0x1e, + 0x3c, 0x83, 0x81, 0xd6, 0x18, 0xd4, 0x12, 0x3a, 0x04, 0x00, 0x80, 0x03, + 0x08, 0x0e, 0x12, 0x1c, 0x12, 0x11, 0x0d, 0x0e, 0x0a, 0x0b, 0x0a, 0x09, + 0x10, 0x0c, 0x09, 0x05, 0x04, 0x03, 0x06, 0x06, 0x06, 0x02, 0x00, 0x03, + 0x00, 0x00, 0x02, 0x02, 0x00, 0x04, 0x06, 0x03, 0x06, 0x08, 0x24, 0x6b, + 0x0d, 0x01, 0x10, 0x04, 0x81, 0x07, 0x00, 0x00, 0x04, 0xb9, 0x58, 0x18, + 0x06, 0x59, 0x92, 0x43, 0xce, 0x28, 0xa5, 0x08, 0x88, 0xc0, 0x80, 0x88, + 0x8c, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00 +}; + +typedef struct dictionary_s { + const char *data; + size_t size; +} dictionary; + +static const dictionary dictionaries[] = { + {invalidRepCode, sizeof(invalidRepCode)}, + {NULL, 0}, +}; + +int main(int argc, const char** argv) { + const dictionary *dict; + for (dict = dictionaries; dict->data != NULL; ++dict) { + ZSTD_CDict *cdict; + ZSTD_DDict *ddict; + cdict = ZSTD_createCDict(dict->data, dict->size, 1); + if (cdict) { + ZSTD_freeCDict(cdict); + return 1; + } + ddict = ZSTD_createDDict(dict->data, dict->size); + if (ddict) { + ZSTD_freeDDict(ddict); + return 2; + } + } + + (void)argc; + (void)argv; + return 0; +} diff --git a/src/zstd/tests/legacy.c b/src/zstd/tests/legacy.c new file mode 100644 index 000000000..3d3ec4358 --- /dev/null +++ b/src/zstd/tests/legacy.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + This program uses hard-coded data compressed with Zstd legacy versions + and tests that the API decompresses them correctly +*/ + +/*=========================================== +* Dependencies +*==========================================*/ +#include <stddef.h> /* size_t */ +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strlen */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_decompressBound */ +#include "zstd.h" +#include "zstd_errors.h" + +/*=========================================== +* Macros +*==========================================*/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) + +/*=========================================== +* Precompressed frames +*==========================================*/ +const char* const COMPRESSED; /* content is at end of file */ +size_t const COMPRESSED_SIZE = 917; +const char* const EXPECTED; /* content is at end of file */ + + +static int testSimpleAPI(void) +{ + size_t const size = strlen(EXPECTED); + char* const output = malloc(size); + + if (!output) { + DISPLAY("ERROR: Not enough memory!\n"); + return 1; + } + + { + size_t const ret = ZSTD_decompress(output, size, COMPRESSED, COMPRESSED_SIZE); + if (ZSTD_isError(ret)) { + if (ret == ZSTD_error_prefix_unknown) { + DISPLAY("ERROR: Invalid frame magic number, was this compiled " + "without legacy support?\n"); + } else { + DISPLAY("ERROR: %s\n", ZSTD_getErrorName(ret)); + } + return 1; + } + if (ret != size) { + DISPLAY("ERROR: Wrong decoded size\n"); + } + } + if (memcmp(EXPECTED, output, size) != 0) { + DISPLAY("ERROR: Wrong decoded output produced\n"); + return 1; + } + + free(output); + DISPLAY("Simple API 
OK\n"); + return 0; +} + + +static int testStreamingAPI(void) +{ + int error_code = 0; + size_t const outBuffSize = ZSTD_DStreamOutSize(); + char* const outBuff = malloc(outBuffSize); + ZSTD_DStream* const stream = ZSTD_createDStream(); + ZSTD_inBuffer input = { COMPRESSED, COMPRESSED_SIZE, 0 }; + size_t outputPos = 0; + int needsInit = 1; + + if (outBuff == NULL) { + DISPLAY("ERROR: Could not allocate memory\n"); + return 1; + } + if (stream == NULL) { + DISPLAY("ERROR: Could not create dstream\n"); + free(outBuff); + return 1; + } + + while (1) { + ZSTD_outBuffer output = {outBuff, outBuffSize, 0}; + if (needsInit) { + size_t const ret = ZSTD_initDStream(stream); + if (ZSTD_isError(ret)) { + DISPLAY("ERROR: ZSTD_initDStream: %s\n", ZSTD_getErrorName(ret)); + error_code = 1; + break; + } } + + { size_t const ret = ZSTD_decompressStream(stream, &output, &input); + if (ZSTD_isError(ret)) { + DISPLAY("ERROR: ZSTD_decompressStream: %s\n", ZSTD_getErrorName(ret)); + error_code = 1; + break; + } + + if (ret == 0) { + needsInit = 1; + } } + + if (memcmp(outBuff, EXPECTED + outputPos, output.pos) != 0) { + DISPLAY("ERROR: Wrong decoded output produced\n"); + error_code = 1; + break; + } + outputPos += output.pos; + if (input.pos == input.size && output.pos < output.size) { + break; + } + } + + free(outBuff); + ZSTD_freeDStream(stream); + if (error_code == 0) DISPLAY("Streaming API OK\n"); + return error_code; +} + +static int testFrameDecoding(void) +{ + if (strlen(EXPECTED) > ZSTD_decompressBound(COMPRESSED, COMPRESSED_SIZE)) { + DISPLAY("ERROR: ZSTD_decompressBound: decompressed bound too small\n"); + return 1; + } + { const char* ip = COMPRESSED; + size_t remainingSize = COMPRESSED_SIZE; + while (1) { + size_t frameSize = ZSTD_findFrameCompressedSize(ip, remainingSize); + if (ZSTD_isError(frameSize)) { + DISPLAY("ERROR: ZSTD_findFrameCompressedSize: %s\n", ZSTD_getErrorName(frameSize)); + return 1; + } + if (frameSize > remainingSize) { + DISPLAY("ERROR: 
ZSTD_findFrameCompressedSize: expected frameSize to align with src buffer"); + return 1; + } + ip += frameSize; + remainingSize -= frameSize; + if (remainingSize == 0) break; + } + } + DISPLAY("Frame Decoding OK\n"); + return 0; +} + +int main(void) +{ + { int const ret = testSimpleAPI(); + if (ret) return ret; } + { int const ret = testStreamingAPI(); + if (ret) return ret; } + { int const ret = testFrameDecoding(); + if (ret) return ret; } + + DISPLAY("OK\n"); + return 0; +} + +/* Consists of the "EXPECTED" string compressed with default settings on + - v0.4.3 + - v0.5.0 + - v0.6.0 + - v0.7.0 + - v0.8.0 +*/ +const char* const COMPRESSED = + "\x24\xB5\x2F\xFD\x00\x00\x00\xBB\xB0\x02\xC0\x10\x00\x1E\xB0\x01" + "\x02\x00\x00\x80\x00\xE8\x92\x34\x12\x97\xC8\xDF\xE9\xF3\xEF\x53" + "\xEA\x1D\x27\x4F\x0C\x44\x90\x0C\x8D\xF1\xB4\x89\x17\x00\x18\x00" + "\x18\x00\x3F\xE6\xE2\xE3\x74\xD6\xEC\xC9\x4A\xE0\x71\x71\x42\x3E" + "\x64\x4F\x6A\x45\x4E\x78\xEC\x49\x03\x3F\xC6\x80\xAB\x8F\x75\x5E" + "\x6F\x2E\x3E\x7E\xC6\xDC\x45\x69\x6C\xC5\xFD\xC7\x40\xB8\x84\x8A" + "\x01\xEB\xA8\xD1\x40\x39\x90\x4C\x64\xF8\xEB\x53\xE6\x18\x0B\x67" + "\x12\xAD\xB8\x99\xB3\x5A\x6F\x8A\x19\x03\x01\x50\x67\x56\xF5\x9F" + "\x35\x84\x60\xA0\x60\x91\xC9\x0A\xDC\xAB\xAB\xE0\xE2\x81\xFA\xCF" + "\xC6\xBA\x01\x0E\x00\x54\x00\x00\x19\x00\x00\x54\x14\x00\x24\x24" + "\x04\xFE\x04\x84\x4E\x41\x00\x27\xE2\x02\xC4\xB1\x00\xD2\x51\x00" + "\x79\x58\x41\x28\x00\xE0\x0C\x01\x68\x65\x00\x04\x13\x0C\xDA\x0C" + "\x80\x22\x06\xC0\x00\x00\x25\xB5\x2F\xFD\x00\x00\x00\xAD\x12\xB0" + "\x7D\x1E\xB0\x01\x02\x00\x00\x80\x00\xE8\x92\x34\x12\x97\xC8\xDF" + "\xE9\xF3\xEF\x53\xEA\x1D\x27\x4F\x0C\x44\x90\x0C\x8D\xF1\xB4\x89" + "\x03\x01\x50\x67\x56\xF5\x9F\x35\x84\x60\xA0\x60\x91\xC9\x0A\xDC" + "\xAB\xAB\xE0\xE2\x81\xFA\xCF\xC6\xBA\xEB\xA8\xD1\x40\x39\x90\x4C" + "\x64\xF8\xEB\x53\xE6\x18\x0B\x67\x12\xAD\xB8\x99\xB3\x5A\x6F\x8A" + "\xF9\x63\x0C\xB8\xFA\x58\xE7\xF5\xE6\xE2\xE3\x67\xCC\x5D\x94\xC6" + 
"\x56\xDC\x7F\x0C\x84\x4B\xA8\xF8\x63\x2E\x3E\x4E\x67\xCD\x9E\xAC" + "\x04\x1E\x17\x27\xE4\x43\xF6\xA4\x56\xE4\x84\xC7\x9E\x34\x0E\x00" + "\x00\x32\x40\x80\xA8\x00\x01\x49\x81\xE0\x3C\x01\x29\x1D\x00\x87" + "\xCE\x80\x75\x08\x80\x72\x24\x00\x7B\x52\x00\x94\x00\x20\xCC\x01" + "\x86\xD2\x00\x81\x09\x83\xC1\x34\xA0\x88\x01\xC0\x00\x00\x26\xB5" + "\x2F\xFD\x42\xEF\x00\x00\xA6\x12\xB0\x7D\x1E\xB0\x01\x02\x00\x00" + "\x54\xA0\xBA\x24\x8D\xC4\x25\xF2\x77\xFA\xFC\xFB\x94\x7A\xC7\xC9" + "\x13\x03\x11\x24\x43\x63\x3C\x6D\x22\x03\x01\x50\x67\x56\xF5\x9F" + "\x35\x84\x60\xA0\x60\x91\xC9\x0A\xDC\xAB\xAB\xE0\xE2\x81\xFA\xCF" + "\xC6\xBA\xEB\xA8\xD1\x40\x39\x90\x4C\x64\xF8\xEB\x53\xE6\x18\x0B" + "\x67\x12\xAD\xB8\x99\xB3\x5A\x6F\x8A\xF9\x63\x0C\xB8\xFA\x58\xE7" + "\xF5\xE6\xE2\xE3\x67\xCC\x5D\x94\xC6\x56\xDC\x7F\x0C\x84\x4B\xA8" + "\xF8\x63\x2E\x3E\x4E\x67\xCD\x9E\xAC\x04\x1E\x17\x27\xE4\x43\xF6" + "\xA4\x56\xE4\x84\xC7\x9E\x34\x0E\x00\x35\x0B\x71\xB5\xC0\x2A\x5C" + "\x26\x94\x22\x20\x8B\x4C\x8D\x13\x47\x58\x67\x15\x6C\xF1\x1C\x4B" + "\x54\x10\x9D\x31\x50\x85\x4B\x54\x0E\x01\x4B\x3D\x01\xC0\x00\x00" + "\x27\xB5\x2F\xFD\x20\xEF\x00\x00\xA6\x12\xE4\x84\x1F\xB0\x01\x10" + "\x00\x00\x00\x35\x59\xA6\xE7\xA1\xEF\x7C\xFC\xBD\x3F\xFF\x9F\xEF" + "\xEE\xEF\x61\xC3\xAA\x31\x1D\x34\x38\x22\x22\x04\x44\x21\x80\x32" + "\xAD\x28\xF3\xD6\x28\x0C\x0A\x0E\xD6\x5C\xAC\x19\x8D\x20\x5F\x45" + "\x02\x2E\x17\x50\x66\x6D\xAC\x8B\x9C\x6E\x07\x73\x46\xBB\x44\x14" + "\xE7\x98\xC3\xB9\x17\x32\x6E\x33\x7C\x0E\x21\xB1\xDB\xCB\x89\x51" + "\x23\x34\xAB\x9D\xBC\x6D\x20\xF5\x03\xA9\x91\x4C\x2E\x1F\x59\xDB" + "\xD9\x35\x67\x4B\x0C\x95\x79\x10\x00\x85\xA6\x96\x95\x2E\xDF\x78" + "\x7B\x4A\x5C\x09\x76\x97\xD1\x5C\x96\x12\x75\x35\xA3\x55\x4A\xD4" + "\x0B\x00\x35\x0B\x71\xB5\xC0\x2A\x5C\xE6\x08\x45\xF1\x39\x43\xF1" + "\x1C\x4B\x54\x10\x9D\x31\x50\x85\x4B\x54\x0E\x01\x4B\x3D\x01\xC0" + "\x00\x00\x28\xB5\x2F\xFD\x24\xEF\x35\x05\x00\x92\x0B\x21\x1F\xB0" + "\x01\x10\x00\x00\x00\x35\x59\xA6\xE7\xA1\xEF\x7C\xFC\xBD\x3F\xFF" 
+ "\x9F\xEF\xEE\xEF\x61\xC3\xAA\x31\x1D\x34\x38\x22\x22\x04\x44\x21" + "\x80\x32\xAD\x28\xF3\xD6\x28\x0C\x0A\x0E\xD6\x5C\xAC\x19\x8D\x20" + "\x5F\x45\x02\x2E\x17\x50\x66\x6D\xAC\x8B\x9C\x6E\x07\x73\x46\xBB" + "\x44\x14\xE7\x98\xC3\xB9\x17\x32\x6E\x33\x7C\x0E\x21\xB1\xDB\xCB" + "\x89\x51\x23\x34\xAB\x9D\xBC\x6D\x20\xF5\x03\xA9\x91\x4C\x2E\x1F" + "\x59\xDB\xD9\x35\x67\x4B\x0C\x95\x79\x10\x00\x85\xA6\x96\x95\x2E" + "\xDF\x78\x7B\x4A\x5C\x09\x76\x97\xD1\x5C\x96\x12\x75\x35\xA3\x55" + "\x4A\xD4\x0B\x00\x35\x0B\x71\xB5\xC0\x2A\x5C\xE6\x08\x45\xF1\x39" + "\x43\xF1\x1C\x4B\x54\x10\x9D\x31\x50\x85\x4B\x54\x0E\x01\x4B\x3D" + "\x01\xD2\x2F\x21\x80"; + +const char* const EXPECTED = + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. 
/ that is how doors work.\n"; diff --git a/src/zstd/tests/libzstd_partial_builds.sh b/src/zstd/tests/libzstd_partial_builds.sh new file mode 100755 index 000000000..b1c1e3b1a --- /dev/null +++ b/src/zstd/tests/libzstd_partial_builds.sh @@ -0,0 +1,89 @@ +#!/bin/sh -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +ECHO=echo +RM="rm -f" +GREP="grep" +INTOVOID="/dev/null" + +die() { + $ECHO "$@" 1>&2 + exit 1 +} + +isPresent() { + $GREP $@ tmplog || die "$@" "should be present" +} + +mustBeAbsent() { + $GREP $@ tmplog && die "$@ should not be there !!" + $ECHO "$@ correctly not present" # for some reason, this $ECHO must exist, otherwise mustBeAbsent() always fails (??) +} + +# default compilation : all features enabled +make clean > /dev/null +$ECHO "testing default library compilation" +CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID +nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog +isPresent "zstd_compress.o" +isPresent "zstd_decompress.o" +isPresent "zdict.o" +isPresent "zstd_v07.o" +isPresent "zbuff_compress.o" +$RM $DIR/../lib/libzstd.a tmplog + +# compression disabled => also disable zdict and zbuff +$ECHO "testing with compression disabled" +ZSTD_LIB_COMPRESSION=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID +nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog +mustBeAbsent "zstd_compress.o" +isPresent "zstd_decompress.o" +mustBeAbsent "zdict.o" +isPresent "zstd_v07.o" +mustBeAbsent "zbuff_compress.o" +$RM $DIR/../lib/libzstd.a tmplog + +# decompression disabled => also disable legacy and zbuff +$ECHO "testing with decompression disabled" +ZSTD_LIB_DECOMPRESSION=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID +nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog +isPresent "zstd_compress.o" +mustBeAbsent "zstd_decompress.o" +isPresent "zdict.o" +mustBeAbsent "zstd_v07.o" +mustBeAbsent "zbuff_compress.o" +$RM $DIR/../lib/libzstd.a tmplog + +# deprecated function disabled => only remove zbuff +$ECHO "testing with deprecated functions disabled" 
+ZSTD_LIB_DEPRECATED=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID +nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog +isPresent "zstd_compress.o" +isPresent "zstd_decompress.o" +isPresent "zdict.o" +isPresent "zstd_v07.o" +mustBeAbsent "zbuff_compress.o" +$RM $DIR/../lib/libzstd.a tmplog + +# dictionary builder disabled => only remove zdict +$ECHO "testing with dictionary builder disabled" +ZSTD_LIB_DICTBUILDER=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID +nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog +isPresent "zstd_compress.o" +isPresent "zstd_decompress.o" +mustBeAbsent "zdict.o" +isPresent "zstd_v07.o" +isPresent "zbuff_compress.o" +$RM $DIR/../lib/libzstd.a tmplog + +# both decompression and dictionary builder disabled => only compression remains +$ECHO "testing with both decompression and dictionary builder disabled (only compression remains)" +ZSTD_LIB_DECOMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID +nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog +isPresent "zstd_compress.o" +mustBeAbsent "zstd_decompress.o" +mustBeAbsent "zdict.o" +mustBeAbsent "zstd_v07.o" +mustBeAbsent "zbuff_compress.o" +$RM $DIR/../lib/libzstd.a tmplog diff --git a/src/zstd/tests/longmatch.c b/src/zstd/tests/longmatch.c new file mode 100644 index 000000000..93e78dd1f --- /dev/null +++ b/src/zstd/tests/longmatch.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +static int +compress(ZSTD_CStream *ctx, ZSTD_outBuffer out, const void *data, size_t size) +{ + ZSTD_inBuffer in = { data, size, 0 }; + while (in.pos < in.size) { + ZSTD_outBuffer tmp = out; + const size_t rc = ZSTD_compressStream(ctx, &tmp, &in); + if (ZSTD_isError(rc)) return 1; + } + { ZSTD_outBuffer tmp = out; + const size_t rc = ZSTD_flushStream(ctx, &tmp); + if (rc != 0) { return 1; } + } + return 0; +} + +int main(int argc, const char** argv) +{ + ZSTD_CStream* ctx; + ZSTD_parameters params; + size_t rc; + unsigned windowLog; + (void)argc; + (void)argv; + /* Create stream */ + ctx = ZSTD_createCStream(); + if (!ctx) { return 1; } + /* Set parameters */ + memset(¶ms, 0, sizeof(params)); + params.cParams.windowLog = 18; + params.cParams.chainLog = 13; + params.cParams.hashLog = 14; + params.cParams.searchLog = 1; + params.cParams.minMatch = 7; + params.cParams.targetLength = 16; + params.cParams.strategy = ZSTD_fast; + windowLog = params.cParams.windowLog; + /* Initialize stream */ + rc = ZSTD_initCStream_advanced(ctx, NULL, 0, params, 0); + if (ZSTD_isError(rc)) { return 2; } + { + U64 compressed = 0; + const U64 toCompress = ((U64)1) << 33; + const size_t size = 1 << windowLog; + size_t pos = 0; + char *srcBuffer = (char*) malloc(1 << windowLog); + char *dstBuffer = (char*) malloc(ZSTD_compressBound(1 << windowLog)); + ZSTD_outBuffer out = { dstBuffer, ZSTD_compressBound(1 << windowLog), 0 }; + const char match[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const size_t randomData = (1 << windowLog) - 2*sizeof(match); + size_t i; + printf("\n === Long Match Test === \n"); + printf("Creating random data to produce long matches \n"); + for (i = 0; i < sizeof(match); ++i) { + srcBuffer[i] = match[i]; + } + for (i = 0; i < randomData; ++i) { + srcBuffer[sizeof(match) + i] = 
(char)(rand() & 0xFF); + } + for (i = 0; i < sizeof(match); ++i) { + srcBuffer[sizeof(match) + randomData + i] = match[i]; + } + printf("Compressing, trying to generate a segfault \n"); + if (compress(ctx, out, srcBuffer, size)) { + return 1; + } + compressed += size; + while (compressed < toCompress) { + const size_t block = rand() % (size - pos + 1); + if (pos == size) { pos = 0; } + if (compress(ctx, out, srcBuffer + pos, block)) { + return 1; + } + pos += block; + compressed += block; + } + printf("Compression completed successfully (no error triggered)\n"); + free(srcBuffer); + free(dstBuffer); + } + return 0; +} diff --git a/src/zstd/tests/paramgrill.c b/src/zstd/tests/paramgrill.c new file mode 100644 index 000000000..e9cc2a94e --- /dev/null +++ b/src/zstd/tests/paramgrill.c @@ -0,0 +1,2966 @@ +/* + * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*-************************************ +* Dependencies +**************************************/ +#include "util.h" /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */ +#include <stdlib.h> /* malloc */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ +#include <string.h> /* strcmp */ +#include <math.h> /* log */ +#include <assert.h> + +#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */ +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_estimateCCtxSize */ +#include "zstd.h" +#include "datagen.h" +#include "xxhash.h" +#include "benchfn.h" +#include "benchzstd.h" +#include "zstd_errors.h" +#include "zstd_internal.h" /* should not be needed */ + + +/*-************************************ +* Constants +**************************************/ +#define PROGRAM_DESCRIPTION "ZSTD parameters tester" +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR + +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ +#define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */ + +static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + +#define COMPRESSIBILITY_DEFAULT 0.50 + +static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO; +static const int g_maxNbVariations = 64; + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); } +#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } + +#define TIMED 0 +#ifndef DEBUG +# define DEBUG 0 +#endif + +#undef MIN +#undef MAX +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? 
(a) : (b) ) +#define CUSTOM_LEVEL 99 +#define BASE_CLEVEL 1 + +#define FADT_MIN 0 +#define FADT_MAX ((U32)-1) + +#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1) +#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1) +#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1) +#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1) +#define MML_RANGE (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1) +#define TLEN_RANGE 17 +#define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1) +#define FADT_RANGE 3 + +#define CHECKTIME(r) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); return r; } } +#define CHECKTIMEGT(ret, val, _gototag) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); ret = val; goto _gototag; } } + +#define PARAM_UNSET ((U32)-2) /* can't be -1 b/c fadt uses -1 */ + +static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = { + "(none) ", "ZSTD_fast ", "ZSTD_dfast ", + "ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ", + "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra ", + "ZSTD_btultra2"}; + +static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 }; + + +/*-************************************ +* Setup for Adding new params +**************************************/ + +/* indices for each of the variables */ +typedef enum { + wlog_ind = 0, + clog_ind = 1, + hlog_ind = 2, + slog_ind = 3, + mml_ind = 4, + tlen_ind = 5, + strt_ind = 6, + fadt_ind = 7, /* forceAttachDict */ + NUM_PARAMS = 8 +} varInds_t; + +typedef struct { + U32 vals[NUM_PARAMS]; +} paramValues_t; + +/* minimum value of parameters */ +static const U32 mintable[NUM_PARAMS] = + { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN }; + +/* maximum value of parameters */ +static const U32 maxtable[NUM_PARAMS] = + { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, 
ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX }; + +/* # of values parameters can take on */ +static const U32 rangetable[NUM_PARAMS] = + { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE }; + +/* ZSTD_cctxSetParameter() index to set */ +static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] = + { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict }; + +/* names of parameters */ +static const char* g_paramNames[NUM_PARAMS] = + { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" }; + +/* shortened names of parameters */ +static const char* g_shortParamNames[NUM_PARAMS] = + { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" }; + +/* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */ +static U32 rangeMap(varInds_t param, int ind) +{ + U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0); + switch(param) { + case wlog_ind: /* using default: triggers -Wswitch-enum */ + case clog_ind: + case hlog_ind: + case slog_ind: + case mml_ind: + case strt_ind: + return mintable[param] + uind; + case tlen_ind: + return tlen_table[uind]; + case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */ + return uind - 1; + case NUM_PARAMS: + default:; + } + DISPLAY("Error, not a valid param\n "); + assert(0); + return (U32)-1; +} + +/* inverse of rangeMap */ +static int invRangeMap(varInds_t param, U32 value) +{ + value = MIN(MAX(mintable[param], value), maxtable[param]); + switch(param) { + case wlog_ind: + case clog_ind: + case hlog_ind: + case slog_ind: + case mml_ind: + case strt_ind: + return (int)(value - mintable[param]); + case tlen_ind: /* bin search */ + { + int lo = 0; + int hi = TLEN_RANGE; + while(lo < hi) { + int mid = (lo + hi) / 2; + if(tlen_table[mid] < value) { + lo = mid + 1; + } 
if(tlen_table[mid] == value) { + return mid; + } else { + hi = mid; + } + } + return lo; + } + case fadt_ind: + return (int)value + 1; + case NUM_PARAMS: + default:; + } + DISPLAY("Error, not a valid param\n "); + assert(0); + return -2; +} + +/* display of params */ +static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width) +{ + switch(param) { + case wlog_ind: + case clog_ind: + case hlog_ind: + case slog_ind: + case mml_ind: + case tlen_ind: + if(width) { + fprintf(f, "%*u", width, value); + } else { + fprintf(f, "%u", value); + } + break; + case strt_ind: + if(width) { + fprintf(f, "%*s", width, g_stratName[value]); + } else { + fprintf(f, "%s", g_stratName[value]); + } + break; + case fadt_ind: /* force attach dict */ + if(width) { + fprintf(f, "%*d", width, (int)value); + } else { + fprintf(f, "%d", (int)value); + } + break; + case NUM_PARAMS: + default: + DISPLAY("Error, not a valid param\n "); + assert(0); + break; + } +} + + +/*-************************************ +* Benchmark Parameters/Global Variables +**************************************/ + +/* General Utility */ +static U32 g_timeLimit_s = 99999; /* about 27 hours */ +static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */ +static U32 g_blockSize = 0; +static U32 g_rand = 1; + +/* Display */ +static int g_displayLevel = 3; +static BYTE g_silenceParams[NUM_PARAMS]; /* can selectively silence some params when displaying them */ + +/* Mode Selection */ +static U32 g_singleRun = 0; +static U32 g_optimizer = 0; +static int g_optmode = 0; + +/* For cLevel Table generation */ +static U32 g_target = 0; +static U32 g_noSeed = 0; + +/* For optimizer */ +static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */ +static double g_ratioMultiplier = 5.; +static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict */ +static BMK_benchResult_t g_lvltarget; + +typedef enum { + directMap, 
+ xxhashMap, + noMemo +} memoTableType_t; + +typedef struct { + memoTableType_t tableType; + BYTE* table; + size_t tableLen; + varInds_t varArray[NUM_PARAMS]; + size_t varLen; +} memoTable_t; + +typedef struct { + BMK_benchResult_t result; + paramValues_t params; +} winnerInfo_t; + +typedef struct { + U32 cSpeed; /* bytes / sec */ + U32 dSpeed; + U32 cMem; /* bytes */ +} constraint_t; + +typedef struct winner_ll_node winner_ll_node; +struct winner_ll_node { + winnerInfo_t res; + winner_ll_node* next; +}; + +static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */ + +/* + * Additional Global Variables (Defined Above Use) + * g_level_constraint + * g_alreadyTested + * g_maxTries + * g_clockGranularity + */ + + +/*-******************************************************* +* General Util Functions +*********************************************************/ + +/* nullified useless params, to ensure count stats */ +/* cleans up params for memoizing / display */ +static paramValues_t sanitizeParams(paramValues_t params) +{ + if (params.vals[strt_ind] == ZSTD_fast) + params.vals[clog_ind] = 0, params.vals[slog_ind] = 0; + if (params.vals[strt_ind] == ZSTD_dfast) + params.vals[slog_ind] = 0; + if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) ) + params.vals[tlen_ind] = 0; + + return params; +} + +static ZSTD_compressionParameters pvalsToCParams(paramValues_t p) +{ + ZSTD_compressionParameters c; + memset(&c, 0, sizeof(ZSTD_compressionParameters)); + c.windowLog = p.vals[wlog_ind]; + c.chainLog = p.vals[clog_ind]; + c.hashLog = p.vals[hlog_ind]; + c.searchLog = p.vals[slog_ind]; + c.minMatch = p.vals[mml_ind]; + c.targetLength = p.vals[tlen_ind]; + c.strategy = p.vals[strt_ind]; + /* no forceAttachDict */ + return c; +} + +static paramValues_t cParamsToPVals(ZSTD_compressionParameters c) +{ + paramValues_t p; + varInds_t i; + p.vals[wlog_ind] = c.windowLog; + p.vals[clog_ind] = c.chainLog; + p.vals[hlog_ind] = 
c.hashLog; + p.vals[slog_ind] = c.searchLog; + p.vals[mml_ind] = c.minMatch; + p.vals[tlen_ind] = c.targetLength; + p.vals[strt_ind] = c.strategy; + + /* set all other params to their minimum value */ + for (i = strt_ind + 1; i < NUM_PARAMS; i++) { + p.vals[i] = mintable[i]; + } + return p; +} + +/* equivalent of ZSTD_adjustCParams for paramValues_t */ +static paramValues_t +adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize) +{ + paramValues_t ot = p; + varInds_t i; + p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize)); + if (!dictSize) { p.vals[fadt_ind] = 0; } + /* retain value of all other parameters */ + for(i = strt_ind + 1; i < NUM_PARAMS; i++) { + p.vals[i] = ot.vals[i]; + } + return p; +} + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + if (requiredMem > maxMemory) requiredMem = maxMemory; + + requiredMem += 2 * step; + while (!testmem && requiredMem > 0) { + testmem = malloc ((size_t)requiredMem); + requiredMem -= step; + } + + free (testmem); + return (size_t) requiredMem; +} + +/* accuracy in seconds only, span can be multiple years */ +static U32 BMK_timeSpan_s(const UTIL_time_t tStart) +{ + return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL); +} + +static U32 FUZ_rotl32(U32 x, U32 r) +{ + return ((x << r) | (x >> (32 - r))); +} + +static U32 FUZ_rand(U32* src) +{ + const U32 prime1 = 2654435761U; + const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +#define BOUNDCHECK(val,min,max) { \ + if (((val)<(min)) | ((val)>(max))) { \ + DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); \ + return 0; \ +} } + +static int paramValid(const paramValues_t paramTarget) +{ + U32 i; + for(i = 0; i < NUM_PARAMS; i++) { + BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]); + } + 
return 1; +} + +/* cParamUnsetMin() : + * if any parameter in paramTarget is not yet set, + * it will receive its corresponding minimal value. + * This function never fails */ +static paramValues_t cParamUnsetMin(paramValues_t paramTarget) +{ + varInds_t vi; + for (vi = 0; vi < NUM_PARAMS; vi++) { + if (paramTarget.vals[vi] == PARAM_UNSET) { + paramTarget.vals[vi] = mintable[vi]; + } + } + return paramTarget; +} + +static paramValues_t emptyParams(void) +{ + U32 i; + paramValues_t p; + for(i = 0; i < NUM_PARAMS; i++) { + p.vals[i] = PARAM_UNSET; + } + return p; +} + +static winnerInfo_t initWinnerInfo(const paramValues_t p) +{ + winnerInfo_t w1; + w1.result.cSpeed = 0.; + w1.result.dSpeed = 0.; + w1.result.cMem = (size_t)-1; + w1.result.cSize = (size_t)-1; + w1.params = p; + return w1; +} + +static paramValues_t +overwriteParams(paramValues_t base, const paramValues_t mask) +{ + U32 i; + for(i = 0; i < NUM_PARAMS; i++) { + if(mask.vals[i] != PARAM_UNSET) { + base.vals[i] = mask.vals[i]; + } + } + return base; +} + +static void +paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr) +{ + ptr->vals[paramIndex] = rangeMap(paramIndex, + invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt); +} + +/* varies ptr by nbChanges respecting varyParams*/ +static void +paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges) +{ + paramValues_t p; + int validated = 0; + while (!validated) { + U32 i; + p = *ptr; + for (i = 0 ; i < nbChanges ; i++) { + const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1); + paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1], + (int)((changeID & 1) << 1) - 1, + &p); + } + validated = paramValid(p); + } + *ptr = p; +} + +/* Completely random parameter selection */ +static paramValues_t randomParams(void) +{ + varInds_t v; paramValues_t p; + for(v = 0; v < NUM_PARAMS; v++) { + p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v])); + } + return p; +} + 
+static U64 g_clockGranularity = 100000000ULL; + +static void init_clockGranularity(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + U64 el1 = 0, el2 = 0; + int i = 0; + do { + el1 = el2; + el2 = UTIL_clockSpanNano(clockStart); + if(el1 < el2) { + U64 iv = el2 - el1; + if(g_clockGranularity > iv) { + g_clockGranularity = iv; + i = 0; + } else { + i++; + } + } + } while(i < 10); + DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity); +} + +/*-************************************ +* Optimizer Util Functions +**************************************/ + +/* checks results are feasible */ +static int feasible(const BMK_benchResult_t results, const constraint_t target) { + return (results.cSpeed >= target.cSpeed) + && (results.dSpeed >= target.dSpeed) + && (results.cMem <= target.cMem) + && (!g_optmode || results.cSize <= g_lvltarget.cSize); +} + +/* hill climbing value for part 1 */ +/* Scoring here is a linear reward for all set constraints normalized between 0 to 1 + * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic + * bonus to exceeding the constraint value. We also give linear ratio for compression ratio. + * The constant factors are experimental. 
+ */ +static double +resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target) +{ + double cs = 0., ds = 0., rt, cm = 0.; + const double r1 = 1, r2 = 0.1, rtr = 0.5; + double ret; + if(target.cSpeed) { cs = res.cSpeed / (double)target.cSpeed; } + if(target.dSpeed) { ds = res.dSpeed / (double)target.dSpeed; } + if(target.cMem != (U32)-1) { cm = (double)target.cMem / res.cMem; } + rt = ((double)srcSize / res.cSize); + + ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr + + (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2; + + return ret; +} + +/* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */ +static double +resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes) +{ + double normalizedCSpeedGain1 = (result1.cSpeed / lvlRes.cSpeed) - 1; + double normalizedRatioGain1 = ((double)lvlRes.cSize / result1.cSize) - 1; + if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) { + return 0.0; + } + return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1; +} + +/* return true if r2 strictly better than r1 */ +static int +compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const constraint_t target, size_t srcSize) +{ + if(feasible(result1, target) && feasible(result2, target)) { + if(g_optmode) { + return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget); + } else { + return (result1.cSize > result2.cSize) + || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed) + || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed); + } + } + return feasible(result2, target) + || (!feasible(result1, target) + && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target))); +} + +static constraint_t relaxTarget(constraint_t target) { + target.cMem = (U32)-1; + target.cSpeed = (target.cSpeed * 
g_strictness) / 100; + target.dSpeed = (target.dSpeed * g_strictness) / 100; + return target; +} + +static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize) +{ + varInds_t v; + for(v = 0; v < NUM_PARAMS; v++) { + if(pc->vals[v] != PARAM_UNSET) { + U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]); + if(newval != pc->vals[v]) { + pc->vals[v] = newval; + DISPLAY("Warning: parameter %s not in valid range, adjusting to ", + g_paramNames[v]); + displayParamVal(stderr, v, newval, 0); DISPLAY("\n"); + } + } + } + + if(pc->vals[wlog_ind] != PARAM_UNSET) { + + U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1; + /* edge case of highBit not working for 0 */ + + if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) { + U32 adjust = MAX(mintable[wlog_ind], sshb); + if(adjust != pc->vals[wlog_ind]) { + pc->vals[wlog_ind] = adjust; + DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n", + (unsigned)pc->vals[wlog_ind]); + } + } + } + + if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { + U32 maxclog; + if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) { + maxclog = pc->vals[wlog_ind] + 1; + } else { + maxclog = pc->vals[wlog_ind]; + } + + if(pc->vals[clog_ind] > maxclog) { + pc->vals[clog_ind] = maxclog; + DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n", + (unsigned)pc->vals[clog_ind]); + } + } + + if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) { + if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) { + pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1; + DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n", + (unsigned)pc->vals[hlog_ind]); + } + } + + if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { + if(pc->vals[slog_ind] > pc->vals[clog_ind]) { + pc->vals[clog_ind] = pc->vals[slog_ind]; + DISPLAY("Warning: searchLog larger 
than chainLog, adjusted to %u\n", + (unsigned)pc->vals[slog_ind]); + } + } +} + +static int +redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize) +{ + return + (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */ + || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */ + || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/ + || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */ + || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */ +} + + +/*-************************************ +* Display Functions +**************************************/ + +/* BMK_paramValues_into_commandLine() : + * transform a set of parameters paramValues_t + * into a command line compatible with `zstd` syntax + * and writes it into FILE* f. + * f must be already opened and writable */ +static void +BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params) +{ + varInds_t v; + int first = 1; + fprintf(f,"--zstd="); + for (v = 0; v < NUM_PARAMS; v++) { + if (g_silenceParams[v]) { continue; } + if (!first) { fprintf(f, ","); } + fprintf(f,"%s=", g_paramNames[v]); + + if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); } + else { displayParamVal(f, v, params.vals[v], 0); } + first = 0; + } + fprintf(f, "\n"); +} + + +/* comparison function: */ +/* strictly better, strictly worse, equal, speed-side adv, size-side adv */ +#define WORSE_RESULT 0 +#define BETTER_RESULT 1 +#define ERROR_RESULT 2 + +#define SPEED_RESULT 4 +#define SIZE_RESULT 5 +/* maybe have epsilon-eq to limit table size? 
*/ +static int +speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2) +{ + if(r1.cSpeed < r2.cSpeed) { + if(r1.cSize >= r2.cSize) { + return BETTER_RESULT; + } + return SPEED_RESULT; /* r2 is smaller but not faster. */ + } else { + if(r1.cSize <= r2.cSize) { + return WORSE_RESULT; + } + return SIZE_RESULT; /* r2 is faster but not smaller */ + } +} + +/* 0 for insertion, 1 for no insert */ +/* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */ +static int +insertWinner(const winnerInfo_t w, const constraint_t targetConstraints) +{ + BMK_benchResult_t r = w.result; + winner_ll_node* cur_node = g_winners; + /* first node to insert */ + if(!feasible(r, targetConstraints)) { + return 1; + } + + if(g_winners == NULL) { + winner_ll_node* first_node = malloc(sizeof(winner_ll_node)); + if(first_node == NULL) { + return 1; + } + first_node->next = NULL; + first_node->res = w; + g_winners = first_node; + return 0; + } + + while(cur_node->next != NULL) { + switch(speedSizeCompare(cur_node->res.result, r)) { + case WORSE_RESULT: + { + return 1; /* never insert if better */ + } + case BETTER_RESULT: + { + winner_ll_node* tmp; + cur_node->res = cur_node->next->res; + tmp = cur_node->next; + cur_node->next = cur_node->next->next; + free(tmp); + break; + } + case SIZE_RESULT: + { + cur_node = cur_node->next; + break; + } + case SPEED_RESULT: /* insert after first size result, then return */ + { + winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); + if(newnode == NULL) { + return 1; + } + newnode->res = cur_node->res; + cur_node->res = w; + newnode->next = cur_node->next; + cur_node->next = newnode; + return 0; + } + } + + } + + assert(cur_node->next == NULL); + switch(speedSizeCompare(cur_node->res.result, r)) { + case WORSE_RESULT: + { + return 1; /* never insert if better */ + } + case BETTER_RESULT: + { + cur_node->res = w; + return 0; + } + case SIZE_RESULT: + { + winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); + if(newnode == 
NULL) { + return 1; + } + newnode->res = w; + newnode->next = NULL; + cur_node->next = newnode; + return 0; + } + case SPEED_RESULT: /* insert before first size result, then return */ + { + winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); + if(newnode == NULL) { + return 1; + } + newnode->res = cur_node->res; + cur_node->res = w; + newnode->next = cur_node->next; + cur_node->next = newnode; + return 0; + } + default: + return 1; + } +} + +static void +BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize) +{ + varInds_t v; + int first = 1; + res.params = cParamUnsetMin(res.params); + fprintf(f, " {"); + for (v = 0; v < NUM_PARAMS; v++) { + if (g_silenceParams[v]) { continue; } + if (!first) { fprintf(f, ","); } + displayParamVal(f, v, res.params.vals[v], 3); + first = 0; + } + + { double const ratio = res.result.cSize ? + (double)srcSize / res.result.cSize : 0; + double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT; + double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT; + + fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", + ratio, cSpeedMBps, dSpeedMBps); + } +} + +/* Writes to f the results of a parameter benchmark */ +/* when used with --optimize, will only print results better than previously discovered */ +static void +BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize) +{ + char lvlstr[15] = "Custom Level"; + winnerInfo_t w; + w.params = params; + w.result = result; + + fprintf(f, "\r%79s\r", ""); + + if(cLevel != CUSTOM_LEVEL) { + snprintf(lvlstr, 15, " Level %2d ", cLevel); + } + + if(TIMED) { + const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC; + const U64 time_ns = UTIL_clockSpanNano(g_time); + const U64 minutes = time_ns / mn_in_ns; + fprintf(f, "%1lu:%2lu:%05.2f - ", + (unsigned long) minutes / 60, + (unsigned long) minutes % 60, + (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC ); + } + + fprintf(f, "/* %s */ ", 
lvlstr); + BMK_displayOneResult(f, w, srcSize); +} + +static void +BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize) +{ + /* global winner used for constraints */ + /* cSize, cSpeed, dSpeed, cMem */ + static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL }, + { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } } + }; + if ( DEBUG + || compareResultLT(g_winner.result, result, targetConstraints, srcSize) + || g_displayLevel >= 4) { + if ( DEBUG + && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { + DISPLAY("New Winner: \n"); + } + + if(g_displayLevel >= 2) { + BMK_printWinner(f, cLevel, result, params, srcSize); + } + + if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { + if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); } + g_winner.result = result; + g_winner.params = params; + } + } + + if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) { + winnerInfo_t w; + winner_ll_node* n; + w.result = result; + w.params = params; + insertWinner(w, targetConstraints); + + if(!DEBUG) { fprintf(f, "\033c"); } + fprintf(f, "\n"); + + /* the table */ + fprintf(f, "================================\n"); + for(n = g_winners; n != NULL; n = n->next) { + BMK_displayOneResult(f, n->res, srcSize); + } + fprintf(f, "================================\n"); + fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n", + (double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT); + + + fprintf(f, "Overall Winner: \n"); + BMK_displayOneResult(f, g_winner, srcSize); + BMK_paramValues_into_commandLine(f, g_winner.params); + + fprintf(f, "Latest BMK: \n");\ + BMK_displayOneResult(f, w, srcSize); + } +} + + +/* BMK_print_cLevelEntry() : + * Writes one cLevelTable entry, for one level. 
+ * f must exist, be already opened, and be seekable. + * this function cannot error. + */ +static void +BMK_print_cLevelEntry(FILE* f, const int cLevel, + paramValues_t params, + const BMK_benchResult_t result, const size_t srcSize) +{ + varInds_t v; + int first = 1; + + assert(cLevel >= 0); + assert(cLevel <= NB_LEVELS_TRACKED); + params = cParamUnsetMin(params); + + fprintf(f, " {"); + /* print cParams. + * assumption : all cParams are present and in order in the following range */ + for (v = 0; v <= strt_ind; v++) { + if (!first) { fprintf(f, ","); } + displayParamVal(f, v, params.vals[v], 3); + first = 0; + } + /* print comment */ + { double const ratio = result.cSize ? + (double)srcSize / result.cSize : 0; + double const cSpeedMBps = (double)result.cSpeed / MB_UNIT; + double const dSpeedMBps = (double)result.dSpeed / MB_UNIT; + + fprintf(f, " }, /* level %2i: R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n", + cLevel, ratio, cSpeedMBps, dSpeedMBps); + } +} + + +/* BMK_print_cLevelTable() : + * print candidate compression table into proposed FILE* f. + * f must exist, be already opened, and be seekable. + * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized + * this function cannot error. + */ +static void +BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize) +{ + int cLevel; + + fprintf(f, "\n /* Proposed configurations : */ \n"); + fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); + + for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++) + BMK_print_cLevelEntry(f, + cLevel, winners[cLevel].params, + winners[cLevel].result, srcSize); +} + + +/* BMK_saveAndPrint_cLevelTable() : + * save candidate compression table into FILE* f, + * and then to stdout. + * f must exist, be already opened, and be seekable. + * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized + * this function cannot error. 
+ */ +static void +BMK_saveAndPrint_cLevelTable(FILE* const f, + const winnerInfo_t* winners, + const size_t srcSize) +{ + fseek(f, 0, SEEK_SET); + BMK_print_cLevelTable(f, winners, srcSize); + fflush(f); + BMK_print_cLevelTable(stdout, winners, srcSize); +} + + +/*-******************************************************* +* Functions to Benchmark +*********************************************************/ + +typedef struct { + ZSTD_CCtx* cctx; + const void* dictBuffer; + size_t dictBufferSize; + int cLevel; + const paramValues_t* comprParams; +} BMK_initCCtxArgs; + +static size_t local_initCCtx(void* payload) { + const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload; + varInds_t i; + ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters); + ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel); + + for(i = 0; i < NUM_PARAMS; i++) { + if(ag->comprParams->vals[i] != PARAM_UNSET) + ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]); + } + ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize); + + return 0; +} + +typedef struct { + ZSTD_DCtx* dctx; + const void* dictBuffer; + size_t dictBufferSize; +} BMK_initDCtxArgs; + +static size_t local_initDCtx(void* payload) { + const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload; + ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters); + ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize); + return 0; +} + +/* additional argument is just the context */ +static size_t local_defaultCompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) +{ + ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs; + assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */ + return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); +} + +/* additional argument is just the context */ +static size_t local_defaultDecompress( + const void* 
srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) { + size_t moreToFlush = 1; + ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + in.src = srcBuffer; + in.size = srcSize; + in.pos = 0; + out.dst = dstBuffer; + out.size = dstSize; + out.pos = 0; + while (moreToFlush) { + if(out.pos == out.size) { + return (size_t)-ZSTD_error_dstSize_tooSmall; + } + moreToFlush = ZSTD_decompressStream(dctx, + &out, &in); + if (ZSTD_isError(moreToFlush)) { + return moreToFlush; + } + } + return out.pos; + +} + +/*-************************************ +* Data Initialization Functions +**************************************/ + +typedef struct { + void* srcBuffer; + size_t srcSize; + const void** srcPtrs; + size_t* srcSizes; + void** dstPtrs; + size_t* dstCapacities; + size_t* dstSizes; + void** resPtrs; + size_t* resSizes; + size_t nbBlocks; + size_t maxBlockSize; +} buffers_t; + +typedef struct { + size_t dictSize; + void* dictBuffer; + ZSTD_CCtx* cctx; + ZSTD_DCtx* dctx; +} contexts_t; + +static void freeNonSrcBuffers(const buffers_t b) { + free(b.srcPtrs); + free(b.srcSizes); + + if(b.dstPtrs != NULL) { + free(b.dstPtrs[0]); + } + free(b.dstPtrs); + free(b.dstCapacities); + free(b.dstSizes); + + if(b.resPtrs != NULL) { + free(b.resPtrs[0]); + } + free(b.resPtrs); + free(b.resSizes); +} + +static void freeBuffers(const buffers_t b) { + if(b.srcPtrs != NULL) { + free(b.srcBuffer); + } + freeNonSrcBuffers(b); +} + +/* srcBuffer will be freed by freeBuffers now */ +static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles, + const size_t* fileSizes) +{ + size_t pos = 0, n, blockSize; + U32 maxNbBlocks, blockNb = 0; + buff->srcSize = 0; + for(n = 0; n < nbFiles; n++) { + buff->srcSize += fileSizes[n]; + } + + if(buff->srcSize == 0) { + DISPLAY("No data to bench\n"); + return 1; + } + + blockSize = g_blockSize ? 
g_blockSize : buff->srcSize; + maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles; + + buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*)); + buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); + buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); + buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) { + DISPLAY("alloc error\n"); + freeNonSrcBuffers(*buff); + return 1; + } + + buff->srcBuffer = srcBuffer; + buff->srcPtrs[0] = (const void*)buff->srcBuffer; + buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024)); + buff->resPtrs[0] = malloc(buff->srcSize); + + if(!buff->dstPtrs[0] || !buff->resPtrs[0]) { + DISPLAY("alloc error\n"); + freeNonSrcBuffers(*buff); + return 1; + } + + for(n = 0; n < nbFiles; n++) { + size_t pos_end = pos + fileSizes[n]; + for(; pos < pos_end; blockNb++) { + buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos); + buff->srcSizes[blockNb] = blockSize; + pos += blockSize; + } + + if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; } + pos = pos_end; + } + + buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]); + buff->dstSizes[0] = buff->dstCapacities[0]; + buff->resSizes[0] = buff->srcSizes[0]; + buff->maxBlockSize = buff->srcSizes[0]; + + for(n = 1; n < blockNb; n++) { + buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1]; + buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1]; + buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]); + buff->dstSizes[n] = buff->dstCapacities[n]; + buff->resSizes[n] 
= buff->srcSizes[n]; + + buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]); + } + + buff->nbBlocks = blockNb; + + return 0; +} + +/* allocates buffer's arguments. returns success / failure */ +static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable, + size_t nbFiles) { + size_t pos = 0; + size_t n; + size_t totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles); + size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad); + size_t* fileSizes = calloc(sizeof(size_t), nbFiles); + void* srcBuffer = NULL; + int ret = 0; + + if(!totalSizeToLoad || !benchedSize) { + ret = 1; + DISPLAY("Nothing to Bench\n"); + goto _cleanUp; + } + + srcBuffer = malloc(benchedSize); + + if(!fileSizes || !srcBuffer) { + ret = 1; + goto _cleanUp; + } + + for(n = 0; n < nbFiles; n++) { + FILE* f; + U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); + if (UTIL_isDirectory(fileNamesTable[n])) { + DISPLAY("Ignoring %s directory... \n", fileNamesTable[n]); + continue; + } + if (fileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]); + continue; + } + f = fopen(fileNamesTable[n], "rb"); + if (f==NULL) { + DISPLAY("impossible to open file %s\n", fileNamesTable[n]); + fclose(f); + ret = 10; + goto _cleanUp; + } + + DISPLAYLEVEL(2, "Loading %s... 
\r", fileNamesTable[n]); + + if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n; /* buffer too small - stop after this file */ + { + char* buffer = (char*)(srcBuffer); + size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f); + fclose(f); + if (readSize != (size_t)fileSize) { + DISPLAY("could not read %s", fileNamesTable[n]); + ret = 1; + goto _cleanUp; + } + + fileSizes[n] = readSize; + pos += readSize; + } + } + + ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes); + +_cleanUp: + if(ret) { free(srcBuffer); } + free(fileSizes); + return ret; +} + +static void freeContexts(const contexts_t ctx) { + free(ctx.dictBuffer); + ZSTD_freeCCtx(ctx.cctx); + ZSTD_freeDCtx(ctx.dctx); +} + +static int createContexts(contexts_t* ctx, const char* dictFileName) { + FILE* f; + size_t readSize; + ctx->cctx = ZSTD_createCCtx(); + ctx->dctx = ZSTD_createDCtx(); + assert(ctx->cctx != NULL); + assert(ctx->dctx != NULL); + + if(dictFileName == NULL) { + ctx->dictSize = 0; + ctx->dictBuffer = NULL; + return 0; + } + { U64 const dictFileSize = UTIL_getFileSize(dictFileName); + assert(dictFileSize != UTIL_FILESIZE_UNKNOWN); + ctx->dictSize = dictFileSize; + assert((U64)ctx->dictSize == dictFileSize); /* check overflow */ + } + ctx->dictBuffer = malloc(ctx->dictSize); + + f = fopen(dictFileName, "rb"); + + if (f==NULL) { + DISPLAY("unable to open file\n"); + freeContexts(*ctx); + return 1; + } + + if (ctx->dictSize > 64 MB || !(ctx->dictBuffer)) { + DISPLAY("dictionary too large\n"); + fclose(f); + freeContexts(*ctx); + return 1; + } + readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f); + fclose(f); + if (readSize != ctx->dictSize) { + DISPLAY("unable to read file\n"); + freeContexts(*ctx); + return 1; + } + return 0; +} + +/*-************************************ +* Optimizer Memoization Functions +**************************************/ + +/* return: new length */ +/* keep old array, will need if iter over strategy. 
*/ +/* prunes useless params */ +static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) { + size_t i, j = 0; + for(i = 0; i < varLength; i++) { + if( !((varArray[i] == clog_ind && strat == ZSTD_fast) + || (varArray[i] == slog_ind && strat == ZSTD_fast) + || (varArray[i] == slog_ind && strat == ZSTD_dfast) + || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) { + varNew[j] = varArray[i]; + j++; + } + } + return j; +} + +/* res should be NUM_PARAMS size */ +/* constructs varArray from paramValues_t style parameter */ +/* pass in using dict. */ +static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) { + varInds_t i; + size_t j = 0; + for(i = 0; i < NUM_PARAMS; i++) { + if(paramConstraints.vals[i] == PARAM_UNSET) { + if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */ + res[j] = i; j++; + } + } + return j; +} + +/* length of memo table given free variables */ +static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) { + size_t arrayLen = 1; + size_t i; + for(i = 0; i < varyLen; i++) { + if(varyParams[i] == strt_ind) continue; /* strategy separated by table */ + arrayLen *= rangetable[varyParams[i]]; + } + return arrayLen; +} + +/* returns unique index in memotable of compression parameters */ +static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) { + size_t i; + unsigned ind = 0; + for(i = 0; i < varyLen; i++) { + varInds_t v = varyParams[i]; + if(v == strt_ind) continue; /* exclude strategy from memotable */ + ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]); + } + return ind; +} + +static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) { + const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; + switch(mt.tableType) { + case directMap: + return 
mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)]; + case xxhashMap: + return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen]; + case noMemo: + return 0; + } + return 0; /* should never happen, stop compiler warnings */ +} + +static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) { + const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; + switch(mt.tableType) { + case directMap: + mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break; + case xxhashMap: + mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break; + case noMemo: + break; + } +} + +/* frees all allocated memotables */ +/* secret contract : + * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */ +static void freeMemoTableArray(memoTable_t* const mtAll) { + int i; + if(mtAll == NULL) { return; } + for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { + free(mtAll[i].table); + } + free(mtAll); +} + +/* inits memotables for all (including mallocs), all strategies */ +/* takes unsanitized varyParams */ +static memoTable_t* +createMemoTableArray(const paramValues_t p, + const varInds_t* const varyParams, + const size_t varyLen, + const U32 memoTableLog) +{ + memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1)); + ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX; + + if(mtAll == NULL) { + return NULL; + } + + for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { + mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i); + } + + /* no memoization */ + if(memoTableLog == 0) { + for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { + mtAll[i].tableType = noMemo; + mtAll[i].table = NULL; + mtAll[i].tableLen = 0; + } + return mtAll; + } + + + if(p.vals[strt_ind] != PARAM_UNSET) { + stratMin = p.vals[strt_ind]; + stratMax = p.vals[strt_ind]; + } + + + for(i = stratMin; i <= stratMax; i++) { + size_t mtl = 
memoTableLen(mtAll[i].varArray, mtAll[i].varLen); + mtAll[i].tableType = directMap; + + if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */ + mtAll[i].tableType = xxhashMap; + mtl = (1ULL << memoTableLog); + } + + mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl); + mtAll[i].tableLen = mtl; + + if(mtAll[i].table == NULL) { + freeMemoTableArray(mtAll); + return NULL; + } + } + + return mtAll; +} + +/* Sets pc to random unmeasured set of parameters */ +/* specify strategy */ +static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st) +{ + size_t j; + const memoTable_t mt = memoTableArray[st]; + pc->vals[strt_ind] = st; + for(j = 0; j < mt.tableLen; j++) { + int i; + for(i = 0; i < NUM_PARAMS; i++) { + varInds_t v = mt.varArray[i]; + if(v == strt_ind) continue; + pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); + } + + if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. 
*/ + } +} + +/*-************************************ +* Benchmarking Functions +**************************************/ + +static void display_params_tested(paramValues_t cParams) +{ + varInds_t vi; + DISPLAYLEVEL(3, "\r testing :"); + for (vi=0; vi < NUM_PARAMS; vi++) { + DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]); + } + DISPLAYLEVEL(3, "\b \r"); +} + +/* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */ +/* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */ +/* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */ +/* nbSeconds used in same way as in BMK_advancedParams_t */ +/* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */ +/* dictionary nullable, nothing else though. */ +/* note : it would be a lot better if this function was present in benchzstd.c, + * sharing code with benchMemAdvanced(), since it's technically a part of it */ +static BMK_benchOutcome_t +BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, + int cLevel, const paramValues_t* comprParams, + BMK_mode_t mode, unsigned nbSeconds) +{ + U32 i; + BMK_benchResult_t bResult; + const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs; + size_t const *const srcSizes = buf.srcSizes; + void** const dstPtrs = buf.dstPtrs; + size_t const *const dstCapacities = buf.dstCapacities; + size_t* const dstSizes = buf.dstSizes; + void** const resPtrs = buf.resPtrs; + size_t const *const resSizes = buf.resSizes; + const void* dictBuffer = ctx.dictBuffer; + const size_t dictBufferSize = ctx.dictSize; + const size_t nbBlocks = buf.nbBlocks; + const size_t srcSize = buf.srcSize; + ZSTD_CCtx* cctx = ctx.cctx; + ZSTD_DCtx* dctx = ctx.dctx; + + /* init */ + display_params_tested(*comprParams); + memset(&bResult, 0, sizeof(bResult)); + + 
/* warming up memory */ + for (i = 0; i < buf.nbBlocks; i++) { + if (mode != BMK_decodeOnly) { + RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1); + } else { + RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1); + } + } + + /* Bench */ + { + /* init args */ + int compressionCompleted = (mode == BMK_decodeOnly); + int decompressionCompleted = (mode == BMK_compressOnly); + BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000); + BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000); + BMK_benchParams_t cbp, dbp; + BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; + + cbp.benchFn = local_defaultCompress; + cbp.benchPayload = cctx; + cbp.initFn = local_initCCtx; + cbp.initPayload = &cctxprep; + cbp.errorFn = ZSTD_isError; + cbp.blockCount = nbBlocks; + cbp.srcBuffers = srcPtrs; + cbp.srcSizes = srcSizes; + cbp.dstBuffers = dstPtrs; + cbp.dstCapacities = dstCapacities; + cbp.blockResults = dstSizes; + + cctxprep.cctx = cctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + + dbp.benchFn = local_defaultDecompress; + dbp.benchPayload = dctx; + dbp.initFn = local_initDCtx; + dbp.initPayload = &dctxprep; + dbp.errorFn = ZSTD_isError; + dbp.blockCount = nbBlocks; + dbp.srcBuffers = (const void* const *) dstPtrs; + dbp.srcSizes = dstCapacities; + dbp.dstBuffers = resPtrs; + dbp.dstCapacities = resSizes; + dbp.blockResults = NULL; + + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; + + assert(timeStateCompress != NULL); + assert(timeStateDecompress != NULL); + while(!compressionCompleted) { + BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp); + + if (!BMK_isSuccessful_runOutcome(cOutcome)) { + BMK_benchOutcome_t bOut; + memset(&bOut, 0, sizeof(bOut)); + bOut.tag = 1; /* should rather be a function or a constant */ + 
BMK_freeTimedFnState(timeStateCompress); + BMK_freeTimedFnState(timeStateDecompress); + return bOut; + } + { BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome); + bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun); + bResult.cSize = rResult.sumOfReturn; + } + compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress); + } + + while (!decompressionCompleted) { + BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); + + if (!BMK_isSuccessful_runOutcome(dOutcome)) { + BMK_benchOutcome_t bOut; + memset(&bOut, 0, sizeof(bOut)); + bOut.tag = 1; /* should rather be a function or a constant */ + BMK_freeTimedFnState(timeStateCompress); + BMK_freeTimedFnState(timeStateDecompress); + return bOut; + } + { BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome); + bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun); + } + decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress); + } + + BMK_freeTimedFnState(timeStateCompress); + BMK_freeTimedFnState(timeStateDecompress); + } + + /* Bench */ + bResult.cMem = (1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx); + + { BMK_benchOutcome_t bOut; + bOut.tag = 0; + bOut.internal_never_use_directly = bResult; /* should be a function */ + return bOut; + } +} + +/* BMK_benchParam() : + * benchmark a set of `cParams` over sample `buf`, + * store the result in `resultPtr`. 
+ * @return : 0 if success, 1 if error */ +static int BMK_benchParam ( BMK_benchResult_t* resultPtr, + buffers_t buf, contexts_t ctx, + paramValues_t cParams) +{ + BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, + BASE_CLEVEL, &cParams, + BMK_both, 3); + if (!BMK_isSuccessful_benchOutcome(outcome)) return 1; + *resultPtr = BMK_extract_benchResult(outcome); + return 0; +} + + +/* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */ +#define VARIANCE 1.2 +static int allBench(BMK_benchResult_t* resultPtr, + const buffers_t buf, const contexts_t ctx, + const paramValues_t cParams, + const constraint_t target, + BMK_benchResult_t* winnerResult, int feas) +{ + BMK_benchResult_t benchres; + double uncertaintyConstantC = 3., uncertaintyConstantD = 3.; + double winnerRS; + + BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2); + if (!BMK_isSuccessful_benchOutcome(outcome)) { + DEBUGOUTPUT("Benchmarking failed \n"); + return ERROR_RESULT; + } + benchres = BMK_extract_benchResult(outcome); + + winnerRS = resultScore(*winnerResult, buf.srcSize, target); + DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS); + + *resultPtr = benchres; + + /* anything with worse ratio in feas is definitely worse, discard */ + if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) { + return WORSE_RESULT; + } + + /* calculate uncertainty in compression / decompression runs */ + if (benchres.cSpeed) { + U64 const loopDurationC = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed); + uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC); + } + + if (benchres.dSpeed) { + U64 const loopDurationD = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed); + uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD); + } + + /* optimistic assumption of benchres */ + { BMK_benchResult_t resultMax = 
benchres; + resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE); + resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE); + + /* disregard infeasible results in feas mode */ + /* disregard if resultMax < winner in infeas mode */ + if((feas && !feasible(resultMax, target)) || + (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) { + return WORSE_RESULT; + } + } + + /* compare by resultScore when in infeas */ + /* compare by compareResultLT when in feas */ + if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) || + (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) ) { + return BETTER_RESULT; + } else { + return WORSE_RESULT; + } +} + + +#define INFEASIBLE_THRESHOLD 200 +/* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */ +static int benchMemo(BMK_benchResult_t* resultPtr, + const buffers_t buf, const contexts_t ctx, + const paramValues_t cParams, + const constraint_t target, + BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray, + const int feas) { + static int bmcount = 0; + int res; + + if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD + || redundantParams(cParams, target, buf.maxBlockSize) ) { + return WORSE_RESULT; + } + + res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas); + + if(DEBUG && !(bmcount % 250)) { + DISPLAY("Count: %d\n", bmcount); + bmcount++; + } + BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize); + + if(res == BETTER_RESULT || feas) { + memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent */ + } + return res; +} + + +typedef struct { + U64 cSpeed_min; + U64 dSpeed_min; + U32 windowLog_max; + ZSTD_strategy strategy_max; +} level_constraints_t; + +static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1]; 
+ +static void BMK_init_level_constraints(int bytePerSec_level1) +{ + assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel()); + memset(g_level_constraint, 0, sizeof(g_level_constraint)); + g_level_constraint[1].cSpeed_min = bytePerSec_level1; + g_level_constraint[1].dSpeed_min = 0.; + g_level_constraint[1].windowLog_max = 19; + g_level_constraint[1].strategy_max = ZSTD_fast; + + /* establish speed objectives (relative to level 1) */ + { int l; + for (l=2; l<=NB_LEVELS_TRACKED; l++) { + g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64; + g_level_constraint[l].dSpeed_min = 0.; + g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */ + g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX; + } } +} + +static int BMK_seed(winnerInfo_t* winners, + const paramValues_t params, + const buffers_t buf, + const contexts_t ctx) +{ + BMK_benchResult_t testResult; + int better = 0; + int cLevel; + + BMK_benchParam(&testResult, buf, ctx, params); + + for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) { + + if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) + continue; /* not fast enough for this level */ + if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) + continue; /* not fast enough for this level */ + if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max) + continue; /* too much memory for this level */ + if (params.vals[strt_ind] > g_level_constraint[cLevel].strategy_max) + continue; /* forbidden strategy for this level */ + if (winners[cLevel].result.cSize==0) { + /* first solution for this cLevel */ + winners[cLevel].result = testResult; + winners[cLevel].params = params; + BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize); + better = 1; + continue; + } + + if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. 
+ (0.02 / cLevel))) ) { + /* Validate solution is "good enough" */ + double W_ratio = (double)buf.srcSize / testResult.cSize; + double O_ratio = (double)buf.srcSize / winners[cLevel].result.cSize; + double W_ratioNote = log (W_ratio); + double O_ratioNote = log (O_ratio); + size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB); + size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB); + double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); + double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); + + size_t W_CMemUsed = (1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params)); + size_t O_CMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params)); + double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); + double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); + + double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log(testResult.cSpeed); + double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed); + + double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log(testResult.dSpeed); + double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed); + + if (W_DMemUsed_note < O_DMemUsed_note) { + /* uses too much Decompression memory for too little benefit */ + if (W_ratio > O_ratio) + DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", + W_ratio, (double)(W_DMemUsed) / 1024 / 1024, + O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel); + continue; + } + if (W_CMemUsed_note < O_CMemUsed_note) { + /* uses too much memory for compression for too little benefit */ + if (W_ratio > O_ratio) + DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", + W_ratio, 
(double)(W_CMemUsed) / 1024 / 1024, + O_ratio, (double)(O_CMemUsed) / 1024 / 1024, + cLevel); + continue; + } + if (W_CSpeed_note < O_CSpeed_note ) { + /* too large compression speed difference for the compression benefit */ + if (W_ratio > O_ratio) + DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", + W_ratio, (double)testResult.cSpeed / MB_UNIT, + O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT, + cLevel); + continue; + } + if (W_DSpeed_note < O_DSpeed_note ) { + /* too large decompression speed difference for the compression benefit */ + if (W_ratio > O_ratio) + DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", + W_ratio, (double)testResult.dSpeed / MB_UNIT, + O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT, + cLevel); + continue; + } + + if (W_ratio < O_ratio) + DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", + W_ratio, O_ratio, cLevel); + + winners[cLevel].result = testResult; + winners[cLevel].params = params; + BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize); + + better = 1; + } } + + return better; +} + +/*-************************************ +* Compression Level Table Generation Functions +**************************************/ + +#define PARAMTABLELOG 25 +#define PARAMTABLESIZE (1<<PARAMTABLELOG) +#define PARAMTABLEMASK (PARAMTABLESIZE-1) +static BYTE g_alreadyTested[PARAMTABLESIZE] = {0}; /* init to zero */ + +static BYTE* NB_TESTS_PLAYED(paramValues_t p) +{ + ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p)); + unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0); + return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK]; +} + +static void playAround(FILE* f, + winnerInfo_t* winners, + paramValues_t p, + const buffers_t buf, const contexts_t ctx) +{ + int nbVariations = 0; + UTIL_time_t const 
clockStart = UTIL_getTime(); + + while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) { + if (nbVariations++ > g_maxNbVariations) break; + + do { + int i; + for(i = 0; i < 4; i++) { + paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1), + ((FUZ_rand(&g_rand) & 1) << 1) - 1, + &p); + } + } while (!paramValid(p)); + + /* exclude faster if already played params */ + if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1)) + continue; + + /* test */ + { BYTE* const b = NB_TESTS_PLAYED(p); + (*b)++; + } + if (!BMK_seed(winners, p, buf, ctx)) continue; + + /* improvement found => search more */ + BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); + playAround(f, winners, p, buf, ctx); + } + +} + +static void +BMK_selectRandomStart( FILE* f, + winnerInfo_t* winners, + const buffers_t buf, const contexts_t ctx) +{ + U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1); + if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) { + /* use some random entry */ + paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */ + buf.srcSize, 0); + playAround(f, winners, p, buf, ctx); + } else { + playAround(f, winners, winners[id].params, buf, ctx); + } +} + + +/* BMK_generate_cLevelTable() : + * test a large number of configurations + * and distribute them across compression levels according to speed conditions. + * display and save all intermediate results into rfName = "grillResults.txt". + * the function automatically stops after g_timeLimit_s. + * this function cannot error, it directly exit() in case of problem. 
+ */ +static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx) +{ + paramValues_t params; + winnerInfo_t winners[NB_LEVELS_TRACKED+1]; + const char* const rfName = "grillResults.txt"; + FILE* const f = fopen(rfName, "w"); + + /* init */ + assert(g_singleRun==0); + memset(winners, 0, sizeof(winners)); + if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } + + if (g_target) { + BMK_init_level_constraints(g_target * MB_UNIT); + } else { + /* baseline config for level 1 */ + paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize)); + BMK_benchResult_t testResult; + BMK_benchParam(&testResult, buf, ctx, l1params); + BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32)); + } + + /* populate initial solution */ + { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); + int i; + for (i=0; i<=maxSeeds; i++) { + params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0)); + BMK_seed(winners, params, buf, ctx); + } } + BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); + + /* start tests */ + { const UTIL_time_t grillStart = UTIL_getTime(); + do { + BMK_selectRandomStart(f, winners, buf, ctx); + } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s); + } + + /* end summary */ + BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); + DISPLAY("grillParams operations completed \n"); + + /* clean up*/ + fclose(f); +} + + +/*-************************************ +* Single Benchmark Functions +**************************************/ + +static int +benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel) +{ + BMK_benchResult_t testResult; + g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); + + if (BMK_benchParam(&testResult, buf, ctx, g_params)) { + DISPLAY("Error during benchmarking\n"); + return 1; + } + + BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, 
buf.srcSize); + + return 0; +} + +static int benchSample(double compressibility, int cLevel) +{ + const char* const name = "Sample 10MB"; + size_t const benchedSize = 10 MB; + void* const srcBuffer = malloc(benchedSize); + int ret = 0; + + buffers_t buf; + contexts_t ctx; + + if(srcBuffer == NULL) { + DISPLAY("Out of Memory\n"); + return 2; + } + + RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); + + if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) { + DISPLAY("Buffer Creation Error\n"); + free(srcBuffer); + return 3; + } + + if(createContexts(&ctx, NULL)) { + DISPLAY("Context Creation Error\n"); + freeBuffers(buf); + return 1; + } + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100)); + + if(g_singleRun) { + ret = benchOnce(buf, ctx, cLevel); + } else { + BMK_generate_cLevelTable(buf, ctx); + } + + freeBuffers(buf); + freeContexts(ctx); + + return ret; +} + +/* benchFiles() : + * note: while this function takes a table of filenames, + * in practice, only the first filename will be used */ +static int benchFiles(const char** fileNamesTable, int nbFiles, + const char* dictFileName, int cLevel) +{ + buffers_t buf; + contexts_t ctx; + int ret = 0; + + if (createBuffers(&buf, fileNamesTable, nbFiles)) { + DISPLAY("unable to load files\n"); + return 1; + } + + if (createContexts(&ctx, dictFileName)) { + DISPLAY("unable to load dictionary\n"); + freeBuffers(buf); + return 2; + } + + DISPLAY("\r%79s\r", ""); + if (nbFiles == 1) { + DISPLAY("using %s : \n", fileNamesTable[0]); + } else { + DISPLAY("using %d Files : \n", nbFiles); + } + + if (g_singleRun) { + ret = benchOnce(buf, ctx, cLevel); + } else { + BMK_generate_cLevelTable(buf, ctx); + } + + freeBuffers(buf); + freeContexts(ctx); + return ret; +} + + +/*-************************************ +* Local Optimization Functions +**************************************/ + +/* One iteration of hill climbing. 
Specifically, it first tries all + * valid parameter configurations w/ manhattan distance 1 and picks the best one + * failing that, it progressively tries candidates further and further away (up to #dim + 2) + * if it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the + * current stage of hill climbing. + * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to + * the resultScore function, which is effectively a linear increase in reward until it reaches + * the constraint-satisfying value, it which point any excess results in only logarithmic reward. + * This aims to find some constraint-satisfying point. + * Phase 2 optimizes in accordance with what the original function sets out to maximize, with + * all feasible solutions valued over all infeasible solutions. + */ + +/* sanitize all params here. + * all generation after random should be sanitized. (maybe sanitize random) + */ +static winnerInfo_t climbOnce(const constraint_t target, + memoTable_t* mtAll, + const buffers_t buf, const contexts_t ctx, + const paramValues_t init) +{ + /* + * cparam - currently considered 'center' + * candidate - params to benchmark/results + * winner - best option found so far. 
+ */ + paramValues_t cparam = init; + winnerInfo_t candidateInfo, winnerInfo; + int better = 1; + int feas = 0; + + winnerInfo = initWinnerInfo(init); + candidateInfo = winnerInfo; + + { winnerInfo_t bestFeasible1 = initWinnerInfo(cparam); + DEBUGOUTPUT("Climb Part 1\n"); + while(better) { + int offset; + size_t i, dist; + const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen; + better = 0; + DEBUGOUTPUT("Start\n"); + cparam = winnerInfo.params; + candidateInfo.params = cparam; + /* all dist-1 candidates */ + for (i = 0; i < varLen; i++) { + for (offset = -1; offset <= 1; offset += 2) { + CHECKTIME(winnerInfo); + candidateInfo.params = cparam; + paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i], + offset, + &candidateInfo.params); + + if(paramValid(candidateInfo.params)) { + int res; + res = benchMemo(&candidateInfo.result, buf, ctx, + sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas); + DEBUGOUTPUT("Res: %d\n", res); + if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */ + winnerInfo = candidateInfo; + better = 1; + if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) { + bestFeasible1 = winnerInfo; + } + } + } + } /* for (offset = -1; offset <= 1; offset += 2) */ + } /* for (i = 0; i < varLen; i++) */ + + if(better) { + continue; + } + + for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */ + for (i = 0; i < (1 << varLen) / varLen + 2; i++) { + int res; + CHECKTIME(winnerInfo); + candidateInfo.params = cparam; + /* param error checking already done here */ + paramVariation(&candidateInfo.params, mtAll, (U32)dist); + + res = benchMemo(&candidateInfo.result, + buf, ctx, + sanitizeParams(candidateInfo.params), target, + &winnerInfo.result, mtAll, feas); + DEBUGOUTPUT("Res: %d\n", res); + if (res == BETTER_RESULT) { /* synonymous with better in this case*/ + winnerInfo = candidateInfo; + better = 1; + if (compareResultLT(bestFeasible1.result, 
winnerInfo.result, target, buf.srcSize)) { + bestFeasible1 = winnerInfo; + } + break; + } + } + + if (better) { + break; + } + } /* for(dist = 2; dist < varLen + 2; dist++) */ + + if (!better) { /* infeas -> feas -> stop */ + if (feas) return winnerInfo; + feas = 1; + better = 1; + winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */ + DEBUGOUTPUT("Climb Part 2\n"); + } + } + winnerInfo = bestFeasible1; + } + + return winnerInfo; +} + +/* Optimizes for a fixed strategy */ + +/* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited) + weight more on visit for bad results, less on good results/more on later results / ones with more failures. + allocate memoTable here. + */ +static winnerInfo_t +optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx, + const constraint_t target, paramValues_t paramTarget, + const ZSTD_strategy strat, + memoTable_t* memoTableArray, const int tries) +{ + int i = 0; + + paramValues_t init; + winnerInfo_t winnerInfo, candidateInfo; + winnerInfo = initWinnerInfo(emptyParams()); + /* so climb is given the right fixed strategy */ + paramTarget.vals[strt_ind] = strat; + /* to pass ZSTD_checkCParams */ + paramTarget = cParamUnsetMin(paramTarget); + + init = paramTarget; + + for(i = 0; i < tries; i++) { + DEBUGOUTPUT("Restart\n"); + do { + randomConstrainedParams(&init, memoTableArray, strat); + } while(redundantParams(init, target, buf.maxBlockSize)); + candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init); + if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) { + winnerInfo = candidateInfo; + BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize); + i = 0; + continue; + } + CHECKTIME(winnerInfo); + i++; + } + return winnerInfo; +} + +/* goes best, best-1, best+1, best-2, ... 
*/
/* return 0 if nothing remaining */
/* The walk alternates around bestStrategy : from a strategy at or above the best one,
 * the next candidate is its mirror image just below; from a strategy below, its mirror
 * image above. When the mirror image falls outside [1, ZSTD_STRATEGY_MAX],
 * the walk keeps going outwards on the current side instead. */
static int nextStrategy(const int currentStrategy, const int bestStrategy)
{
    int const stratMax = (int)ZSTD_STRATEGY_MAX;
    int const mirrorPoint = 2 * bestStrategy - currentStrategy;

    if (currentStrategy >= bestStrategy) {
        /* currently on the upper side (or on best itself) : try the lower side */
        int const below = mirrorPoint - 1;
        int const upwards = currentStrategy + 1;
        if (below >= 1) return below;
        /* lower side exhausted : continue upwards, if anything is left */
        return (upwards > stratMax) ? 0 : upwards;
    }

    /* currently on the lower side : try the upper side */
    {   int const downwards = currentStrategy - 1;
        if (mirrorPoint <= stratMax) return mirrorPoint;
        /* upper side exhausted : continue downwards, if anything is left */
        return (downwards < 1) ? 0 : downwards;
    }
}

/* experiment with playing with this and decay value */

/* main fn called when using --optimize */
/* Does strategy selection by benchmarking default compression levels
 * then optimizes by strategy, starting with the best one and
 * progressively moving further away by number
 * args:
 * fileNamesTable - list of files to benchmark
 * nbFiles - length of fileNamesTable
 * dictFileName - name of dictionary file if one, else NULL
 * target - performance constraints (cSpeed, dSpeed, cMem)
 * paramTarget - parameter constraints (i.e.
restriction search space to where strategy = ZSTD_fast) + * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio) + */ + +static unsigned g_maxTries = 5; +#define TRY_DECAY 1 + +static int +optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles, + const char* dictFileName, + constraint_t target, paramValues_t paramTarget, + const int cLevelOpt, const int cLevelRun, + const U32 memoTableLog) +{ + varInds_t varArray [NUM_PARAMS]; + int ret = 0; + const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL); + winnerInfo_t winner = initWinnerInfo(emptyParams()); + memoTable_t* allMT = NULL; + paramValues_t paramBase; + contexts_t ctx; + buffers_t buf; + g_time = UTIL_getTime(); + + if (createBuffers(&buf, fileNamesTable, nbFiles)) { + DISPLAY("unable to load files\n"); + return 1; + } + + if (createContexts(&ctx, dictFileName)) { + DISPLAY("unable to load dictionary\n"); + freeBuffers(buf); + return 2; + } + + if (nbFiles == 1) { + DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[0]); + } else { + DISPLAYLEVEL(2, "Loading %lu Files... 
\r", (unsigned long)nbFiles); + } + + /* sanitize paramTarget */ + optimizerAdjustInput(¶mTarget, buf.maxBlockSize); + paramBase = cParamUnsetMin(paramTarget); + + allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog); + + if (!allMT) { + DISPLAY("MemoTable Init Error\n"); + ret = 2; + goto _cleanUp; + } + + /* default strictnesses */ + if (g_strictness == PARAM_UNSET) { + if(g_optmode) { + g_strictness = 100; + } else { + g_strictness = 90; + } + } else { + if(0 >= g_strictness || g_strictness > 100) { + DISPLAY("Strictness Outside of Bounds\n"); + ret = 4; + goto _cleanUp; + } + } + + /* use level'ing mode instead of normal target mode */ + if (g_optmode) { + winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize)); + if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) { + ret = 3; + goto _cleanUp; + } + + g_lvltarget = winner.result; + g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100; + g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100; + g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness; + + target.cSpeed = (U32)g_lvltarget.cSpeed; + target.dSpeed = (U32)g_lvltarget.dSpeed; + + BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize); + } + + /* Don't want it to return anything worse than the best known result */ + if (g_singleRun) { + BMK_benchResult_t res; + g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); + if (BMK_benchParam(&res, buf, ctx, g_params)) { + ret = 45; + goto _cleanUp; + } + if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) { + winner.result = res; + winner.params = g_params; + } + } + + /* bench */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + if(nbFiles == 1) { + DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]); + } else { + DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned 
long)nbFiles); + } + + if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); } + if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); } + if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); } + + DISPLAYLEVEL(2, "\n"); + init_clockGranularity(); + + { paramValues_t CParams; + + /* find best solution from default params */ + { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); + DEBUGOUTPUT("Strategy Selection\n"); + if (paramTarget.vals[strt_ind] == PARAM_UNSET) { + BMK_benchResult_t candidate; + int i; + for (i=1; i<=maxSeeds; i++) { + int ec; + CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget); + ec = BMK_benchParam(&candidate, buf, ctx, CParams); + BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize); + + if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) { + winner.result = candidate; + winner.params = CParams; + } + + CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */ + /* if the current params are too slow, just stop. */ + if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; } + } + + BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize); + } + } + + DEBUGOUTPUT("Real Opt\n"); + /* start 'real' optimization */ + { int bestStrategy = (int)winner.params.vals[strt_ind]; + if (paramTarget.vals[strt_ind] == PARAM_UNSET) { + int st = bestStrategy; + int tries = g_maxTries; + + /* one iterations of hill climbing with the level-defined parameters. 
*/ + { winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params); + if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) { + winner = w1; + } + CHECKTIMEGT(ret, 0, _displayCleanUp); + } + + while(st && tries > 0) { + winnerInfo_t wc; + DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]); + + wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries); + + if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) { + winner = wc; + tries = g_maxTries; + bestStrategy = st; + } else { + st = nextStrategy(st, bestStrategy); + tries -= TRY_DECAY; + } + CHECKTIMEGT(ret, 0, _displayCleanUp); + } + } else { + winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries); + } + + } + + /* no solution found */ + if(winner.result.cSize == (size_t)-1) { + ret = 1; + DISPLAY("No feasible solution found\n"); + goto _cleanUp; + } + + /* end summary */ +_displayCleanUp: + if (g_displayLevel >= 0) { + BMK_displayOneResult(stdout, winner, buf.srcSize); + } + BMK_paramValues_into_commandLine(stdout, winner.params); + DISPLAYLEVEL(1, "grillParams size - optimizer completed \n"); + } + +_cleanUp: + freeContexts(ctx); + freeBuffers(buf); + freeMemoTableArray(allMT); + return ret; +} + +/*-************************************ +* CLI parsing functions +**************************************/ + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + * from zstdcli.c + */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + +static void errorOut(const char* msg) +{ + DISPLAY("%s \n", msg); exit(1); +} + +/*! 
readU32FromChar() : + * @return : unsigned integer value read from input in `char` format. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * Note : function will exit() program if digit sequence overflows */ +static unsigned readU32FromChar(const char** stringPtr) +{ + const char errorMsg[] = "error: numeric value too large"; + unsigned sign = 1; + unsigned result = 0; + if(**stringPtr == '-') { sign = (unsigned)-1; (*stringPtr)++; } + while ((**stringPtr >='0') && (**stringPtr <='9')) { + unsigned const max = (((unsigned)(-1)) / 10) - 1; + if (result > max) errorOut(errorMsg); + result *= 10; + assert(**stringPtr >= '0'); + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + if (result > maxK) errorOut(errorMsg); + result <<= 10; + if (**stringPtr=='M') { + if (result > maxK) errorOut(errorMsg); + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result * sign; +} + +static double readDoubleFromChar(const char** stringPtr) +{ + double result = 0, divide = 10; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + } + if(**stringPtr!='.') { + return result; + } + (*stringPtr)++; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + result += (double)(**stringPtr - '0') / divide, divide *= 10, (*stringPtr)++ ; + } + return result; +} + +static int usage(const char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); + return 0; +} + +static int usage_advanced(void) +{ + 
DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -T# : set level 1 speed objective \n"); + DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); + DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n"); + DISPLAY( " -S : Single run \n"); + DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n"); + DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100); + DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n", + (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600); + DISPLAY( " -v : Prints Benchmarking output\n"); + DISPLAY( " -D : Next argument dictionary file\n"); + DISPLAY( " -s : Seperate Files\n"); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 1; +} + +#define PARSE_SUB_ARGS(stringLong, stringShort, variable) { \ + if ( longCommandWArg(&argument, stringLong) \ + || longCommandWArg(&argument, stringShort) ) { \ + variable = readU32FromChar(&argument); \ + if (argument[0]==',') { \ + argument++; continue; \ + } else break; \ +} } + +/* 1 if successful parse, 0 otherwise */ +static int parse_params(const char** argptr, paramValues_t* pv) { + int matched = 0; + const char* argOrig = *argptr; + varInds_t v; + for(v = 0; v < NUM_PARAMS; v++) { + if ( longCommandWArg(argptr,g_shortParamNames[v]) + || longCommandWArg(argptr, g_paramNames[v]) ) { + if(**argptr == '=') { + (*argptr)++; + pv->vals[v] = readU32FromChar(argptr); + matched = 1; + break; + } + } + /* reset and try again */ + *argptr = argOrig; + } + return matched; +} + +/*-************************************ +* Main +**************************************/ + +int main(int argc, const char** argv) +{ + int i, + filenamesStart=0, + result; + const char* exename=argv[0]; + const char* input_filename = NULL; + const char* dictFileName = NULL; + U32 main_pause = 0; + int cLevelOpt 
= 0, cLevelRun = 0; + int seperateFiles = 0; + double compressibility = COMPRESSIBILITY_DEFAULT; + U32 memoTableLog = PARAM_UNSET; + constraint_t target = { 0, 0, (U32)-1 }; + + paramValues_t paramTarget = emptyParams(); + g_params = emptyParams(); + + assert(argc>=1); /* for exename */ + + for(i=1; i<argc; i++) { + const char* argument = argv[i]; + DEBUGOUTPUT("%d: %s\n", i, argument); + assert(argument != NULL); + + if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; } + + if (longCommandWArg(&argument, "--optimize=")) { + g_optimizer = 1; + for ( ; ;) { + if(parse_params(&argument, &paramTarget)) { if(argument[0] == ',') { argument++; continue; } else break; } + PARSE_SUB_ARGS("compressionSpeed=" , "cSpeed=", target.cSpeed); + PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed); + PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem); + PARSE_SUB_ARGS("strict=", "stc=", g_strictness); + PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries); + PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog); + if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } + + DISPLAY("invalid optimization parameter \n"); + return 1; + } + + if (argument[0] != 0) { + DISPLAY("invalid --optimize= format\n"); + return 1; /* check the end of string */ + } + continue; + } else if (longCommandWArg(&argument, "--zstd=")) { + /* Decode command (note : aggregated commands are allowed) */ + g_singleRun = 1; + for ( ; ;) { + if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; } + if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { 
cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; } + + DISPLAY("invalid compression parameter \n"); + return 1; + } + + if (argument[0] != 0) { + DISPLAY("invalid --zstd= format\n"); + return 1; /* check the end of string */ + } + continue; + /* if not return, success */ + + } else if (longCommandWArg(&argument, "--display=")) { + /* Decode command (note : aggregated commands are allowed) */ + memset(g_silenceParams, 1, sizeof(g_silenceParams)); + for ( ; ;) { + int found = 0; + varInds_t v; + for(v = 0; v < NUM_PARAMS; v++) { + if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) { + g_silenceParams[v] = 0; + found = 1; + } + } + if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) { + for(v = 0; v <= strt_ind; v++) { + g_silenceParams[v] = 0; + } + found = 1; + } + + + if(found) { + if(argument[0]==',') { + continue; + } else { + break; + } + } + DISPLAY("invalid parameter name parameter \n"); + return 1; + } + + if (argument[0] != 0) { + DISPLAY("invalid --display format\n"); + return 1; /* check the end of string */ + } + continue; + } else if (argument[0]=='-') { + argument++; + + while (argument[0]!=0) { + + switch(argument[0]) + { + /* Display help on usage */ + case 'h' : + case 'H': usage(exename); usage_advanced(); return 0; + + /* Pause at the end (hidden option) */ + case 'p': main_pause = 1; argument++; break; + + /* Sample compressibility (when no file provided) */ + case 'P': + argument++; + { U32 const proba32 = readU32FromChar(&argument); + compressibility = (double)proba32 / 100.; + } + break; + + /* Run Single conf */ + case 'S': + g_singleRun = 1; + argument++; + for ( ; ; ) { + switch(*argument) + { + case 'w': + argument++; + g_params.vals[wlog_ind] = readU32FromChar(&argument); + continue; + case 'c': + argument++; + g_params.vals[clog_ind] = readU32FromChar(&argument); + 
continue; + case 'h': + argument++; + g_params.vals[hlog_ind] = readU32FromChar(&argument); + continue; + case 's': + argument++; + g_params.vals[slog_ind] = readU32FromChar(&argument); + continue; + case 'l': /* search length */ + argument++; + g_params.vals[mml_ind] = readU32FromChar(&argument); + continue; + case 't': /* target length */ + argument++; + g_params.vals[tlen_ind] = readU32FromChar(&argument); + continue; + case 'S': /* strategy */ + argument++; + g_params.vals[strt_ind] = readU32FromChar(&argument); + continue; + case 'f': /* forceAttachDict */ + argument++; + g_params.vals[fadt_ind] = readU32FromChar(&argument); + continue; + case 'L': + { argument++; + cLevelRun = (int)readU32FromChar(&argument); + g_params = emptyParams(); + continue; + } + default : ; + } + break; + } + + break; + + /* target level1 speed objective, in MB/s */ + case 'T': + argument++; + g_target = readU32FromChar(&argument); + break; + + /* cut input into blocks */ + case 'B': + argument++; + g_blockSize = readU32FromChar(&argument); + DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10)); + break; + + /* caps runtime (in seconds) */ + case 't': + argument++; + g_timeLimit_s = readU32FromChar(&argument); + break; + + case 's': + argument++; + seperateFiles = 1; + break; + + case 'q': + while (argument[0] == 'q') { argument++; g_displayLevel--; } + break; + + case 'v': + while (argument[0] == 'v') { argument++; g_displayLevel++; } + break; + + /* load dictionary file (only applicable for optimizer rn) */ + case 'D': + if(i == argc - 1) { /* last argument, return error. 
*/ + DISPLAY("Dictionary file expected but not given : %d\n", i); + return 1; + } else { + i++; + dictFileName = argv[i]; + argument += strlen(argument); + } + break; + + /* Unknown command */ + default : return badusage(exename); + } + } + continue; + } /* if (argument[0]=='-') */ + + /* first provided filename is input */ + if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } + } + + /* Welcome message */ + DISPLAYLEVEL(2, WELCOME_MESSAGE); + + if (filenamesStart==0) { + if (g_optimizer) { + DISPLAY("Optimizer Expects File\n"); + return 1; + } else { + result = benchSample(compressibility, cLevelRun); + } + } else { + if(seperateFiles) { + for(i = 0; i < argc - filenamesStart; i++) { + if (g_optimizer) { + result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); + if(result) { DISPLAY("Error on File %d", i); return result; } + } else { + result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun); + if(result) { DISPLAY("Error on File %d", i); return result; } + } + } + } else { + if (g_optimizer) { + assert(filenamesStart < argc); + result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); + } else { + result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun); + } + } + } + + if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; } + + return result; +} diff --git a/src/zstd/tests/playTests.sh b/src/zstd/tests/playTests.sh new file mode 100755 index 000000000..f353229be --- /dev/null +++ b/src/zstd/tests/playTests.sh @@ -0,0 +1,1381 @@ +#!/bin/sh + +set -e + +die() { + println "$@" 1>&2 + exit 1 +} + +datagen() { + "$DATAGEN_BIN" $@ +} + +zstd() { + if [ -z "$EXEC_PREFIX" ]; then + "$ZSTD_BIN" $@ + else + "$EXEC_PREFIX" "$ZSTD_BIN" $@ + fi +} + +sudoZstd() { + if [ -z "$EXEC_PREFIX" ]; then + sudo 
"$ZSTD_BIN" $@ + else + sudo "$EXEC_PREFIX" "$ZSTD_BIN" $@ + fi +} + +roundTripTest() { + if [ -n "$3" ]; then + cLevel="$3" + proba="$2" + else + cLevel="$2" + proba="" + fi + if [ -n "$4" ]; then + dLevel="$4" + else + dLevel="$cLevel" + fi + + rm -f tmp1 tmp2 + println "roundTripTest: datagen $1 $proba | zstd -v$cLevel | zstd -d$dLevel" + datagen $1 $proba | $MD5SUM > tmp1 + datagen $1 $proba | zstd --ultra -v$cLevel | zstd -d$dLevel | $MD5SUM > tmp2 + $DIFF -q tmp1 tmp2 +} + +fileRoundTripTest() { + if [ -n "$3" ]; then + local_c="$3" + local_p="$2" + else + local_c="$2" + local_p="" + fi + if [ -n "$4" ]; then + local_d="$4" + else + local_d="$local_c" + fi + + rm -f tmp.zst tmp.md5.1 tmp.md5.2 + println "fileRoundTripTest: datagen $1 $local_p > tmp && zstd -v$local_c -c tmp | zstd -d$local_d" + datagen $1 $local_p > tmp + < tmp $MD5SUM > tmp.md5.1 + zstd --ultra -v$local_c -c tmp | zstd -d$local_d | $MD5SUM > tmp.md5.2 + $DIFF -q tmp.md5.1 tmp.md5.2 +} + +truncateLastByte() { + dd bs=1 count=$(($(wc -c < "$1") - 1)) if="$1" +} + +println() { + printf '%b\n' "${*}" +} + + +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +PRGDIR="$SCRIPT_DIR/../programs" +TESTDIR="$SCRIPT_DIR/../tests" +UNAME=$(uname) +ZSTDGREP="$PRGDIR/zstdgrep" + +detectedTerminal=false +if [ -t 0 ] && [ -t 1 ] +then + detectedTerminal=true +fi +isTerminal=${isTerminal:-$detectedTerminal} + +isWindows=false +INTOVOID="/dev/null" +case "$UNAME" in + GNU) DEVDEVICE="/dev/random" ;; + *) DEVDEVICE="/dev/zero" ;; +esac +case "$OS" in + Windows*) + isWindows=true + INTOVOID="NUL" + DEVDEVICE="NUL" + ;; +esac + +case "$UNAME" in + Darwin) MD5SUM="md5 -r" ;; + FreeBSD) MD5SUM="gmd5sum" ;; + OpenBSD) MD5SUM="md5" ;; + *) MD5SUM="md5sum" ;; +esac + +MTIME="stat -c %Y" +case "$UNAME" in + Darwin | FreeBSD | OpenBSD) MTIME="stat -f %m" ;; +esac + +DIFF="diff" +case "$UNAME" in + SunOS) DIFF="gdiff" ;; +esac + +println "\nStarting playTests.sh isWindows=$isWindows EXE_PREFIX='$EXE_PREFIX' ZSTD_BIN='$ZSTD_BIN' 
DATAGEN_BIN='$DATAGEN_BIN'" + +[ -n "$ZSTD_BIN" ] || die "\$ZSTD_BIN variable must be defined!" +[ -n "$DATAGEN_BIN" ] || die "\$DATAGEN_BIN variable must be defined!" + +if echo hello | zstd -v -T2 2>&1 > $INTOVOID | grep -q 'multi-threading is disabled' +then + hasMT="" +else + hasMT="true" +fi + + + +println "\n===> simple tests " + +datagen > tmp +println "test : basic compression " +zstd -f tmp # trivial compression case, creates tmp.zst +println "test : basic decompression" +zstd -df tmp.zst # trivial decompression case (overwrites tmp) +println "test : too large compression level => auto-fix" +zstd -99 -f tmp # too large compression level, automatic sized down +zstd -5000000000 -f tmp && die "too large numeric value : must fail" +println "test : --fast aka negative compression levels" +zstd --fast -f tmp # == -1 +zstd --fast=3 -f tmp # == -3 +zstd --fast=200000 -f tmp # too low compression level, automatic fixed +zstd --fast=5000000000 -f tmp && die "too large numeric value : must fail" +zstd -c --fast=0 tmp > $INTOVOID && die "--fast must not accept value 0" +println "test : too large numeric argument" +zstd --fast=9999999999 -f tmp && die "should have refused numeric value" +println "test : set compression level with environment variable ZSTD_CLEVEL" +ZSTD_CLEVEL=12 zstd -f tmp # positive compression level +ZSTD_CLEVEL=-12 zstd -f tmp # negative compression level +ZSTD_CLEVEL=+12 zstd -f tmp # valid: verbose '+' sign +ZSTD_CLEVEL='' zstd -f tmp # empty env var, warn and revert to default setting +ZSTD_CLEVEL=- zstd -f tmp # malformed env var, warn and revert to default setting +ZSTD_CLEVEL=a zstd -f tmp # malformed env var, warn and revert to default setting +ZSTD_CLEVEL=+a zstd -f tmp # malformed env var, warn and revert to default setting +ZSTD_CLEVEL=3a7 zstd -f tmp # malformed env var, warn and revert to default setting +ZSTD_CLEVEL=50000000000 zstd -f tmp # numeric value too large, warn and revert to default setting +println "test : override 
ZSTD_CLEVEL with command line option" +ZSTD_CLEVEL=12 zstd --fast=3 -f tmp # overridden by command line option +println "test : compress to stdout" +zstd tmp -c > tmpCompressed +zstd tmp --stdout > tmpCompressed # long command format +println "test : compress to named file" +rm tmpCompressed +zstd tmp -o tmpCompressed +test -f tmpCompressed # file must be created +println "test : -o must be followed by filename (must fail)" +zstd tmp -of tmpCompressed && die "-o must be followed by filename " +println "test : force write, correct order" +zstd tmp -fo tmpCompressed +println "test : forgotten argument" +cp tmp tmp2 +zstd tmp2 -fo && die "-o must be followed by filename " +println "test : implied stdout when input is stdin" +println bob | zstd | zstd -d +if [ "$isTerminal" = true ]; then +println "test : compressed data to terminal" +println bob | zstd && die "should have refused : compressed data to terminal" +println "test : compressed data from terminal (a hang here is a test fail, zstd is wrongly waiting on data from terminal)" +zstd -d > $INTOVOID && die "should have refused : compressed data from terminal" +fi +println "test : null-length file roundtrip" +println -n '' | zstd - --stdout | zstd -d --stdout +println "test : ensure small file doesn't add 3-bytes null block" +datagen -g1 > tmp1 +zstd tmp1 -c | wc -c | grep "14" +zstd < tmp1 | wc -c | grep "14" +println "test : decompress file with wrong suffix (must fail)" +zstd -d tmpCompressed && die "wrong suffix error not detected!" 
+zstd -df tmp && die "should have refused : wrong extension" +println "test : decompress into stdout" +zstd -d tmpCompressed -c > tmpResult # decompression using stdout +zstd --decompress tmpCompressed -c > tmpResult +zstd --decompress tmpCompressed --stdout > tmpResult +println "test : decompress from stdin into stdout" +zstd -dc < tmp.zst > $INTOVOID # combine decompression, stdin & stdout +zstd -dc - < tmp.zst > $INTOVOID +zstd -d < tmp.zst > $INTOVOID # implicit stdout when stdin is used +zstd -d - < tmp.zst > $INTOVOID +println "test : impose memory limitation (must fail)" +zstd -d -f tmp.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed" +zstd -d -f tmp.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +zstd -d -f tmp.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +zstd -d -f tmp.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +println "test : overwrite protection" +zstd -q tmp && die "overwrite check failed!" 
+println "test : force overwrite" +zstd -q -f tmp +zstd -q --force tmp +println "test : overwrite readonly file" +rm -f tmpro tmpro.zst +println foo > tmpro.zst +println foo > tmpro +chmod 400 tmpro.zst +zstd -q tmpro && die "should have refused to overwrite read-only file" +zstd -q -f tmpro +println "test: --no-progress flag" +zstd tmpro -c --no-progress | zstd -d -f -o "$INTOVOID" --no-progress +zstd tmpro -cv --no-progress | zstd -dv -f -o "$INTOVOID" --no-progress +rm -f tmpro tmpro.zst +println "test: overwrite input file (must fail)" +zstd tmp -fo tmp && die "zstd compression overwrote the input file" +zstd tmp.zst -dfo tmp.zst && die "zstd decompression overwrote the input file" +println "test: detect that input file does not exist" +zstd nothere && die "zstd hasn't detected that input file does not exist" +println "test: --[no-]compress-literals" +zstd tmp -c --no-compress-literals -1 | zstd -t +zstd tmp -c --no-compress-literals --fast=1 | zstd -t +zstd tmp -c --no-compress-literals -19 | zstd -t +zstd tmp -c --compress-literals -1 | zstd -t +zstd tmp -c --compress-literals --fast=1 | zstd -t +zstd tmp -c --compress-literals -19 | zstd -t +zstd -b --fast=1 -i0e1 tmp --compress-literals +zstd -b --fast=1 -i0e1 tmp --no-compress-literals + +println "\n===> zstdgrep tests" +ln -sf "$ZSTD_BIN" zstdcat +rm -f tmp_grep +echo "1234" > tmp_grep +zstd -f tmp_grep +lines=$(ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep tmp_grep.zst | wc -l) +test 2 -eq $lines +ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst && die "Should have failed" +ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst | grep "No such file or directory" || true +rm -f tmp_grep* + +println "\n===> --exclude-compressed flag" +rm -rf precompressedFilterTestDir +mkdir -p precompressedFilterTestDir +datagen $size > precompressedFilterTestDir/input.5 +datagen $size > precompressedFilterTestDir/input.6 +zstd --exclude-compressed --long --rm -r precompressedFilterTestDir +datagen $size > 
precompressedFilterTestDir/input.7 +datagen $size > precompressedFilterTestDir/input.8 +zstd --exclude-compressed --long --rm -r precompressedFilterTestDir +test ! -f precompressedFilterTestDir/input.5.zst.zst +test ! -f precompressedFilterTestDir/input.6.zst.zst +file1timestamp=`$MTIME precompressedFilterTestDir/input.5.zst` +file2timestamp=`$MTIME precompressedFilterTestDir/input.7.zst` +if [ $file2timestamp -ge $file1timestamp ]; then + println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again." +else + println "Test is not successful" +fi +# File Extension check. +datagen $size > precompressedFilterTestDir/input.zstbar +zstd --exclude-compressed --long --rm -r precompressedFilterTestDir +# zstd should compress input.zstbar +test -f precompressedFilterTestDir/input.zstbar.zst +# Check without the --exclude-compressed flag +zstd --long --rm -r precompressedFilterTestDir +# Files should get compressed again without the --exclude-compressed flag. +test -f precompressedFilterTestDir/input.5.zst.zst +test -f precompressedFilterTestDir/input.6.zst.zst +println "Test completed" + + +println "\n===> recursive mode test " +# combination of -r with empty list of input file +zstd -c -r < tmp > tmp.zst + + +println "\n===> file removal" +zstd -f --rm tmp +test ! -f tmp # tmp should no longer be present +zstd -f -d --rm tmp.zst +test ! -f tmp.zst # tmp.zst should no longer be present +println "test : should quietly not remove non-regular file" +println hello > tmp +zstd tmp -f -o "$DEVDEVICE" 2>tmplog > "$INTOVOID" +grep -v "Refusing to remove non-regular file" tmplog +rm -f tmplog +zstd tmp -f -o "$INTOVOID" 2>&1 | grep -v "Refusing to remove non-regular file" +println "test : --rm on stdin" +println a | zstd --rm > $INTOVOID # --rm should remain silent +rm tmp +zstd -f tmp && die "tmp not present : should have failed" +test ! 
-f tmp.zst # tmp.zst should not be created +println "test : -d -f do not delete destination when source is not present" +touch tmp # create destination file +zstd -d -f tmp.zst && die "attempt to decompress a non existing file" +test -f tmp # destination file should still be present +println "test : -f do not delete destination when source is not present" +rm tmp # erase source file +touch tmp.zst # create destination file +zstd -f tmp && die "attempt to compress a non existing file" +test -f tmp.zst # destination file should still be present +rm -rf tmp* # may also erase tmp* directory from previous failed run + + +println "\n===> decompression only tests " +# the following test verifies that the decoder is compatible with RLE as first block +# older versions of zstd cli are not able to decode such corner case. +# As a consequence, the zstd cli do not generate them, to maintain compatibility with older versions. +dd bs=1048576 count=1 if=/dev/zero of=tmp +zstd -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst" +$DIFF -s tmp1 tmp +rm tmp* + + +println "\n===> compress multiple files" +println hello > tmp1 +println world > tmp2 +zstd tmp1 tmp2 -o "$INTOVOID" -f +zstd tmp1 tmp2 -c | zstd -t +zstd tmp1 tmp2 -o tmp.zst +test ! -f tmp1.zst +test ! -f tmp2.zst +zstd tmp1 tmp2 +zstd -t tmp1.zst tmp2.zst +zstd -dc tmp1.zst tmp2.zst +zstd tmp1.zst tmp2.zst -o "$INTOVOID" -f +zstd -d tmp1.zst tmp2.zst -o tmp +touch tmpexists +zstd tmp1 tmp2 -f -o tmpexists +zstd tmp1 tmp2 -o tmpexists && die "should have refused to overwrite" +# Bug: PR #972 +if [ "$?" 
-eq 139 ]; then + die "should not have segfaulted" +fi +println "\n===> multiple files and shell completion " +datagen -s1 > tmp1 2> $INTOVOID +datagen -s2 -g100K > tmp2 2> $INTOVOID +datagen -s3 -g1M > tmp3 2> $INTOVOID +println "compress tmp* : " +zstd -f tmp* +test -f tmp1.zst +test -f tmp2.zst +test -f tmp3.zst +rm tmp1 tmp2 tmp3 +println "decompress tmp* : " +zstd -df ./*.zst +test -f tmp1 +test -f tmp2 +test -f tmp3 +println "compress tmp* into stdout > tmpall : " +zstd -c tmp1 tmp2 tmp3 > tmpall +test -f tmpall # should check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst) +println "decompress tmpall* into stdout > tmpdec : " +cp tmpall tmpall2 +zstd -dc tmpall* > tmpdec +test -f tmpdec # should check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3)) +println "compress multiple files including a missing one (notHere) : " +zstd -f tmp1 notHere tmp2 && die "missing file not detected!" +rm tmp* + + +if [ "$isWindows" = false ] ; then + println "\n===> zstd fifo named pipe test " + echo "Hello World!" > tmp_original + mkfifo tmp_named_pipe + # note : fifo test doesn't work in combination with `dd` or `cat` + echo "Hello World!" > tmp_named_pipe & + zstd tmp_named_pipe -o tmp_compressed + zstd -d -o tmp_decompressed tmp_compressed + $DIFF -s tmp_original tmp_decompressed + rm -rf tmp* +fi + + +if [ -n "$DEVNULLRIGHTS" ] ; then + # these tests requires sudo rights, which is uncommon. + # they are only triggered if DEVNULLRIGHTS macro is defined. 
+ println "\n===> checking /dev/null permissions are unaltered " + datagen > tmp + sudoZstd tmp -o $INTOVOID # sudo rights could modify /dev/null permissions + sudoZstd tmp -c > $INTOVOID + zstd tmp -f -o tmp.zst + sudoZstd -d tmp.zst -c > $INTOVOID + sudoZstd -d tmp.zst -o $INTOVOID + ls -las $INTOVOID | grep "rw-rw-rw-" +fi + + +println "\n===> compress multiple files into an output directory, --output-dir-flat" +println henlo > tmp1 +mkdir tmpInputTestDir +mkdir tmpInputTestDir/we +mkdir tmpInputTestDir/we/must +mkdir tmpInputTestDir/we/must/go +mkdir tmpInputTestDir/we/must/go/deeper +println cool > tmpInputTestDir/we/must/go/deeper/tmp2 +mkdir tmpOutDir +zstd tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir +test -f tmpOutDir/tmp1.zst +test -f tmpOutDir/tmp2.zst +println "test : decompress multiple files into an output directory, --output-dir-flat" +mkdir tmpOutDirDecomp +zstd tmpOutDir -r -d --output-dir-flat tmpOutDirDecomp +test -f tmpOutDirDecomp/tmp2 +test -f tmpOutDirDecomp/tmp1 +rm -f tmpOutDirDecomp/* +zstd tmpOutDir -r -d --output-dir-flat=tmpOutDirDecomp +test -f tmpOutDirDecomp/tmp2 +test -f tmpOutDirDecomp/tmp1 +rm -rf tmp* + + +println "test : compress multiple files reading them from a file, --filelist=FILE" +println "Hello world!, file1" > tmp1 +println "Hello world!, file2" > tmp2 +println tmp1 > tmp_fileList +println tmp2 >> tmp_fileList +zstd -f --filelist=tmp_fileList +test -f tmp2.zst +test -f tmp1.zst + +println "test : reading file list from a symlink, --filelist=FILE" +rm -f *.zst +ln -s tmp_fileList tmp_symLink +zstd -f --filelist=tmp_symLink +test -f tmp2.zst +test -f tmp1.zst + +println "test : compress multiple files reading them from multiple files, --filelist=FILE" +rm -f *.zst +println "Hello world!, file3" > tmp3 +println "Hello world!, file4" > tmp4 +println tmp3 > tmp_fileList2 +println tmp4 >> tmp_fileList2 +zstd -f --filelist=tmp_fileList --filelist=tmp_fileList2 +test -f tmp1.zst +test -f tmp2.zst 
+test -f tmp3.zst +test -f tmp4.zst + +println "test : decompress multiple files reading them from a file, --filelist=FILE" +rm -f tmp1 tmp2 +println tmp1.zst > tmpZst +println tmp2.zst >> tmpZst +zstd -d -f --filelist=tmpZst +test -f tmp1 +test -f tmp2 + +println "test : decompress multiple files reading them from multiple files, --filelist=FILE" +rm -f tmp1 tmp2 tmp3 tmp4 +println tmp3.zst > tmpZst2 +println tmp4.zst >> tmpZst2 +zstd -d -f --filelist=tmpZst --filelist=tmpZst2 +test -f tmp1 +test -f tmp2 +test -f tmp3 +test -f tmp4 + +println "test : survive a list of files which is text garbage (--filelist=FILE)" +datagen > tmp_badList +zstd -f --filelist=tmp_badList && die "should have failed : list is text garbage" + +println "test : survive a list of files which is binary garbage (--filelist=FILE)" +datagen -P0 -g1M > tmp_badList +zstd -qq -f --filelist=tmp_badList && die "should have failed : list is binary garbage" # let's avoid printing binary garbage on console + +println "test : try to overflow internal list of files (--filelist=FILE)" +touch tmp1 tmp2 tmp3 tmp4 tmp5 tmp6 +ls tmp* > tmpList +zstd -f tmp1 --filelist=tmpList --filelist=tmpList tmp2 tmp3 # can trigger an overflow of internal file list +rm -rf tmp* + +println "\n===> --[no-]content-size tests" + +datagen > tmp_contentsize +zstd -f tmp_contentsize +zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" +zstd -f --no-content-size tmp_contentsize +zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" && die +zstd -f --content-size tmp_contentsize +zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" +zstd -f --content-size --no-content-size tmp_contentsize +zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" && die +rm -rf tmp* + +println "test : show-default-cparams regular" +datagen > tmp +zstd --show-default-cparams -f tmp +rm -rf tmp* + +println "test : show-default-cparams recursive" +mkdir tmp_files +datagen -g15000 > tmp_files/tmp1 +datagen -g129000 > tmp_files/tmp2 +datagen 
-g257000 > tmp_files/tmp3 +zstd --show-default-cparams -f -r tmp_files +rm -rf tmp* + +println "\n===> Advanced compression parameters " +println "Hello world!" | zstd --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!" +println "Hello world!" | zstd --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!" +println "Hello world!" | zstd --zstd=windowLog=21,slog - -o tmp.zst && die "wrong parameters not detected!" +println "Hello world!" | zstd --zstd=strategy=10 - -o tmp.zst && die "parameter out of bound not detected!" # > btultra2 : does not exist +test ! -f tmp.zst # tmp.zst should not be created +roundTripTest -g512K +roundTripTest -g512K " --zstd=mml=3,tlen=48,strat=6" +roundTripTest -g512K " --zstd=strat=6,wlog=23,clog=23,hlog=22,slog=6" +roundTripTest -g512K " --zstd=windowLog=23,chainLog=23,hashLog=22,searchLog=6,minMatch=3,targetLength=48,strategy=6" +roundTripTest -g512K " --single-thread --long --zstd=ldmHashLog=20,ldmMinMatch=64,ldmBucketSizeLog=1,ldmHashRateLog=7" +roundTripTest -g512K " --single-thread --long --zstd=lhlog=20,lmml=64,lblog=1,lhrlog=7" +roundTripTest -g64K "19 --zstd=strat=9" # btultra2 + + +println "\n===> Pass-Through mode " +println "Hello world 1!" | zstd -df +println "Hello world 2!" | zstd -dcf +println "Hello world 3!" > tmp1 +zstd -dcf tmp1 + + +println "\n===> frame concatenation " +println "hello " > hello.tmp +println "world!" 
> world.tmp +cat hello.tmp world.tmp > helloworld.tmp +zstd -c hello.tmp > hello.zst +zstd -c world.tmp > world.zst +cat hello.zst world.zst > helloworld.zst +zstd -dc helloworld.zst > result.tmp +cat result.tmp +$DIFF helloworld.tmp result.tmp +println "frame concatenation without checksum" +zstd -c hello.tmp > hello.zst --no-check +zstd -c world.tmp > world.zst --no-check +cat hello.zst world.zst > helloworld.zstd +zstd -dc helloworld.zst > result.tmp +$DIFF helloworld.tmp result.tmp +println "testing zstdcat symlink" +ln -sf "$ZSTD_BIN" zstdcat +$EXE_PREFIX ./zstdcat helloworld.zst > result.tmp +$DIFF helloworld.tmp result.tmp +ln -s helloworld.zst helloworld.link.zst +$EXE_PREFIX ./zstdcat helloworld.link.zst > result.tmp +$DIFF helloworld.tmp result.tmp +rm zstdcat +rm result.tmp +println "testing zcat symlink" +ln -sf "$ZSTD_BIN" zcat +$EXE_PREFIX ./zcat helloworld.zst > result.tmp +$DIFF helloworld.tmp result.tmp +$EXE_PREFIX ./zcat helloworld.link.zst > result.tmp +$DIFF helloworld.tmp result.tmp +rm zcat +rm ./*.tmp ./*.zstd +println "frame concatenation tests completed" + + +if [ "$isWindows" = false ] && [ "$UNAME" != 'SunOS' ] && [ "$UNAME" != "OpenBSD" ] ; then +println "\n**** flush write error test **** " + +println "println foo | zstd > /dev/full" +println foo | zstd > /dev/full && die "write error not detected!" +println "println foo | zstd | zstd -d > /dev/full" +println foo | zstd | zstd -d > /dev/full && die "write error not detected!" + +fi + + +if [ "$isWindows" = false ] && [ "$UNAME" != 'SunOS' ] ; then + +println "\n===> symbolic link test " + +rm -f hello.tmp world.tmp world2.tmp hello.tmp.zst world.tmp.zst +println "hello world" > hello.tmp +ln -s hello.tmp world.tmp +ln -s hello.tmp world2.tmp +zstd world.tmp hello.tmp || true +test -f hello.tmp.zst # regular file should have been compressed! +test ! -f world.tmp.zst # symbolic link should not have been compressed! +zstd world.tmp || true +test ! 
-f world.tmp.zst # symbolic link should not have been compressed! +zstd world.tmp world2.tmp || true +test ! -f world.tmp.zst # symbolic link should not have been compressed! +test ! -f world2.tmp.zst # symbolic link should not have been compressed! +zstd world.tmp hello.tmp -f +test -f world.tmp.zst # symbolic link should have been compressed with --force +rm -f hello.tmp world.tmp world2.tmp hello.tmp.zst world.tmp.zst + +fi + + +println "\n===> test sparse file support " + +datagen -g5M -P100 > tmpSparse +zstd tmpSparse -c | zstd -dv -o tmpSparseRegen +$DIFF -s tmpSparse tmpSparseRegen +zstd tmpSparse -c | zstd -dv --sparse -c > tmpOutSparse +$DIFF -s tmpSparse tmpOutSparse +zstd tmpSparse -c | zstd -dv --no-sparse -c > tmpOutNoSparse +$DIFF -s tmpSparse tmpOutNoSparse +ls -ls tmpSparse* # look at file size and block size on disk +datagen -s1 -g1200007 -P100 | zstd | zstd -dv --sparse -c > tmpSparseOdd # Odd size file (to not finish on an exact nb of blocks) +datagen -s1 -g1200007 -P100 | $DIFF -s - tmpSparseOdd +ls -ls tmpSparseOdd # look at file size and block size on disk +println "\n Sparse Compatibility with Console :" +println "Hello World 1 !" | zstd | zstd -d -c +println "Hello World 2 !" 
| zstd | zstd -d | cat +println "\n Sparse Compatibility with Append :" +datagen -P100 -g1M > tmpSparse1M +cat tmpSparse1M tmpSparse1M > tmpSparse2M +zstd -v -f tmpSparse1M -o tmpSparseCompressed +zstd -d -v -f tmpSparseCompressed -o tmpSparseRegenerated +zstd -d -v -f tmpSparseCompressed -c >> tmpSparseRegenerated +ls -ls tmpSparse* # look at file size and block size on disk +$DIFF tmpSparse2M tmpSparseRegenerated +rm tmpSparse* + + +println "\n===> stream-size mode" + +datagen -g11000 > tmp +println "test : basic file compression vs sized streaming compression" +file_size=$(zstd -14 -f tmp -o tmp.zst && wc -c < tmp.zst) +stream_size=$(cat tmp | zstd -14 --stream-size=11000 | wc -c) +if [ "$stream_size" -gt "$file_size" ]; then + die "hinted compression larger than expected" +fi +println "test : sized streaming compression and decompression" +cat tmp | zstd -14 -f tmp -o --stream-size=11000 tmp.zst +zstd -df tmp.zst -o tmp_decompress +cmp tmp tmp_decompress || die "difference between original and decompressed file" +println "test : incorrect stream size" +cat tmp | zstd -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" + +println "\n===> zstd zero weight dict test " +rm -f tmp* +cp "$TESTDIR/dict-files/zero-weight-dict" tmp_input +zstd -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input +zstd -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp +$DIFF tmp_decomp tmp_input +rm -rf tmp* + +println "\n===> zstd (valid) zero weight dict test " +rm -f tmp* +# 0 has a non-zero weight in the dictionary +echo "0000000000000000000000000" > tmp_input +zstd -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input +zstd -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp +$DIFF tmp_decomp tmp_input +rm -rf tmp* + +println "\n===> size-hint mode" + +datagen -g11000 > tmp +datagen -g11000 > tmp2 +datagen > tmpDict +println "test : basic file compression vs hinted streaming compression" +file_size=$(zstd -14 
-f tmp -o tmp.zst && wc -c < tmp.zst) +stream_size=$(cat tmp | zstd -14 --size-hint=11000 | wc -c) +if [ "$stream_size" -ge "$file_size" ]; then + die "hinted compression larger than expected" +fi +println "test : hinted streaming compression and decompression" +cat tmp | zstd -14 -f -o tmp.zst --size-hint=11000 +zstd -df tmp.zst -o tmp_decompress +cmp tmp tmp_decompress || die "difference between original and decompressed file" +println "test : hinted streaming compression with dictionary" +cat tmp | zstd -14 -f -D tmpDict --size-hint=11000 | zstd -t -D tmpDict +println "test : multiple file compression with hints and dictionary" +zstd -14 -f -D tmpDict --size-hint=11000 tmp tmp2 +zstd -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp +zstd -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2 +cmp tmp.zst tmp1_.zst || die "first file's output differs" +cmp tmp2.zst tmp2_.zst || die "second file's output differs" +println "test : incorrect hinted stream sizes" +cat tmp | zstd -14 -f --size-hint=11050 | zstd -t # slightly too high +cat tmp | zstd -14 -f --size-hint=10950 | zstd -t # slightly too low +cat tmp | zstd -14 -f --size-hint=22000 | zstd -t # considerably too high +cat tmp | zstd -14 -f --size-hint=5500 | zstd -t # considerably too low + + +println "\n===> dictionary tests " + +println "- test with raw dict (content only) " +datagen > tmpDict +datagen -g1M | $MD5SUM > tmp1 +datagen -g1M | zstd -D tmpDict | zstd -D tmpDict -dvq | $MD5SUM > tmp2 +$DIFF -q tmp1 tmp2 +println "- Create first dictionary " +TESTFILE="$PRGDIR"/zstdcli.c +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict +cp "$TESTFILE" tmp +println "- Test dictionary compression with tmpDict as an input file and dictionary" +zstd -f tmpDict -D tmpDict && die "compression error not detected!" 
+println "- Dictionary compression roundtrip" +zstd -f tmp -D tmpDict +zstd -d tmp.zst -D tmpDict -fo result +$DIFF "$TESTFILE" result +println "- Dictionary compression with btlazy2 strategy" +zstd -f tmp -D tmpDict --zstd=strategy=6 +zstd -d tmp.zst -D tmpDict -fo result +$DIFF "$TESTFILE" result +if [ -n "$hasMT" ] +then + println "- Test dictionary compression with multithreading " + datagen -g5M | zstd -T2 -D tmpDict | zstd -t -D tmpDict # fails with v1.3.2 +fi +println "- Create second (different) dictionary " +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC +zstd -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +println "- Create dictionary with short dictID" +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" +println "- Create dictionary with wrong dictID parameter order (must fail)" +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument " +println "- Create dictionary with size limit" +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K -v +println "- Create dictionary with small size limit" +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict3 --maxdict=1K -v +println "- Create dictionary with wrong parameter order (must fail)" +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict3 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument " +println "- Compress without dictID" +zstd -f tmp -D tmpDict1 --no-dictID +zstd -d tmp.zst -D tmpDict -fo result +$DIFF "$TESTFILE" result +println "- Compress with wrong argument order (must fail)" +zstd tmp -Df tmpDict1 -c > $INTOVOID && die "-D must be followed by dictionary name " +println "- Compress multiple files with dictionary" +rm -rf dirTestDict +mkdir dirTestDict +cp "$TESTDIR"/*.c dirTestDict +cp "$PRGDIR"/*.c dirTestDict +cp "$PRGDIR"/*.h dirTestDict +$MD5SUM dirTestDict/* 
> tmph1 +zstd -f --rm dirTestDict/* -D tmpDictC +zstd -d --rm dirTestDict/*.zst -D tmpDictC # note : use internal checksum by default +case "$UNAME" in + Darwin) println "md5sum -c not supported on OS-X : test skipped" ;; # not compatible with OS-X's md5 + *) $MD5SUM -c tmph1 ;; +esac +rm -rf dirTestDict +println "- dictionary builder on bogus input" +println "Hello World" > tmp +zstd --train-legacy -q tmp && die "Dictionary training should fail : not enough input source" +datagen -P0 -g10M > tmp +zstd --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise" +println "- Test -o before --train" +rm -f tmpDict dictionary +zstd -o tmpDict --train "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f tmpDict +zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f dictionary +println "- Test dictionary training fails" +echo "000000000000000000000000000000000" > tmpz +zstd --train tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros" +if [ -n "$hasMT" ] +then + zstd --train -T0 tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros" + println "- Create dictionary with multithreading enabled" + zstd --train -T0 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict +fi +rm tmp* dictionary + + +println "\n===> fastCover dictionary builder : advanced options " +TESTFILE="$PRGDIR"/zstdcli.c +datagen > tmpDict +println "- Create first dictionary" +zstd --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict +cp "$TESTFILE" tmp +zstd -f tmp -D tmpDict +zstd -d tmp.zst -D tmpDict -fo result +$DIFF "$TESTFILE" result +println "- Create second (different) dictionary" +zstd --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC +zstd -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" 
+zstd --train-fastcover=k=56,d=8 && die "Create dictionary without input file" +println "- Create dictionary with short dictID" +zstd --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" +println "- Create dictionaries with shrink-dict flag enabled" +zstd --train-fastcover=steps=1,shrink "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict +zstd --train-fastcover=steps=1,shrink=1 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict1 +zstd --train-fastcover=steps=1,shrink=5 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict2 +println "- Create dictionary with size limit" +zstd --train-fastcover=steps=1 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K +println "- Create dictionary using all samples for both training and testing" +zstd --train-fastcover=k=56,d=8,split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +println "- Create dictionary using f=16" +zstd --train-fastcover=k=56,d=8,f=16 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +zstd --train-fastcover=k=56,d=8,accel=15 -r "$TESTDIR"/*.c "$PRGDIR"/*.c && die "Created dictionary using accel=15" +println "- Create dictionary using accel=2" +zstd --train-fastcover=k=56,d=8,accel=2 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +println "- Create dictionary using accel=10" +zstd --train-fastcover=k=56,d=8,accel=10 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +println "- Create dictionary with multithreading" +zstd --train-fastcover -T4 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +println "- Test -o before --train-fastcover" +rm -f tmpDict dictionary +zstd -o tmpDict --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f tmpDict +zstd --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f dictionary +rm tmp* dictionary + + +println "\n===> legacy dictionary builder " + +TESTFILE="$PRGDIR"/zstdcli.c +datagen > tmpDict +println "- Create first dictionary" +zstd --train-legacy=selectivity=8 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict +cp "$TESTFILE" tmp +zstd -f tmp -D 
tmpDict +zstd -d tmp.zst -D tmpDict -fo result +$DIFF "$TESTFILE" result +zstd --train-legacy=s=8 && die "Create dictionary without input files (should error)" +println "- Create second (different) dictionary" +zstd --train-legacy=s=5 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC +zstd -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +println "- Create dictionary with short dictID" +zstd --train-legacy -s5 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" +println "- Create dictionary with size limit" +zstd --train-legacy -s9 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K +println "- Test -o before --train-legacy" +rm -f tmpDict dictionary +zstd -o tmpDict --train-legacy "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f tmpDict +zstd --train-legacy "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f dictionary +rm tmp* dictionary + + +println "\n===> integrity tests " + +println "test one file (tmp1.zst) " +datagen > tmp1 +zstd tmp1 +zstd -t tmp1.zst +zstd --test tmp1.zst +println "test multiple files (*.zst) " +zstd -t ./*.zst +println "test bad files (*) " +zstd -t ./* && die "bad files not detected !" +zstd -t tmp1 && die "bad file not detected !" +cp tmp1 tmp2.zst +zstd -t tmp2.zst && die "bad file not detected !" +datagen -g0 > tmp3 +zstd -t tmp3 && die "bad file not detected !" # detects 0-sized files as bad +println "test --rm and --test combined " +zstd -t --rm tmp1.zst +test -f tmp1.zst # check file is still present +split -b16384 tmp1.zst tmpSplit. +zstd -t tmpSplit.* && die "bad file not detected !" 
+datagen | zstd -c | zstd -t + + +println "\n===> golden files tests " + +zstd -t -r "$TESTDIR/golden-compression" +zstd -c -r "$TESTDIR/golden-compression" | zstd -t + + +println "\n===> benchmark mode tests " + +println "bench one file" +datagen > tmp1 +zstd -bi0 tmp1 +println "bench multiple levels" +zstd -i0b0e3 tmp1 +println "bench negative level" +zstd -bi0 --fast tmp1 +println "with recursive and quiet modes" +zstd -rqi0b1e2 tmp1 +println "benchmark decompression only" +zstd -f tmp1 +zstd -b -d -i0 tmp1.zst + + +println "\n===> zstd compatibility tests " + +datagen > tmp +rm -f tmp.zst +zstd --format=zstd -f tmp +test -f tmp.zst + + +println "\n===> gzip compatibility tests " + +GZIPMODE=1 +zstd --format=gzip -V || GZIPMODE=0 +if [ $GZIPMODE -eq 1 ]; then + println "gzip support detected" + GZIPEXE=1 + gzip -V || GZIPEXE=0 + if [ $GZIPEXE -eq 1 ]; then + datagen > tmp + zstd --format=gzip -f tmp + gzip -t -v tmp.gz + gzip -f tmp + zstd -d -f -v tmp.gz + rm tmp* + else + println "gzip binary not detected" + fi +else + println "gzip mode not supported" +fi + + +println "\n===> gzip frame tests " + +if [ $GZIPMODE -eq 1 ]; then + datagen > tmp + zstd -f --format=gzip tmp + zstd -f tmp + cat tmp.gz tmp.zst tmp.gz tmp.zst | zstd -d -f -o tmp + truncateLastByte tmp.gz | zstd -t > $INTOVOID && die "incomplete frame not detected !" 
+ rm tmp* +else + println "gzip mode not supported" +fi + +if [ $GZIPMODE -eq 1 ]; then + datagen > tmp + rm -f tmp.zst + zstd --format=gzip --format=zstd -f tmp + test -f tmp.zst +fi + +println "\n===> xz compatibility tests " + +LZMAMODE=1 +zstd --format=xz -V || LZMAMODE=0 +if [ $LZMAMODE -eq 1 ]; then + println "xz support detected" + XZEXE=1 + xz -Q -V && lzma -Q -V || XZEXE=0 + if [ $XZEXE -eq 1 ]; then + println "Testing zstd xz and lzma support" + datagen > tmp + zstd --format=lzma -f tmp + zstd --format=xz -f tmp + xz -Q -t -v tmp.xz + xz -Q -t -v tmp.lzma + xz -Q -f -k tmp + lzma -Q -f -k --lzma1 tmp + zstd -d -f -v tmp.xz + zstd -d -f -v tmp.lzma + rm tmp* + println "Creating symlinks" + ln -s "$ZSTD_BIN" ./xz + ln -s "$ZSTD_BIN" ./unxz + ln -s "$ZSTD_BIN" ./lzma + ln -s "$ZSTD_BIN" ./unlzma + println "Testing xz and lzma symlinks" + datagen > tmp + ./xz tmp + xz -Q -d tmp.xz + ./lzma tmp + lzma -Q -d tmp.lzma + println "Testing unxz and unlzma symlinks" + xz -Q tmp + ./xz -d tmp.xz + lzma -Q tmp + ./lzma -d tmp.lzma + rm xz unxz lzma unlzma + rm tmp* + else + println "xz binary not detected" + fi +else + println "xz mode not supported" +fi + + +println "\n===> xz frame tests " + +if [ $LZMAMODE -eq 1 ]; then + datagen > tmp + zstd -f --format=xz tmp + zstd -f --format=lzma tmp + zstd -f tmp + cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | zstd -d -f -o tmp + truncateLastByte tmp.xz | zstd -t > $INTOVOID && die "incomplete frame not detected !" + truncateLastByte tmp.lzma | zstd -t > $INTOVOID && die "incomplete frame not detected !" 
+ rm tmp* +else + println "xz mode not supported" +fi + +println "\n===> lz4 compatibility tests " + +LZ4MODE=1 +zstd --format=lz4 -V || LZ4MODE=0 +if [ $LZ4MODE -eq 1 ]; then + println "lz4 support detected" + LZ4EXE=1 + lz4 -V || LZ4EXE=0 + if [ $LZ4EXE -eq 1 ]; then + datagen > tmp + zstd --format=lz4 -f tmp + lz4 -t -v tmp.lz4 + lz4 -f tmp + zstd -d -f -v tmp.lz4 + rm tmp* + else + println "lz4 binary not detected" + fi +else + println "lz4 mode not supported" +fi + + +if [ $LZ4MODE -eq 1 ]; then + println "\n===> lz4 frame tests " + datagen > tmp + zstd -f --format=lz4 tmp + zstd -f tmp + cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | zstd -d -f -o tmp + truncateLastByte tmp.lz4 | zstd -t > $INTOVOID && die "incomplete frame not detected !" + rm tmp* +else + println "\nlz4 mode not supported" +fi + + +println "\n===> suffix list test" + +! zstd -d tmp.abc 2> tmplg + +if [ $GZIPMODE -ne 1 ]; then + grep ".gz" tmplg > $INTOVOID && die "Unsupported suffix listed" +fi + +if [ $LZMAMODE -ne 1 ]; then + grep ".lzma" tmplg > $INTOVOID && die "Unsupported suffix listed" + grep ".xz" tmplg > $INTOVOID && die "Unsupported suffix listed" +fi + +if [ $LZ4MODE -ne 1 ]; then + grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed" +fi + + +println "\n===> tar extension tests " + +rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4 + +datagen > tmp +tar cf tmp.tar tmp +zstd tmp.tar -o tmp.tzst +rm tmp.tar +zstd -d tmp.tzst +[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!" +rm -f tmp.tar tmp.tzst + +if [ $GZIPMODE -eq 1 ]; then + tar czf tmp.tgz tmp + zstd -d tmp.tgz + [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!" + rm -f tmp.tar tmp.tgz +fi + +if [ $LZMAMODE -eq 1 ]; then + tar c tmp | zstd --format=xz > tmp.txz + zstd -d tmp.txz + [ -e tmp.tar ] || die ".txz failed to decompress to .tar!" 
+ rm -f tmp.tar tmp.txz +fi + +if [ $LZ4MODE -eq 1 ]; then + tar c tmp | zstd --format=lz4 > tmp.tlz4 + zstd -d tmp.tlz4 + [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!" + rm -f tmp.tar tmp.tlz4 +fi + +touch tmp.t tmp.tz tmp.tzs +! zstd -d tmp.t +! zstd -d tmp.tz +! zstd -d tmp.tzs + + +println "\n===> zstd round-trip tests " + +roundTripTest +roundTripTest -g15K # TableID==3 +roundTripTest -g127K # TableID==2 +roundTripTest -g255K # TableID==1 +roundTripTest -g522K # TableID==0 +roundTripTest -g519K 6 # greedy, hash chain +roundTripTest -g517K 16 # btlazy2 +roundTripTest -g516K 19 # btopt + +fileRoundTripTest -g500K + +println "\n===> zstd long distance matching round-trip tests " +roundTripTest -g0 "2 --single-thread --long" +roundTripTest -g1000K "1 --single-thread --long" +roundTripTest -g517K "6 --single-thread --long" +roundTripTest -g516K "16 --single-thread --long" +roundTripTest -g518K "19 --single-thread --long" +fileRoundTripTest -g5M "3 --single-thread --long" + + +roundTripTest -g96K "5 --single-thread" +if [ -n "$hasMT" ] +then + println "\n===> zstdmt round-trip tests " + roundTripTest -g4M "1 -T0" + roundTripTest -g8M "3 -T2" + roundTripTest -g8000K "2 --threads=2" + fileRoundTripTest -g4M "19 -T2 -B1M" + + println "\n===> zstdmt long distance matching round-trip tests " + roundTripTest -g8M "3 --long=24 -T2" + + println "\n===> ovLog tests " + datagen -g2MB > tmp + refSize=$(zstd tmp -6 -c --zstd=wlog=18 | wc -c) + ov9Size=$(zstd tmp -6 -c --zstd=wlog=18,ovlog=9 | wc -c) + ov1Size=$(zstd tmp -6 -c --zstd=wlog=18,ovlog=1 | wc -c) + if [ "$refSize" -eq "$ov9Size" ]; then + echo ov9Size should be different from refSize + exit 1 + fi + if [ "$refSize" -eq "$ov1Size" ]; then + echo ov1Size should be different from refSize + exit 1 + fi + if [ "$ov9Size" -ge "$ov1Size" ]; then + echo ov9Size="$ov9Size" should be smaller than ov1Size="$ov1Size" + exit 1 + fi + +else + println "\n===> no multithreading, skipping zstdmt tests " +fi + +rm tmp* + 
+println "\n===> zstd --list/-l single frame tests " +datagen > tmp1 +datagen > tmp2 +datagen > tmp3 +zstd tmp* +zstd -l ./*.zst +zstd -lv ./*.zst | grep "Decompressed Size:" # check that decompressed size is present in header +zstd --list ./*.zst +zstd --list -v ./*.zst + +println "\n===> zstd --list/-l multiple frame tests " +cat tmp1.zst tmp2.zst > tmp12.zst +cat tmp12.zst tmp3.zst > tmp123.zst +zstd -l ./*.zst +zstd -lv ./*.zst + +println "\n===> zstd --list/-l error detection tests " +zstd -l tmp1 tmp1.zst && die "-l must fail on non-zstd file" +zstd --list tmp* && die "-l must fail on non-zstd file" +zstd -lv tmp1* && die "-l must fail on non-zstd file" +zstd --list -v tmp2 tmp12.zst && die "-l must fail on non-zstd file" + +println "test : detect truncated compressed file " +TEST_DATA_FILE=truncatable-input.txt +FULL_COMPRESSED_FILE=${TEST_DATA_FILE}.zst +TRUNCATED_COMPRESSED_FILE=truncated-input.txt.zst +datagen -g50000 > $TEST_DATA_FILE +zstd -f $TEST_DATA_FILE -o $FULL_COMPRESSED_FILE +dd bs=1 count=100 if=$FULL_COMPRESSED_FILE of=$TRUNCATED_COMPRESSED_FILE +zstd --list $TRUNCATED_COMPRESSED_FILE && die "-l must fail on truncated file" + +rm $TEST_DATA_FILE +rm $FULL_COMPRESSED_FILE +rm $TRUNCATED_COMPRESSED_FILE + +println "\n===> zstd --list/-l errors when presented with stdin / no files" +zstd -l && die "-l must fail on empty list of files" +zstd -l - && die "-l does not work on stdin" +zstd -l < tmp1.zst && die "-l does not work on stdin" +zstd -l - < tmp1.zst && die "-l does not work on stdin" +zstd -l - tmp1.zst && die "-l does not work on stdin" +zstd -l - tmp1.zst < tmp1.zst && die "-l does not work on stdin" +zstd -l tmp1.zst < tmp2.zst # this will check tmp1.zst, but not tmp2.zst, which is not an error : zstd simply doesn't read stdin in this case. 
It must not error just because stdin is not a tty + +println "\n===> zstd --list/-l test with null files " +datagen -g0 > tmp5 +zstd tmp5 +zstd -l tmp5.zst +zstd -l tmp5* && die "-l must fail on non-zstd file" +zstd -lv tmp5.zst | grep "Decompressed Size: 0.00 KB (0 B)" # check that 0 size is present in header +zstd -lv tmp5* && die "-l must fail on non-zstd file" + +println "\n===> zstd --list/-l test with no content size field " +datagen -g513K | zstd > tmp6.zst +zstd -l tmp6.zst +zstd -lv tmp6.zst | grep "Decompressed Size:" && die "Field :Decompressed Size: should not be available in this compressed file" + +println "\n===> zstd --list/-l test with no checksum " +zstd -f --no-check tmp1 +zstd -l tmp1.zst +zstd -lv tmp1.zst + +rm tmp* + + +println "\n===> zstd long distance matching tests " +roundTripTest -g0 " --single-thread --long" +roundTripTest -g9M "2 --single-thread --long" +# Test parameter parsing +roundTripTest -g1M -P50 "1 --single-thread --long=29" " --memory=512MB" +roundTripTest -g1M -P50 "1 --single-thread --long=29 --zstd=wlog=28" " --memory=256MB" +roundTripTest -g1M -P50 "1 --single-thread --long=29" " --long=28 --memory=512MB" +roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB" + + + + +if [ "$1" != "--test-large-data" ]; then + println "Skipping large data tests" + exit 0 +fi + + +############################################################################# + + +if [ -n "$hasMT" ] +then + println "\n===> adaptive mode " + roundTripTest -g270000000 " --adapt" + roundTripTest -g27000000 " --adapt=min=1,max=4" + println "===> test: --adapt must fail on incoherent bounds " + datagen > tmp + zstd -f -vv --adapt=min=10,max=9 tmp && die "--adapt must fail on incoherent bounds" + + println "\n===> rsyncable mode " + roundTripTest -g10M " --rsyncable" + roundTripTest -g10M " --rsyncable -B100K" + println "===> test: --rsyncable must fail with --single-thread" + zstd -f -vv --rsyncable --single-thread tmp && die 
"--rsyncable must fail with --single-thread" +fi + +println "\n===> patch-from tests" + +datagen -g1000 -P50 > tmp_dict +datagen -g1000 -P10 > tmp_patch +zstd --patch-from=tmp_dict tmp_patch -o tmp_patch_diff +zstd -d --patch-from=tmp_dict tmp_patch_diff -o tmp_patch_recon +$DIFF -s tmp_patch_recon tmp_patch +rm -rf tmp_* + +println "\n===> patch-from recursive tests" + +mkdir tmp_dir +datagen > tmp_dir/tmp1 +datagen > tmp_dir/tmp2 +datagen > tmp_dict +zstd --patch-from=tmp_dict -r tmp_dir && die +rm -rf tmp* + +println "\n===> patch-from long mode trigger larger file test" + +datagen -g5000000 > tmp_dict +datagen -g5000000 > tmp_patch +zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automaticaly triggered" +rm -rf tmp* + +println "\n===> large files tests " + +roundTripTest -g270000000 1 +roundTripTest -g250000000 2 +roundTripTest -g230000000 3 + +roundTripTest -g140000000 -P60 4 +roundTripTest -g130000000 -P62 5 +roundTripTest -g120000000 -P65 6 + +roundTripTest -g70000000 -P70 7 +roundTripTest -g60000000 -P71 8 +roundTripTest -g50000000 -P73 9 + +roundTripTest -g35000000 -P75 10 +roundTripTest -g30000000 -P76 11 +roundTripTest -g25000000 -P78 12 + +roundTripTest -g18000013 -P80 13 +roundTripTest -g18000014 -P80 14 +roundTripTest -g18000015 -P81 15 +roundTripTest -g18000016 -P84 16 +roundTripTest -g18000017 -P88 17 +roundTripTest -g18000018 -P94 18 +roundTripTest -g18000019 -P96 19 + +roundTripTest -g5000000000 -P99 "1 --zstd=wlog=25" +roundTripTest -g3700000000 -P0 "1 --zstd=strategy=6,wlog=25" # ensure btlazy2 can survive an overflow rescale + +fileRoundTripTest -g4193M -P99 1 + + +println "\n===> zstd long, long distance matching round-trip tests " +roundTripTest -g270000000 "1 --single-thread --long" +roundTripTest -g130000000 -P60 "5 --single-thread --long" +roundTripTest -g35000000 -P70 "8 --single-thread --long" +roundTripTest -g18000001 -P80 "18 --single-thread --long" +# Test large window logs +roundTripTest -g700M -P50 "1 --single-thread 
--long=29" +roundTripTest -g600M -P50 "1 --single-thread --long --zstd=wlog=29,clog=28" + + +if [ -n "$hasMT" ] +then + println "\n===> zstdmt long round-trip tests " + roundTripTest -g80000000 -P99 "19 -T2" " " + roundTripTest -g5000000000 -P99 "1 -T2" " " + roundTripTest -g500000000 -P97 "1 -T999" " " + fileRoundTripTest -g4103M -P98 " -T0" " " + roundTripTest -g400000000 -P97 "1 --long=24 -T2" " " + # Exposes the bug in https://github.com/facebook/zstd/pull/1678 + # This test fails on 4 different travis builds at the time of writing + # because it needs to allocate 8 GB of memory. + # roundTripTest -g10G -P99 "1 -T1 --long=31 --zstd=clog=27 --fast=1000" +else + println "\n**** no multithreading, skipping zstdmt tests **** " +fi + + +println "\n===> cover dictionary builder : advanced options " + +TESTFILE="$PRGDIR"/zstdcli.c +datagen > tmpDict +println "- Create first dictionary" +zstd --train-cover=k=46,d=8,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict +cp "$TESTFILE" tmp +zstd -f tmp -D tmpDict +zstd -d tmp.zst -D tmpDict -fo result +$DIFF "$TESTFILE" result +zstd --train-cover=k=56,d=8 && die "Create dictionary without input file (should error)" +println "- Create second (different) dictionary" +zstd --train-cover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC +zstd -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +println "- Create dictionary using shrink-dict flag" +zstd --train-cover=steps=256,shrink "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpShrinkDict +zstd --train-cover=steps=256,shrink=1 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpShrinkDict1 +zstd --train-cover=steps=256,shrink=5 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpShrinkDict2 +println "- Create dictionary with short dictID" +zstd --train-cover=k=46,d=8,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" 
+println "- Create dictionary with size limit" +zstd --train-cover=steps=8 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K +println "- Compare size of dictionary from 90% training samples with 80% training samples" +zstd --train-cover=split=90 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +zstd --train-cover=split=80 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +println "- Create dictionary using all samples for both training and testing" +zstd --train-cover=split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +println "- Test -o before --train-cover" +rm -f tmpDict dictionary +zstd -o tmpDict --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f tmpDict +zstd --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c +test -f dictionary +rm -f tmp* dictionary + +rm -f tmp* diff --git a/src/zstd/tests/poolTests.c b/src/zstd/tests/poolTests.c new file mode 100644 index 000000000..e1576ba85 --- /dev/null +++ b/src/zstd/tests/poolTests.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#include "pool.h" +#include "threading.h" +#include "util.h" +#include "timefn.h" +#include <stddef.h> +#include <stdio.h> + +#define ASSERT_TRUE(p) \ + do { \ + if (!(p)) { \ + return 1; \ + } \ + } while (0) +#define ASSERT_FALSE(p) ASSERT_TRUE(!(p)) +#define ASSERT_EQ(lhs, rhs) ASSERT_TRUE((lhs) == (rhs)) + +struct data { + ZSTD_pthread_mutex_t mutex; + unsigned data[16]; + size_t i; +}; + +static void fn(void *opaque) +{ + struct data *data = (struct data *)opaque; + ZSTD_pthread_mutex_lock(&data->mutex); + data->data[data->i] = (unsigned)(data->i); + ++data->i; + ZSTD_pthread_mutex_unlock(&data->mutex); +} + +static int testOrder(size_t numThreads, size_t queueSize) +{ + struct data data; + POOL_ctx* const ctx = POOL_create(numThreads, queueSize); + ASSERT_TRUE(ctx); + data.i = 0; + ASSERT_FALSE(ZSTD_pthread_mutex_init(&data.mutex, NULL)); + { size_t i; + for (i = 0; i < 16; ++i) { + POOL_add(ctx, &fn, &data); + } + } + POOL_free(ctx); + ASSERT_EQ(16, data.i); + { size_t i; + for (i = 0; i < data.i; ++i) { + ASSERT_EQ(i, data.data[i]); + } + } + ZSTD_pthread_mutex_destroy(&data.mutex); + return 0; +} + + +/* --- test deadlocks --- */ + +static void waitFn(void *opaque) { + (void)opaque; + UTIL_sleepMilli(1); +} + +/* Tests for deadlock */ +static int testWait(size_t numThreads, size_t queueSize) { + struct data data; + POOL_ctx* const ctx = POOL_create(numThreads, queueSize); + ASSERT_TRUE(ctx); + { size_t i; + for (i = 0; i < 16; ++i) { + POOL_add(ctx, &waitFn, &data); + } + } + POOL_free(ctx); + return 0; +} + + +/* --- test POOL_resize() --- */ + +typedef struct { + ZSTD_pthread_mutex_t mut; + int countdown; + int val; + int max; + ZSTD_pthread_cond_t cond; +} poolTest_t; + +static void waitLongFn(void *opaque) { + poolTest_t* const test = (poolTest_t*) opaque; + ZSTD_pthread_mutex_lock(&test->mut); + test->val++; + if (test->val > test->max) + test->max = test->val; + ZSTD_pthread_mutex_unlock(&test->mut); + + UTIL_sleepMilli(10); + + 
ZSTD_pthread_mutex_lock(&test->mut); + test->val--; + test->countdown--; + if (test->countdown == 0) + ZSTD_pthread_cond_signal(&test->cond); + ZSTD_pthread_mutex_unlock(&test->mut); +} + +static int testThreadReduction_internal(POOL_ctx* ctx, poolTest_t test) +{ + int const nbWaits = 16; + + test.countdown = nbWaits; + test.val = 0; + test.max = 0; + + { int i; + for (i=0; i<nbWaits; i++) + POOL_add(ctx, &waitLongFn, &test); + } + ZSTD_pthread_mutex_lock(&test.mut); + while (test.countdown > 0) + ZSTD_pthread_cond_wait(&test.cond, &test.mut); + ASSERT_EQ(test.val, 0); + ASSERT_EQ(test.max, 4); + ZSTD_pthread_mutex_unlock(&test.mut); + + ASSERT_EQ( POOL_resize(ctx, 2/*nbThreads*/) , 0 ); + test.countdown = nbWaits; + test.val = 0; + test.max = 0; + { int i; + for (i=0; i<nbWaits; i++) + POOL_add(ctx, &waitLongFn, &test); + } + ZSTD_pthread_mutex_lock(&test.mut); + while (test.countdown > 0) + ZSTD_pthread_cond_wait(&test.cond, &test.mut); + ASSERT_EQ(test.val, 0); + ASSERT_EQ(test.max, 2); + ZSTD_pthread_mutex_unlock(&test.mut); + + return 0; +} + +static int testThreadReduction(void) { + int result; + poolTest_t test; + POOL_ctx* const ctx = POOL_create(4 /*nbThreads*/, 2 /*queueSize*/); + + ASSERT_TRUE(ctx); + + memset(&test, 0, sizeof(test)); + ASSERT_FALSE( ZSTD_pthread_mutex_init(&test.mut, NULL) ); + ASSERT_FALSE( ZSTD_pthread_cond_init(&test.cond, NULL) ); + + result = testThreadReduction_internal(ctx, test); + + ZSTD_pthread_mutex_destroy(&test.mut); + ZSTD_pthread_cond_destroy(&test.cond); + POOL_free(ctx); + + return result; +} + + +/* --- test abrupt ending --- */ + +typedef struct { + ZSTD_pthread_mutex_t mut; + int val; +} abruptEndCanary_t; + +static void waitIncFn(void *opaque) { + abruptEndCanary_t* test = (abruptEndCanary_t*) opaque; + UTIL_sleepMilli(10); + ZSTD_pthread_mutex_lock(&test->mut); + test->val = test->val + 1; + ZSTD_pthread_mutex_unlock(&test->mut); +} + +static int testAbruptEnding_internal(abruptEndCanary_t test) +{ + int const 
nbWaits = 16; + + POOL_ctx* const ctx = POOL_create(3 /*numThreads*/, nbWaits /*queueSize*/); + ASSERT_TRUE(ctx); + test.val = 0; + + { int i; + for (i=0; i<nbWaits; i++) + POOL_add(ctx, &waitIncFn, &test); /* all jobs pushed into queue */ + } + ASSERT_EQ( POOL_resize(ctx, 1 /*numThreads*/) , 0 ); /* downsize numThreads, to try to break end condition */ + + POOL_free(ctx); /* must finish all jobs in queue before giving back control */ + ASSERT_EQ(test.val, nbWaits); + return 0; +} + +static int testAbruptEnding(void) { + int result; + abruptEndCanary_t test; + + memset(&test, 0, sizeof(test)); + ASSERT_FALSE( ZSTD_pthread_mutex_init(&test.mut, NULL) ); + + result = testAbruptEnding_internal(test); + + ZSTD_pthread_mutex_destroy(&test.mut); + return result; +} + + + +/* --- test launcher --- */ + +int main(int argc, const char **argv) { + size_t numThreads; + (void)argc; + (void)argv; + + if (POOL_create(0, 1)) { /* should not be possible */ + printf("FAIL: should not create POOL with 0 threads\n"); + return 1; + } + + for (numThreads = 1; numThreads <= 4; ++numThreads) { + size_t queueSize; + for (queueSize = 0; queueSize <= 2; ++queueSize) { + printf("queueSize==%u, numThreads=%u \n", + (unsigned)queueSize, (unsigned)numThreads); + if (testOrder(numThreads, queueSize)) { + printf("FAIL: testOrder\n"); + return 1; + } + printf("SUCCESS: testOrder\n"); + if (testWait(numThreads, queueSize)) { + printf("FAIL: testWait\n"); + return 1; + } + printf("SUCCESS: testWait\n"); + } + } + + if (testThreadReduction()) { + printf("FAIL: thread reduction not effective \n"); + return 1; + } else { + printf("SUCCESS: thread reduction effective \n"); + } + + if (testAbruptEnding()) { + printf("FAIL: jobs in queue not completed on early end \n"); + return 1; + } else { + printf("SUCCESS: all jobs in queue completed on early end \n"); + } + + printf("PASS: all POOL tests\n"); + + return 0; +} diff --git a/src/zstd/tests/rateLimiter.py b/src/zstd/tests/rateLimiter.py new file mode 
100755 index 000000000..1068c4424 --- /dev/null +++ b/src/zstd/tests/rateLimiter.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +# ################################################################ +# Copyright (c) 2018-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ########################################################################## + +# Rate limiter, replacement for pv +# this rate limiter does not "catch up" after a blocking period +# Limitations: +# - only accepts limit speed in MB/s + +import sys +import time + +MB = 1024 * 1024 +rate = float(sys.argv[1]) * MB +start = time.time() +total_read = 0 + +# sys.stderr.close() # remove error message, for Ctrl+C + +try: + buf = " " + while len(buf): + now = time.time() + to_read = max(int(rate * (now - start)), 1) + max_buf_size = 1 * MB + to_read = min(to_read, max_buf_size) + start = now + + buf = sys.stdin.buffer.read(to_read) + sys.stdout.buffer.write(buf) + +except (KeyboardInterrupt, BrokenPipeError) as e: + pass diff --git a/src/zstd/tests/regression/.gitignore b/src/zstd/tests/regression/.gitignore new file mode 100644 index 000000000..1b2618f41 --- /dev/null +++ b/src/zstd/tests/regression/.gitignore @@ -0,0 +1,3 @@ +# regression test artifacts +data-cache +test diff --git a/src/zstd/tests/regression/Makefile b/src/zstd/tests/regression/Makefile new file mode 100644 index 000000000..87c1c2b96 --- /dev/null +++ b/src/zstd/tests/regression/Makefile @@ -0,0 +1,59 @@ +# ################################################################ +# Copyright (c) 2015-2020, Facebook, Inc. +# All rights reserved. 
+# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +CFLAGS ?= -O3 + +CURL_CFLAGS := $(shell curl-config --cflags) +CURL_LDFLAGS := $(shell curl-config --libs) -pthread + +PROGDIR := ../../programs +LIBDIR := ../../lib +ZSTD_CPPFLAGS := -I$(PROGDIR) -I$(LIBDIR) -I$(LIBDIR)/common + +REGRESSION_CFLAGS = $(CFLAGS) $(CURL_CFLAGS) +REGRESSION_CPPFLAGS = $(CPPFLAGS) $(ZSTD_CPPFLAGS) +REGRESSION_LDFLAGS = $(LDFLAGS) $(CURL_LDFLAGS) + +all: test + +xxhash.o: $(LIBDIR)/common/xxhash.c $(LIBDIR)/common/xxhash.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +util.o: $(PROGDIR)/util.c $(PROGDIR)/util.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +data.o: data.c data.h $(PROGDIR)/util.h $(LIBDIR)/common/xxhash.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +config.o: config.c config.h levels.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +method.h: data.h config.h result.h + +method.o: method.c method.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +result.o: result.c result.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +test.o: test.c data.h config.h method.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +libzstd.a: + $(MAKE) -C $(LIBDIR) libzstd.a-mt + cp $(LIBDIR)/libzstd.a . 
+ +test: test.o data.o config.o util.o method.o result.o xxhash.o libzstd.a + $(CC) $^ $(REGRESSION_LDFLAGS) -o $@ + +.PHONY: clean +clean: + $(MAKE) -C $(LIBDIR) clean + $(RM) *.o *.a test diff --git a/src/zstd/tests/regression/config.c b/src/zstd/tests/regression/config.c new file mode 100644 index 000000000..ed6b69235 --- /dev/null +++ b/src/zstd/tests/regression/config.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "config.h" + +/* Define a config for each fast level we want to test with. */ +#define FAST_LEVEL(x) \ + param_value_t const level_fast##x##_param_values[] = { \ + {.param = ZSTD_c_compressionLevel, .value = -x}, \ + }; \ + config_t const level_fast##x = { \ + .name = "level -" #x, \ + .cli_args = "--fast=" #x, \ + .param_values = PARAM_VALUES(level_fast##x##_param_values), \ + }; \ + config_t const level_fast##x##_dict = { \ + .name = "level -" #x " with dict", \ + .cli_args = "--fast=" #x, \ + .param_values = PARAM_VALUES(level_fast##x##_param_values), \ + .use_dictionary = 1, \ + }; + +/* Define a config for each level we want to test with. 
*/ +#define LEVEL(x) \ + param_value_t const level_##x##_param_values[] = { \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + }; \ + config_t const level_##x = { \ + .name = "level " #x, \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(level_##x##_param_values), \ + }; \ + config_t const level_##x##_dict = { \ + .name = "level " #x " with dict", \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(level_##x##_param_values), \ + .use_dictionary = 1, \ + }; + +#define PARAM_VALUES(pv) \ + { .data = pv, .size = sizeof(pv) / sizeof((pv)[0]) } + +#include "levels.h" + +#undef LEVEL +#undef FAST_LEVEL + +static config_t no_pledged_src_size = { + .name = "no source size", + .cli_args = "", + .param_values = PARAM_VALUES(level_0_param_values), + .no_pledged_src_size = 1, +}; + +static param_value_t const ldm_param_values[] = { + {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, +}; + +static config_t ldm = { + .name = "long distance mode", + .cli_args = "--long", + .param_values = PARAM_VALUES(ldm_param_values), +}; + +static param_value_t const mt_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, +}; + +static config_t mt = { + .name = "multithreaded", + .cli_args = "-T2", + .param_values = PARAM_VALUES(mt_param_values), +}; + +static param_value_t const mt_ldm_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, + {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, +}; + +static config_t mt_ldm = { + .name = "multithreaded long distance mode", + .cli_args = "-T2 --long", + .param_values = PARAM_VALUES(mt_ldm_param_values), +}; + +static param_value_t mt_advanced_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t mt_advanced = { + .name = "multithreaded with advanced params", + .cli_args = "-T2 --no-compress-literals", + .param_values = PARAM_VALUES(mt_advanced_param_values), +}; + +static param_value_t const 
small_wlog_param_values[] = { + {.param = ZSTD_c_windowLog, .value = 10}, +}; + +static config_t small_wlog = { + .name = "small window log", + .cli_args = "--zstd=wlog=10", + .param_values = PARAM_VALUES(small_wlog_param_values), +}; + +static param_value_t const small_hlog_param_values[] = { + {.param = ZSTD_c_hashLog, .value = 6}, + {.param = ZSTD_c_strategy, .value = (int)ZSTD_btopt}, +}; + +static config_t small_hlog = { + .name = "small hash log", + .cli_args = "--zstd=hlog=6,strat=7", + .param_values = PARAM_VALUES(small_hlog_param_values), +}; + +static param_value_t const small_clog_param_values[] = { + {.param = ZSTD_c_chainLog, .value = 6}, + {.param = ZSTD_c_strategy, .value = (int)ZSTD_btopt}, +}; + +static config_t small_clog = { + .name = "small chain log", + .cli_args = "--zstd=clog=6,strat=7", + .param_values = PARAM_VALUES(small_clog_param_values), +}; + +static param_value_t const uncompressed_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = 3}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals = { + .name = "uncompressed literals", + .cli_args = "-3 --no-compress-literals", + .param_values = PARAM_VALUES(uncompressed_literals_param_values), +}; + +static param_value_t const uncompressed_literals_opt_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = 19}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals_opt = { + .name = "uncompressed literals optimal", + .cli_args = "-19 --no-compress-literals", + .param_values = PARAM_VALUES(uncompressed_literals_opt_param_values), +}; + +static param_value_t const huffman_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = -1}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_huffman}, +}; + +static config_t huffman_literals = { + .name = "huffman literals", + .cli_args = "--fast=1 
--compress-literals", + .param_values = PARAM_VALUES(huffman_literals_param_values), +}; + +static param_value_t const explicit_params_param_values[] = { + {.param = ZSTD_c_checksumFlag, .value = 1}, + {.param = ZSTD_c_contentSizeFlag, .value = 0}, + {.param = ZSTD_c_dictIDFlag, .value = 0}, + {.param = ZSTD_c_strategy, .value = (int)ZSTD_greedy}, + {.param = ZSTD_c_windowLog, .value = 18}, + {.param = ZSTD_c_hashLog, .value = 21}, + {.param = ZSTD_c_chainLog, .value = 21}, + {.param = ZSTD_c_targetLength, .value = 100}, +}; + +static config_t explicit_params = { + .name = "explicit params", + .cli_args = "--no-check --no-dictID --zstd=strategy=3,wlog=18,hlog=21,clog=21,tlen=100", + .param_values = PARAM_VALUES(explicit_params_param_values), +}; + +static config_t const* g_configs[] = { + +#define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict, +#define LEVEL(x) &level_##x, &level_##x##_dict, +#include "levels.h" +#undef LEVEL +#undef FAST_LEVEL + + &no_pledged_src_size, + &ldm, + &mt, + &mt_ldm, + &small_wlog, + &small_hlog, + &small_clog, + &explicit_params, + &uncompressed_literals, + &uncompressed_literals_opt, + &huffman_literals, + &mt_advanced, + NULL, +}; + +config_t const* const* configs = g_configs; + +int config_skip_data(config_t const* config, data_t const* data) { + return config->use_dictionary && !data_has_dict(data); +} + +int config_get_level(config_t const* config) +{ + param_values_t const params = config->param_values; + size_t i; + for (i = 0; i < params.size; ++i) { + if (params.data[i].param == ZSTD_c_compressionLevel) + return (int)params.data[i].value; + } + return CONFIG_NO_LEVEL; +} + +ZSTD_parameters config_get_zstd_params( + config_t const* config, + uint64_t srcSize, + size_t dictSize) +{ + ZSTD_parameters zparams = {}; + param_values_t const params = config->param_values; + int level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + level = 3; + zparams = ZSTD_getParams( + level, + config->no_pledged_src_size ? 
ZSTD_CONTENTSIZE_UNKNOWN : srcSize, + dictSize); + for (size_t i = 0; i < params.size; ++i) { + unsigned const value = params.data[i].value; + switch (params.data[i].param) { + case ZSTD_c_contentSizeFlag: + zparams.fParams.contentSizeFlag = value; + break; + case ZSTD_c_checksumFlag: + zparams.fParams.checksumFlag = value; + break; + case ZSTD_c_dictIDFlag: + zparams.fParams.noDictIDFlag = !value; + break; + case ZSTD_c_windowLog: + zparams.cParams.windowLog = value; + break; + case ZSTD_c_chainLog: + zparams.cParams.chainLog = value; + break; + case ZSTD_c_hashLog: + zparams.cParams.hashLog = value; + break; + case ZSTD_c_searchLog: + zparams.cParams.searchLog = value; + break; + case ZSTD_c_minMatch: + zparams.cParams.minMatch = value; + break; + case ZSTD_c_targetLength: + zparams.cParams.targetLength = value; + break; + case ZSTD_c_strategy: + zparams.cParams.strategy = (ZSTD_strategy)value; + break; + default: + break; + } + } + return zparams; +} diff --git a/src/zstd/tests/regression/config.h b/src/zstd/tests/regression/config.h new file mode 100644 index 000000000..aa563b9e9 --- /dev/null +++ b/src/zstd/tests/regression/config.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include <stddef.h> + +#define ZSTD_STATIC_LINKING_ONLY +#include <zstd.h> + +#include "data.h" + +typedef struct { + ZSTD_cParameter param; + int value; +} param_value_t; + +typedef struct { + size_t size; + param_value_t const* data; +} param_values_t; + +/** + * The config tells the compression method what options to use. 
+ */ +typedef struct { + const char* name; /**< Identifies the config in the results table */ + /** + * Optional arguments to pass to the CLI. If not set, CLI-based methods + * will skip this config. + */ + char const* cli_args; + /** + * Parameters to pass to the advanced API. If the advanced API isn't used, + * the parameters will be derived from these. + */ + param_values_t param_values; + /** + * Boolean parameter that says if we should use a dictionary. If the data + * doesn't have a dictionary, this config is skipped. Defaults to no. + */ + int use_dictionary; + /** + * Boolean parameter that says if we should pass the pledged source size + * when the method allows it. Defaults to yes. + */ + int no_pledged_src_size; +} config_t; + +/** + * Returns true if the config should skip this data. + * For instance, if the config requires a dictionary but the data doesn't have + * one. + */ +int config_skip_data(config_t const* config, data_t const* data); + +#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1) +/** + * Returns the compression level specified by the config, or CONFIG_NO_LEVEL if + * no level is specified. Note that 0 is a valid compression level, meaning + * default. + */ +int config_get_level(config_t const* config); + +/** + * Returns the compression parameters specified by the config. + */ +ZSTD_parameters config_get_zstd_params( + config_t const* config, + uint64_t srcSize, + size_t dictSize); + +/** + * The NULL-terminated list of configs. + */ +extern config_t const* const* configs; + +#endif diff --git a/src/zstd/tests/regression/data.c b/src/zstd/tests/regression/data.c new file mode 100644 index 000000000..b75ac1192 --- /dev/null +++ b/src/zstd/tests/regression/data.c @@ -0,0 +1,613 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "data.h" + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include <sys/stat.h> + +#include <curl/curl.h> + +#include "mem.h" +#include "util.h" +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + +/** + * Data objects + */ + +#define REGRESSION_RELEASE(x) \ + "https://github.com/facebook/zstd/releases/download/regression-data/" x + +data_t silesia = { + .name = "silesia", + .type = data_type_dir, + .data = + { + .url = REGRESSION_RELEASE("silesia.tar.zst"), + .xxhash64 = 0x48a199f92f93e977LL, + }, +}; + +data_t silesia_tar = { + .name = "silesia.tar", + .type = data_type_file, + .data = + { + .url = REGRESSION_RELEASE("silesia.tar.zst"), + .xxhash64 = 0x48a199f92f93e977LL, + }, +}; + +data_t github = { + .name = "github", + .type = data_type_dir, + .data = + { + .url = REGRESSION_RELEASE("github.tar.zst"), + .xxhash64 = 0xa9b1b44b020df292LL, + }, + .dict = + { + .url = REGRESSION_RELEASE("github.dict.zst"), + .xxhash64 = 0x1eddc6f737d3cb53LL, + + }, +}; + +static data_t* g_data[] = { + &silesia, + &silesia_tar, + &github, + NULL, +}; + +data_t const* const* data = (data_t const* const*)g_data; + +/** + * data helpers. + */ + +int data_has_dict(data_t const* data) { + return data->dict.url != NULL; +} + +/** + * data buffer helper functions (documented in header). 
+ */ + +data_buffer_t data_buffer_create(size_t const capacity) { + data_buffer_t buffer = {}; + + buffer.data = (uint8_t*)malloc(capacity); + if (buffer.data == NULL) + return buffer; + buffer.capacity = capacity; + return buffer; +} + +data_buffer_t data_buffer_read(char const* filename) { + data_buffer_t buffer = {}; + + uint64_t const size = UTIL_getFileSize(filename); + if (size == UTIL_FILESIZE_UNKNOWN) { + fprintf(stderr, "unknown size for %s\n", filename); + return buffer; + } + + buffer.data = (uint8_t*)malloc(size); + if (buffer.data == NULL) { + fprintf(stderr, "malloc failed\n"); + return buffer; + } + buffer.capacity = size; + + FILE* file = fopen(filename, "rb"); + if (file == NULL) { + fprintf(stderr, "file null\n"); + goto err; + } + buffer.size = fread(buffer.data, 1, buffer.capacity, file); + fclose(file); + if (buffer.size != buffer.capacity) { + fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity); + goto err; + } + + return buffer; +err: + free(buffer.data); + memset(&buffer, 0, sizeof(buffer)); + return buffer; +} + +data_buffer_t data_buffer_get_data(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (data->type != data_type_file) + return kEmptyBuffer; + + return data_buffer_read(data->data.path); +} + +data_buffer_t data_buffer_get_dict(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (!data_has_dict(data)) + return kEmptyBuffer; + + return data_buffer_read(data->dict.path); +} + +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) { + size_t const size = + buffer1.size < buffer2.size ? buffer1.size : buffer2.size; + int const cmp = memcmp(buffer1.data, buffer2.data, size); + if (cmp != 0) + return cmp; + if (buffer1.size < buffer2.size) + return -1; + if (buffer1.size == buffer2.size) + return 0; + assert(buffer1.size > buffer2.size); + return 1; +} + +void data_buffer_free(data_buffer_t buffer) { + free(buffer.data); +} + +/** + * data filenames helpers. 
+ */ + +FileNamesTable* data_filenames_get(data_t const* data) +{ + char const* const path = data->data.path; + return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ ); +} + +/** + * data buffers helpers. + */ + +data_buffers_t data_buffers_get(data_t const* data) { + data_buffers_t buffers = {.size = 0}; + FileNamesTable* const filenames = data_filenames_get(data); + if (filenames == NULL) return buffers; + if (filenames->tableSize == 0) { + UTIL_freeFileNamesTable(filenames); + return buffers; + } + + data_buffer_t* buffersPtr = + (data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr)); + if (buffersPtr == NULL) { + UTIL_freeFileNamesTable(filenames); + return buffers; + } + buffers.buffers = (data_buffer_t const*)buffersPtr; + buffers.size = filenames->tableSize; + + for (size_t i = 0; i < filenames->tableSize; ++i) { + buffersPtr[i] = data_buffer_read(filenames->fileNames[i]); + if (buffersPtr[i].data == NULL) { + data_buffers_t const kEmptyBuffer = {}; + data_buffers_free(buffers); + UTIL_freeFileNamesTable(filenames); + return kEmptyBuffer; + } + } + + UTIL_freeFileNamesTable(filenames); + return buffers; +} + +/** + * Frees the data buffers. + */ +void data_buffers_free(data_buffers_t buffers) { + free((data_buffer_t*)buffers.buffers); +} + +/** + * Initialization and download functions. + */ + +static char* g_data_dir = NULL; + +/* mkdir -p */ +static int ensure_directory_exists(char const* indir) { + char* const dir = strdup(indir); + char* end = dir; + int ret = 0; + if (dir == NULL) { + ret = EINVAL; + goto out; + } + do { + /* Find the next directory level. */ + for (++end; *end != '\0' && *end != '/'; ++end) + ; + /* End the string there, make the directory, and restore the string. */ + char const save = *end; + *end = '\0'; + int const isdir = UTIL_isDirectory(dir); + ret = mkdir(dir, S_IRWXU); + *end = save; + /* Its okay if the directory already exists. 
*/ + if (ret == 0 || (errno == EEXIST && isdir)) + continue; + ret = errno; + fprintf(stderr, "mkdir() failed\n"); + goto out; + } while (*end != '\0'); + + ret = 0; +out: + free(dir); + return ret; +} + +/** Concatenate 3 strings into a new buffer. */ +static char* cat3(char const* str1, char const* str2, char const* str3) { + size_t const size1 = strlen(str1); + size_t const size2 = strlen(str2); + size_t const size3 = str3 == NULL ? 0 : strlen(str3); + size_t const size = size1 + size2 + size3 + 1; + char* const dst = (char*)malloc(size); + if (dst == NULL) + return NULL; + strcpy(dst, str1); + strcpy(dst + size1, str2); + if (str3 != NULL) + strcpy(dst + size1 + size2, str3); + assert(strlen(dst) == size1 + size2 + size3); + return dst; +} + +static char* cat2(char const* str1, char const* str2) { + return cat3(str1, str2, NULL); +} + +/** + * State needed by the curl callback. + * It takes data from curl, hashes it, and writes it to the file. + */ +typedef struct { + FILE* file; + XXH64_state_t xxhash64; + int error; +} curl_data_t; + +/** Create the curl state. */ +static curl_data_t curl_data_create( + data_resource_t const* resource, + data_type_t type) { + curl_data_t cdata = {}; + + XXH64_reset(&cdata.xxhash64, 0); + + assert(UTIL_isDirectory(g_data_dir)); + + if (type == data_type_file) { + /* Decompress the resource and store to the path. */ + char* cmd = cat3("zstd -dqfo '", resource->path, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } else { + /* Decompress and extract the resource to the cache directory. */ + char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } + if (cdata.file == NULL) { + cdata.error = errno; + } + + return cdata; +} + +/** Free the curl state. 
*/ +static int curl_data_free(curl_data_t cdata) { + return pclose(cdata.file); +} + +/** curl callback. Updates the hash, and writes to the file. */ +static size_t curl_write(void* data, size_t size, size_t count, void* ptr) { + curl_data_t* cdata = (curl_data_t*)ptr; + size_t const written = fwrite(data, size, count, cdata->file); + XXH64_update(&cdata->xxhash64, data, written * size); + return written; +} + +static int curl_download_resource( + CURL* curl, + data_resource_t const* resource, + data_type_t type) { + curl_data_t cdata; + /* Download the data. */ + if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0) + return EINVAL; + if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0) + return EINVAL; + cdata = curl_data_create(resource, type); + if (cdata.error != 0) + return cdata.error; + int const curl_err = curl_easy_perform(curl); + int const close_err = curl_data_free(cdata); + if (curl_err) { + fprintf( + stderr, + "downloading '%s' for '%s' failed\n", + resource->url, + resource->path); + return EIO; + } + if (close_err) { + fprintf(stderr, "writing data to '%s' failed\n", resource->path); + return EIO; + } + /* check that the file exists. */ + if (type == data_type_file && !UTIL_isRegularFile(resource->path)) { + fprintf(stderr, "output file '%s' does not exist\n", resource->path); + return EIO; + } + if (type == data_type_dir && !UTIL_isDirectory(resource->path)) { + fprintf( + stderr, "output directory '%s' does not exist\n", resource->path); + return EIO; + } + /* Check that the hash matches. */ + if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) { + fprintf( + stderr, + "checksum does not match: 0x%llxLL != 0x%llxLL\n", + (unsigned long long)XXH64_digest(&cdata.xxhash64), + (unsigned long long)resource->xxhash64); + return EINVAL; + } + + return 0; +} + +/** Download a single data object. 
*/ +static int curl_download_datum(CURL* curl, data_t const* data) { + int ret; + ret = curl_download_resource(curl, &data->data, data->type); + if (ret != 0) + return ret; + if (data_has_dict(data)) { + ret = curl_download_resource(curl, &data->dict, data_type_file); + if (ret != 0) + return ret; + } + return ret; +} + +/** Download all the data. */ +static int curl_download_data(data_t const* const* data) { + if (curl_global_init(CURL_GLOBAL_ALL) != 0) + return EFAULT; + + curl_data_t cdata = {}; + CURL* curl = curl_easy_init(); + int err = EFAULT; + + if (curl == NULL) + return EFAULT; + + if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0) + goto out; + + assert(data != NULL); + for (; *data != NULL; ++data) { + if (curl_download_datum(curl, *data) != 0) + goto out; + } + + err = 0; +out: + curl_easy_cleanup(curl); + curl_global_cleanup(); + return err; +} + +/** Fill the path member variable of the data objects. */ +static int data_create_paths(data_t* const* data, char const* dir) { + size_t const dirlen = strlen(dir); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* const datum = *data; + datum->data.path = cat3(dir, "/", datum->name); + if (datum->data.path == NULL) + return ENOMEM; + if (data_has_dict(datum)) { + datum->dict.path = cat2(datum->data.path, ".dict"); + if (datum->dict.path == NULL) + return ENOMEM; + } + } + return 0; +} + +/** Free the path member variable of the data objects. 
*/
+static void data_free_paths(data_t* const* data) {
+    assert(data != NULL);
+    for (; *data != NULL; ++data) {
+        data_t* datum = *data;
+        free((void*)datum->data.path);
+        free((void*)datum->dict.path);
+        datum->data.path = NULL;
+        datum->dict.path = NULL;
+    }
+}
+
+static char const kStampName[] = "STAMP";
+
+/** Feed a 64-bit value into the hash in little-endian byte order. */
+static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
+    if (!MEM_isLittleEndian())
+        data = MEM_swap64(data);
+    XXH64_update(state, &data, sizeof(data));
+}
+
+/** Hash the data to create the stamp. */
+static uint64_t stamp_hash(data_t const* const* data) {
+    XXH64_state_t state;
+
+    XXH64_reset(&state, 0);
+    assert(data != NULL);
+    for (; *data != NULL; ++data) {
+        data_t const* datum = *data;
+        /* We don't care about the URL that we fetch from. */
+        /* The path is derived from the name. */
+        XXH64_update(&state, datum->name, strlen(datum->name));
+        xxh_update_le(&state, datum->data.xxhash64);
+        xxh_update_le(&state, datum->dict.xxhash64);
+        xxh_update_le(&state, datum->type);
+    }
+    return XXH64_digest(&state);
+}
+
+/**
+ * Check if the stamp matches the stamp in the cache directory.
+ *
+ * @returns 1 if the stamp file exists and holds the expected hash, 0 in every
+ *          other case (including allocation and I/O failures).
+ */
+static int stamp_check(char const* dir, data_t const* const* data) {
+    char* stamp = cat3(dir, "/", kStampName);
+    uint64_t const expected = stamp_hash(data);
+    XXH64_canonical_t actual;
+    FILE* stampfile = NULL;
+    int matches = 0;
+
+    if (stamp == NULL)
+        goto out;
+    if (!UTIL_isRegularFile(stamp)) {
+        fprintf(stderr, "stamp does not exist: recreating the data cache\n");
+        goto out;
+    }
+
+    stampfile = fopen(stamp, "rb");
+    if (stampfile == NULL) {
+        fprintf(stderr, "could not open stamp: recreating the data cache\n");
+        goto out;
+    }
+
+    if (fread(&actual, sizeof(actual), 1, stampfile) != 1) {
+        fprintf(stderr, "invalid stamp: recreating the data cache\n");
+        goto out;
+    }
+
+    matches = (expected == XXH64_hashFromCanonical(&actual));
+    if (matches)
+        fprintf(stderr, "stamp matches: reusing the cached data\n");
+    else
+        fprintf(stderr, "stamp does not match: recreating the data cache\n");
+
+out:
+    free(stamp);
+    if (stampfile != NULL)
+        fclose(stampfile);
+    return matches;
+}
+
+/**
+ * On success write a new stamp, on failure delete the old stamp.
+ *
+ * @param data_err The result of populating the cache; non-zero means the
+ *                 cache is not trustworthy and the stamp must be removed.
+ *
+ * @returns 0 if a new stamp was written, otherwise an errno-style code
+ *          (data_err itself when it is non-zero).
+ */
+static int
+stamp_write(char const* dir, data_t const* const* data, int const data_err) {
+    char* stamp = cat3(dir, "/", kStampName);
+    FILE* stampfile = NULL;
+    int err = EIO;
+
+    if (stamp == NULL)
+        return ENOMEM;
+
+    if (data_err != 0) {
+        err = data_err;
+        goto out;
+    }
+    XXH64_canonical_t hash;
+
+    XXH64_canonicalFromHash(&hash, stamp_hash(data));
+
+    stampfile = fopen(stamp, "wb");
+    if (stampfile == NULL)
+        goto out;
+    if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1)
+        goto out;
+    err = 0;
+out:
+    /* Close before deciding the outcome: buffered data is only known to be
+     * written once fclose() succeeds, and the stream must not still be open
+     * when the stamp is unlinked below.
+     */
+    if (stampfile != NULL && fclose(stampfile) != 0 && err == 0)
+        err = EIO;
+    if (err == 0)
+        fprintf(stderr, "stamped new data cache\n");
+    else
+        /* Ignore errors. */
+        unlink(stamp);
+    free(stamp);
+    return err;
+}
+
+int data_init(char const* dir) {
+    int err;
+
+    if (dir == NULL)
+        return EINVAL;
+
+    /* This must be first to simplify logic. */
+    err = ensure_directory_exists(dir);
+    if (err != 0)
+        return err;
+
+    /* Save the cache directory. */
+    g_data_dir = strdup(dir);
+    if (g_data_dir == NULL)
+        return ENOMEM;
+
+    err = data_create_paths(g_data, dir);
+    if (err != 0)
+        return err;
+
+    /* If the stamp matches then we are good to go.
+     * This must be called before any modifications to the data cache.
+     * After this point, we MUST call stamp_write() to update the STAMP,
+     * since we've updated the data cache.
+     */
+    if (stamp_check(dir, data))
+        return 0;
+
+    err = curl_download_data(data);
+    if (err != 0)
+        goto out;
+
+out:
+    /* This must be last, since it must know if data_init() succeeded. */
+    stamp_write(dir, data, err);
+    return err;
+}
+
+void data_finish(void) {
+    data_free_paths(g_data);
+    free(g_data_dir);
+    g_data_dir = NULL;
+}
diff --git a/src/zstd/tests/regression/data.h b/src/zstd/tests/regression/data.h
new file mode 100644
index 000000000..90ed22f19
--- /dev/null
+++ b/src/zstd/tests/regression/data.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef DATA_H
+#define DATA_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef enum {
+    data_type_file = 1, /**< This data is a file. *.zst */
+    data_type_dir = 2, /**< This data is a directory. *.tar.zst */
+} data_type_t;
+
+typedef struct {
+    char const* url; /**< Where to get this resource. */
+    uint64_t xxhash64; /**< Hash of the url contents. */
+    char const* path; /**< The path of the unpacked resource (derived). */
+} data_resource_t;
+
+typedef struct {
+    data_resource_t data;
+    data_resource_t dict;
+    data_type_t type; /**< The type of the data. */
+    char const* name; /**< The logical name of the data (no extension). */
+} data_t;
+
+/**
+ * The NULL-terminated list of data objects.
+ */
+extern data_t const* const* data;
+
+
+int data_has_dict(data_t const* data);
+
+/**
+ * Initializes the data module and downloads the data necessary.
+ * Caches the downloads in dir. We add a stamp file in the directory after
+ * a successful download. If a stamp file already exists, and matches our
+ * current data stamp, we will use the cached data without downloading.
+ *
+ * @param dir The directory to cache the downloaded data into.
+ *
+ * @returns 0 on success.
+ */
+int data_init(char const* dir);
+
+/**
+ * Must be called at exit to free resources allocated by data_init().
+ */
+void data_finish(void);
+
+typedef struct {
+    uint8_t* data;
+    size_t size;
+    size_t capacity;
+} data_buffer_t;
+
+/**
+ * Read the file that data points to into a buffer.
+ * NOTE: data must be a file, not a directory.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_get_data(data_t const* data);
+
+/**
+ * Read the dictionary that the data points to into a buffer.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_get_dict(data_t const* data);
+
+/**
+ * Read the contents of filename into a buffer.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_read(char const* filename);
+
+/**
+ * Create a buffer with the specified capacity.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_create(size_t capacity);
+
+/**
+ * Calls memcmp() on the contents [0, size) of both buffers.
+ */
+int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
+
+/**
+ * Frees an allocated buffer.
+ */
+void data_buffer_free(data_buffer_t buffer);
+
+
+typedef struct {
+    data_buffer_t const* buffers;
+    size_t size;
+} data_buffers_t;
+
+/**
+ * @returns a list of buffers for every file in data. It is zero sized on error.
+ */
+data_buffers_t data_buffers_get(data_t const* data);
+
+/**
+ * Frees the data buffers.
+ */
+void data_buffers_free(data_buffers_t buffers);
+
+#endif
diff --git a/src/zstd/tests/regression/levels.h b/src/zstd/tests/regression/levels.h
new file mode 100644
index 000000000..5e7d40a7d
--- /dev/null
+++ b/src/zstd/tests/regression/levels.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef LEVEL
+# error LEVEL(x) must be defined
+#endif
+#ifndef FAST_LEVEL
+# error FAST_LEVEL(x) must be defined
+#endif
+
+/**
+ * The levels are chosen to trigger every strategy in every source size,
+ * as well as some fast levels and the default level.
+ * If you change the compression levels, you should probably update these.
+ */
+
+FAST_LEVEL(5)
+
+FAST_LEVEL(3)
+
+FAST_LEVEL(1)
+LEVEL(0)
+LEVEL(1)
+
+LEVEL(3)
+LEVEL(4)
+LEVEL(5)
+LEVEL(6)
+LEVEL(7)
+
+LEVEL(9)
+
+LEVEL(13)
+
+LEVEL(16)
+
+LEVEL(19)
diff --git a/src/zstd/tests/regression/method.c b/src/zstd/tests/regression/method.c
new file mode 100644
index 000000000..3c949a278
--- /dev/null
+++ b/src/zstd/tests/regression/method.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "method.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include <zstd.h>
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+static char const* g_zstdcli = NULL;
+
+void method_set_zstdcli(char const* zstdcli) {
+    g_zstdcli = zstdcli;
+}
+
+/**
+ * Macro to get a pointer of type, given ptr, which is a member variable with
+ * the given name, member.
+ *
+ *     method_state_t* base = ...;
+ *     buffer_state_t* state = container_of(base, buffer_state_t, base);
+ */
+#define container_of(ptr, type, member) \
+    ((type*)((ptr) == NULL ? NULL : (char*)(ptr)-offsetof(type, member)))
+
+/** State to reuse the same buffers between compression calls. */
+typedef struct {
+    method_state_t base;
+    data_buffers_t inputs; /**< The input buffer for each file. */
+    data_buffer_t dictionary; /**< The dictionary. */
+    data_buffer_t compressed; /**< The compressed data buffer. */
+    data_buffer_t decompressed; /**< The decompressed data buffer. */
+} buffer_state_t;
+
+/** Returns the size of the largest buffer, or 0 if there are none. */
+static size_t buffers_max_size(data_buffers_t buffers) {
+    size_t max = 0;
+    for (size_t i = 0; i < buffers.size; ++i) {
+        if (buffers.buffers[i].size > max)
+            max = buffers.buffers[i].size;
+    }
+    return max;
+}
+
+/**
+ * Loads the inputs and dictionary for data and allocates scratch buffers
+ * large enough for the biggest input. Load failures are not reported here;
+ * buffer_state_bad() detects them before each compression.
+ */
+static method_state_t* buffer_state_create(data_t const* data) {
+    buffer_state_t* state = (buffer_state_t*)calloc(1, sizeof(buffer_state_t));
+    if (state == NULL)
+        return NULL;
+    state->base.data = data;
+    state->inputs = data_buffers_get(data);
+    state->dictionary = data_buffer_get_dict(data);
+    size_t const max_size = buffers_max_size(state->inputs);
+    state->compressed = data_buffer_create(ZSTD_compressBound(max_size));
+    state->decompressed = data_buffer_create(max_size);
+    return &state->base;
+}
+
+/**
+ * Releases everything buffer_state_create() acquired: the buffers first,
+ * then the state itself. Buffers that failed to load are skipped.
+ */
+static void buffer_state_destroy(method_state_t* base) {
+    if (base == NULL)
+        return;
+    buffer_state_t* state = container_of(base, buffer_state_t, base);
+    if (state->inputs.buffers != NULL)
+        data_buffers_free(state->inputs);
+    if (state->dictionary.data != NULL)
+        data_buffer_free(state->dictionary);
+    if (state->compressed.data != NULL)
+        data_buffer_free(state->compressed);
+    if (state->decompressed.data != NULL)
+        data_buffer_free(state->decompressed);
+    free(state);
+}
+
+/** Returns non-zero (after logging why) if state cannot be used with config. */
+static int buffer_state_bad(
+    buffer_state_t const* state,
+    config_t const* config) {
+    if (state == NULL) {
+        fprintf(stderr, "buffer_state_t is NULL\n");
+        return 1;
+    }
+    if (state->inputs.size == 0 || state->compressed.data == NULL ||
+        state->decompressed.data == NULL) {
+        fprintf(stderr, "buffer state allocation failure\n");
+        return 1;
+    }
+    if (config->use_dictionary && state->dictionary.data == NULL) {
+        fprintf(stderr, "dictionary loading failed\n");
+        return 1;
+    }
+    return 0;
+}
+
+static result_t simple_compress(method_state_t* base, config_t const* config) {
+    buffer_state_t* state = container_of(base, buffer_state_t, base);
+
+    if (buffer_state_bad(state, config))
+        return result_error(result_error_system_error);
+
+    /* Keep the tests short by skipping directories, since behavior shouldn't
+     * change.
+     */
+    if (base->data->type != data_type_file)
+        return result_error(result_error_skip);
+
+    if (config->use_dictionary || config->no_pledged_src_size)
+        return result_error(result_error_skip);
+
+    /* If the config doesn't specify a level, skip. */
+    int const level = config_get_level(config);
+    if (level == CONFIG_NO_LEVEL)
+        return result_error(result_error_skip);
+
+    data_buffer_t const input = state->inputs.buffers[0];
+
+    /* Compress, decompress, and check the result. */
+    state->compressed.size = ZSTD_compress(
+        state->compressed.data,
+        state->compressed.capacity,
+        input.data,
+        input.size,
+        level);
+    if (ZSTD_isError(state->compressed.size))
+        return result_error(result_error_compression_error);
+
+    state->decompressed.size = ZSTD_decompress(
+        state->decompressed.data,
+        state->decompressed.capacity,
+        state->compressed.data,
+        state->compressed.size);
+    if (ZSTD_isError(state->decompressed.size))
+        return result_error(result_error_decompression_error);
+    if (data_buffer_compare(input, state->decompressed))
+        return result_error(result_error_round_trip_error);
+
+    result_data_t data;
+    data.total_size = state->compressed.size;
+    return result_data(data);
+}
+
+static result_t compress_cctx_compress(
+    method_state_t* base,
+    config_t const* config) {
+    buffer_state_t* state = container_of(base, buffer_state_t, base);
+
+    if (buffer_state_bad(state, config))
+        return result_error(result_error_system_error);
+
+    if (config->no_pledged_src_size)
+        return result_error(result_error_skip);
+
+    if (base->data->type != data_type_dir)
+        return result_error(result_error_skip);
+
+    int const level = config_get_level(config);
+
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    ZSTD_DCtx* dctx = ZSTD_createDCtx();
+    result_t result;
+    if (cctx == NULL || dctx == NULL) {
+        fprintf(stderr, "context creation failed\n");
+        /* Leave through the cleanup path so that whichever context was
+         * created is still released.
+         */
+        result = result_error(result_error_system_error);
+        goto out;
+    }
+
+    result_data_t data = {.total_size = 0};
+    for (size_t i = 0; i < state->inputs.size; ++i) {
+        data_buffer_t const input = state->inputs.buffers[i];
+        ZSTD_parameters const params =
+            config_get_zstd_params(config, input.size, state->dictionary.size);
+
+        if (level == CONFIG_NO_LEVEL)
+            state->compressed.size = ZSTD_compress_advanced(
+                cctx,
+                state->compressed.data,
+                state->compressed.capacity,
+                input.data,
+                input.size,
+                config->use_dictionary ? state->dictionary.data : NULL,
+                config->use_dictionary ? state->dictionary.size : 0,
+                params);
+        else if (config->use_dictionary)
+            state->compressed.size = ZSTD_compress_usingDict(
+                cctx,
+                state->compressed.data,
+                state->compressed.capacity,
+                input.data,
+                input.size,
+                state->dictionary.data,
+                state->dictionary.size,
+                level);
+        else
+            state->compressed.size = ZSTD_compressCCtx(
+                cctx,
+                state->compressed.data,
+                state->compressed.capacity,
+                input.data,
+                input.size,
+                level);
+
+        if (ZSTD_isError(state->compressed.size)) {
+            result = result_error(result_error_compression_error);
+            goto out;
+        }
+
+        if (config->use_dictionary)
+            state->decompressed.size = ZSTD_decompress_usingDict(
+                dctx,
+                state->decompressed.data,
+                state->decompressed.capacity,
+                state->compressed.data,
+                state->compressed.size,
+                state->dictionary.data,
+                state->dictionary.size);
+        else
+            state->decompressed.size = ZSTD_decompressDCtx(
+                dctx,
+                state->decompressed.data,
+                state->decompressed.capacity,
+                state->compressed.data,
+                state->compressed.size);
+        if (ZSTD_isError(state->decompressed.size)) {
+            result = result_error(result_error_decompression_error);
+            goto out;
+        }
+        if (data_buffer_compare(input, state->decompressed)) {
+            result = result_error(result_error_round_trip_error);
+            goto out;
+        }
+
+        data.total_size += state->compressed.size;
+    }
+
+    result = result_data(data);
+out:
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeDCtx(dctx);
+    return result;
+}
+
+/** Generic state creation function. */
+static method_state_t* method_state_create(data_t const* data) {
+    method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t));
+    if (state == NULL)
+        return NULL;
+    state->data = data;
+    return state;
+}
+
+static void method_state_destroy(method_state_t* state) {
+    free(state);
+}
+
+/**
+ * Runs the zstd cli on the data and counts the bytes it writes to stdout.
+ * Skips configs without cli_args.
+ */
+static result_t cli_compress(method_state_t* state, config_t const* config) {
+    if (config->cli_args == NULL)
+        return result_error(result_error_skip);
+
+    /* We don't support no pledged source size with directories. Too slow. */
+    if (state->data->type == data_type_dir && config->no_pledged_src_size)
+        return result_error(result_error_skip);
+
+    if (g_zstdcli == NULL)
+        return result_error(result_error_system_error);
+
+    /* '<zstd>' -cqr <args> [-D '<dict>'] '<file/dir>' */
+    char cmd[1024];
+    size_t const cmd_size = snprintf(
+        cmd,
+        sizeof(cmd),
+        "'%s' -cqr %s %s%s%s %s '%s'",
+        g_zstdcli,
+        config->cli_args,
+        config->use_dictionary ? "-D '" : "",
+        config->use_dictionary ? state->data->dict.path : "",
+        config->use_dictionary ? "'" : "",
+        config->no_pledged_src_size ? "<" : "",
+        state->data->data.path);
+    if (cmd_size >= sizeof(cmd)) {
+        fprintf(stderr, "command too large: %s\n", cmd);
+        return result_error(result_error_system_error);
+    }
+    FILE* zstd = popen(cmd, "r");
+    if (zstd == NULL) {
+        fprintf(stderr, "failed to popen command: %s\n", cmd);
+        return result_error(result_error_system_error);
+    }
+
+    char out[4096];
+    size_t total_size = 0;
+    while (1) {
+        size_t const size = fread(out, 1, sizeof(out), zstd);
+        total_size += size;
+        if (size != sizeof(out))
+            break;
+    }
+    /* Sample the read error first, then always pclose() so the stream and
+     * the child process are released even when reading failed.
+     */
+    int const read_failed = ferror(zstd);
+    int const exit_status = pclose(zstd);
+    if (read_failed || exit_status != 0) {
+        fprintf(stderr, "zstd failed with command: %s\n", cmd);
+        return result_error(result_error_compression_error);
+    }
+
+    result_data_t const data = {.total_size = total_size};
+    return result_data(data);
+}
+
+/**
+ * Resets cctx and applies every parameter (and the dictionary, if used)
+ * from config. Returns non-zero on failure.
+ */
+static int advanced_config(
+    ZSTD_CCtx* cctx,
+    buffer_state_t* state,
+    config_t const* config) {
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+    for (size_t p = 0; p < config->param_values.size; ++p) {
+        param_value_t const pv = config->param_values.data[p];
+        if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, pv.param, pv.value))) {
+            return 1;
+        }
+    }
+    if (config->use_dictionary) {
+        if (ZSTD_isError(ZSTD_CCtx_loadDictionary(
+                cctx, state->dictionary.data, state->dictionary.size))) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * One-pass ZSTD_compress2() over every input, with the destination capacity
+ * set to ZSTD_compressBound(input.size) - subtract.
+ */
+static result_t advanced_one_pass_compress_output_adjustment(
+    method_state_t* base,
+    config_t const* config,
+    size_t const subtract) {
+    buffer_state_t* state = container_of(base, buffer_state_t, base);
+
+    if (buffer_state_bad(state, config))
+        return result_error(result_error_system_error);
+
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    result_t result;
+
+    if (!cctx || advanced_config(cctx, state, config)) {
+        result = result_error(result_error_compression_error);
+        goto out;
+    }
+
+    result_data_t data = {.total_size = 0};
+    for (size_t i = 0; i < state->inputs.size; ++i) {
+        data_buffer_t const input = state->inputs.buffers[i];
+
+        if (!config->no_pledged_src_size) {
+            if (ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(cctx, input.size))) {
+                result = result_error(result_error_compression_error);
+                goto out;
+            }
+        }
+        size_t const size = ZSTD_compress2(
+            cctx,
+            state->compressed.data,
+            ZSTD_compressBound(input.size) - subtract,
+            input.data,
+            input.size);
+        if (ZSTD_isError(size)) {
+            result = result_error(result_error_compression_error);
+            goto out;
+        }
+        data.total_size += size;
+    }
+
+    result = result_data(data);
+out:
+    ZSTD_freeCCtx(cctx);
+    return result;
+}
+
+static result_t advanced_one_pass_compress(
+    method_state_t* base,
+    config_t const* config) {
+    return advanced_one_pass_compress_output_adjustment(base, config, 0);
+}
+
+static result_t advanced_one_pass_compress_small_output(
+    method_state_t* base,
+    config_t const* config) {
+    return advanced_one_pass_compress_output_adjustment(base, config, 1);
+}
+
+static result_t advanced_streaming_compress(
+    method_state_t* base,
+    config_t const* config) {
+    buffer_state_t* state = container_of(base, buffer_state_t, base);
+
+    if (buffer_state_bad(state, config))
+        return result_error(result_error_system_error);
+
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    result_t result;
+
+    if (!cctx || advanced_config(cctx, state, config)) {
+        result = result_error(result_error_compression_error);
+        goto out;
+    }
+
+    result_data_t data = {.total_size = 0};
+    for (size_t i = 0; i < state->inputs.size; ++i) {
+        data_buffer_t input = state->inputs.buffers[i];
+
+        if (!config->no_pledged_src_size) {
+            if (ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(cctx, input.size))) {
+                result = result_error(result_error_compression_error);
+                goto out;
+            }
+        }
+
+        /* Feed the input in chunks of at most 4096 bytes and drain the
+         * output through a window of at most 1024 bytes.
+         */
+        while (input.size > 0) {
+            ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)};
+            input.data += in.size;
+            input.size -= in.size;
+            ZSTD_EndDirective const op =
+                input.size > 0 ? ZSTD_e_continue : ZSTD_e_end;
+            size_t ret = 0;
+            while (in.pos < in.size || (op == ZSTD_e_end && ret != 0)) {
+                ZSTD_outBuffer out = {state->compressed.data,
+                                      MIN(state->compressed.capacity, 1024)};
+                ret = ZSTD_compressStream2(cctx, &out, &in, op);
+                if (ZSTD_isError(ret)) {
+                    result = result_error(result_error_compression_error);
+                    goto out;
+                }
+                data.total_size += out.pos;
+            }
+        }
+    }
+
+    result = result_data(data);
+out:
+    ZSTD_freeCCtx(cctx);
+    return result;
+}
+
+/**
+ * Initializes zcs from config using the old streaming init functions.
+ *
+ * @param advanced Non-zero to use the *_advanced init variants.
+ * @param cdict    If non-NULL, a CDict is created, stored in *cdict and used
+ *                 for the init; freeing it is left to the caller.
+ *
+ * @returns 0 on success, 1 on failure.
+ */
+static int init_cstream(
+    buffer_state_t* state,
+    ZSTD_CStream* zcs,
+    config_t const* config,
+    int const advanced,
+    ZSTD_CDict** cdict)
+{
+    size_t zret;
+    if (advanced) {
+        ZSTD_parameters const params = config_get_zstd_params(config, 0, 0);
+        if (cdict) {
+            if (!config->use_dictionary)
+                return 1;
+            *cdict = ZSTD_createCDict_advanced(
+                state->dictionary.data,
+                state->dictionary.size,
+                ZSTD_dlm_byRef,
+                ZSTD_dct_auto,
+                params.cParams,
+                ZSTD_defaultCMem);
+            if (!*cdict) {
+                return 1;
+            }
+            zret = ZSTD_initCStream_usingCDict_advanced(
+                zcs, *cdict, params.fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+        } else {
+            zret = ZSTD_initCStream_advanced(
+                zcs,
+                config->use_dictionary ? state->dictionary.data : NULL,
+                config->use_dictionary ? state->dictionary.size : 0,
+                params,
+                ZSTD_CONTENTSIZE_UNKNOWN);
+        }
+    } else {
+        int const level = config_get_level(config);
+        if (level == CONFIG_NO_LEVEL)
+            return 1;
+        if (cdict) {
+            if (!config->use_dictionary)
+                return 1;
+            *cdict = ZSTD_createCDict(
+                state->dictionary.data,
+                state->dictionary.size,
+                level);
+            if (!*cdict) {
+                return 1;
+            }
+            zret = ZSTD_initCStream_usingCDict(zcs, *cdict);
+        } else if (config->use_dictionary) {
+            zret = ZSTD_initCStream_usingDict(
+                zcs,
+                state->dictionary.data,
+                state->dictionary.size,
+                level);
+        } else {
+            zret = ZSTD_initCStream(zcs, level);
+        }
+    }
+    if (ZSTD_isError(zret)) {
+        return 1;
+    }
+    return 0;
+}
+
+static result_t old_streaming_compress_internal(
+    method_state_t* base,
+    config_t const* config,
+    int const advanced,
+    int const cdict) {
+    buffer_state_t* state = container_of(base, buffer_state_t, base);
+
+    if (buffer_state_bad(state, config))
+        return result_error(result_error_system_error);
+
+
+    ZSTD_CStream* zcs = ZSTD_createCStream();
+    ZSTD_CDict* cd = NULL;
+    result_t result;
+    if (zcs == NULL) {
+        result = result_error(result_error_compression_error);
+        goto out;
+    }
+    if (!advanced && config_get_level(config) == CONFIG_NO_LEVEL) {
+        result = result_error(result_error_skip);
+        goto out;
+    }
+    if (cdict && !config->use_dictionary) {
+        result = result_error(result_error_skip);
+        goto out;
+    }
+    if (init_cstream(state, zcs, config, advanced, cdict ? &cd : NULL)) {
+        result = result_error(result_error_compression_error);
+        goto out;
+    }
+
+    result_data_t data = {.total_size = 0};
+    for (size_t i = 0; i < state->inputs.size; ++i) {
+        data_buffer_t input = state->inputs.buffers[i];
+        size_t zret = ZSTD_resetCStream(
+            zcs,
+            config->no_pledged_src_size ? ZSTD_CONTENTSIZE_UNKNOWN : input.size);
+        if (ZSTD_isError(zret)) {
+            result = result_error(result_error_compression_error);
+            goto out;
+        }
+
+        while (input.size > 0) {
+            ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)};
+            input.data += in.size;
+            input.size -= in.size;
+            ZSTD_EndDirective const op =
+                input.size > 0 ? ZSTD_e_continue : ZSTD_e_end;
+            zret = 0;
+            while (in.pos < in.size || (op == ZSTD_e_end && zret != 0)) {
+                ZSTD_outBuffer out = {state->compressed.data,
+                                      MIN(state->compressed.capacity, 1024)};
+                if (op == ZSTD_e_continue || in.pos < in.size)
+                    zret = ZSTD_compressStream(zcs, &out, &in);
+                else
+                    zret = ZSTD_endStream(zcs, &out);
+                if (ZSTD_isError(zret)) {
+                    result = result_error(result_error_compression_error);
+                    goto out;
+                }
+                data.total_size += out.pos;
+            }
+        }
+    }
+
+    result = result_data(data);
+out:
+    ZSTD_freeCStream(zcs);
+    ZSTD_freeCDict(cd);
+    return result;
+}
+
+static result_t old_streaming_compress(
+    method_state_t* base,
+    config_t const* config)
+{
+    return old_streaming_compress_internal(
+        base, config, /* advanced */ 0, /* cdict */ 0);
+}
+
+static result_t old_streaming_compress_advanced(
+    method_state_t* base,
+    config_t const* config)
+{
+    return old_streaming_compress_internal(
+        base, config, /* advanced */ 1, /* cdict */ 0);
+}
+
+static result_t old_streaming_compress_cdict(
+    method_state_t* base,
+    config_t const* config)
+{
+    return old_streaming_compress_internal(
+        base, config, /* advanced */ 0, /* cdict */ 1);
+}
+
+static result_t old_streaming_compress_cdict_advanced(
+    method_state_t* base,
+    config_t const* config)
+{
+    return old_streaming_compress_internal(
+        base, config, /* advanced */ 1, /* cdict */ 1);
+}
+
+method_t const simple = {
+    .name = "compress simple",
+    .create = buffer_state_create,
+    .compress = simple_compress,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const compress_cctx = {
+    .name = "compress cctx",
+    .create = buffer_state_create,
+    .compress = compress_cctx_compress,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const advanced_one_pass = {
+    .name = "advanced one pass",
+    .create = buffer_state_create,
+    .compress = advanced_one_pass_compress,
+    .destroy = buffer_state_destroy,
+};
+
+/* Must use the small-output variant: it is what distinguishes this method
+ * from advanced_one_pass.
+ */
+method_t const advanced_one_pass_small_out = {
+    .name = "advanced one pass small out",
+    .create = buffer_state_create,
+    .compress = advanced_one_pass_compress_small_output,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const advanced_streaming = {
+    .name = "advanced streaming",
+    .create = buffer_state_create,
+    .compress = advanced_streaming_compress,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const old_streaming = {
+    .name = "old streaming",
+    .create = buffer_state_create,
+    .compress = old_streaming_compress,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const old_streaming_advanced = {
+    .name = "old streaming advanced",
+    .create = buffer_state_create,
+    .compress = old_streaming_compress_advanced,
+    .destroy = buffer_state_destroy,
+};
+
+/* NOTE(review): "cdcit" looks like a typo for "cdict", but the name is the
+ * identifier emitted in the results, so it is deliberately left unchanged.
+ */
+method_t const old_streaming_cdict = {
+    .name = "old streaming cdcit",
+    .create = buffer_state_create,
+    .compress = old_streaming_compress_cdict,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const old_streaming_advanced_cdict = {
+    .name = "old streaming advanced cdict",
+    .create = buffer_state_create,
+    .compress = old_streaming_compress_cdict_advanced,
+    .destroy = buffer_state_destroy,
+};
+
+method_t const cli = {
+    .name = "zstdcli",
+    .create = method_state_create,
+    .compress = cli_compress,
+    .destroy = method_state_destroy,
+};
+
+static method_t const* g_methods[] = {
+    &simple,
+    &compress_cctx,
+    &cli,
+    &advanced_one_pass,
+    &advanced_one_pass_small_out,
+    &advanced_streaming,
+    &old_streaming,
+    &old_streaming_advanced,
+    &old_streaming_cdict,
+    &old_streaming_advanced_cdict,
+    NULL,
+};
+
+method_t const* const* methods = g_methods;
diff --git a/src/zstd/tests/regression/method.h b/src/zstd/tests/regression/method.h
new file mode 100644
index 000000000..6884e5418
---
/dev/null
+++ b/src/zstd/tests/regression/method.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef METHOD_H
+#define METHOD_H
+
+#include <stddef.h>
+
+#include "data.h"
+#include "config.h"
+#include "result.h"
+
+/**
+ * The base class for state that methods keep.
+ * All derived method state classes must have a member of this type.
+ */
+typedef struct {
+    data_t const* data;
+} method_state_t;
+
+/**
+ * A method that compresses the data using config.
+ */
+typedef struct {
+    char const* name; /**< The identifier for this method in the results. */
+    /**
+     * Creates a state that must contain a member variable of method_state_t,
+     * and returns a pointer to that member variable.
+     *
+     * This method can be used to do expensive work that only depends on the
+     * data, like loading the data file into a buffer.
+     */
+    method_state_t* (*create)(data_t const* data);
+    /**
+     * Compresses the data in the state using the given config.
+     *
+     * @param state A pointer to the state returned by create().
+     *
+     * @returns The total compressed size on success, or an error code.
+     */
+    result_t (*compress)(method_state_t* state, config_t const* config);
+    /**
+     * Frees the state.
+     */
+    void (*destroy)(method_state_t* state);
+} method_t;
+
+/**
+ * Set the zstd cli path. Must be called before any methods are used.
+ */
+void method_set_zstdcli(char const* zstdcli);
+
+/**
+ * A NULL-terminated list of methods.
+ */
+extern method_t const* const* methods;
+
+#endif
diff --git a/src/zstd/tests/regression/result.c b/src/zstd/tests/regression/result.c
new file mode 100644
index 000000000..2911722cd
--- /dev/null
+++ b/src/zstd/tests/regression/result.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "result.h"
+
+/**
+ * Returns a static, human-readable name for the error held by result.
+ * Never returns NULL.
+ */
+char const* result_get_error_string(result_t result) {
+    switch (result_get_error(result)) {
+        case result_error_ok:
+            return "okay";
+        case result_error_skip:
+            return "skip";
+        case result_error_system_error:
+            return "system error";
+        case result_error_compression_error:
+            return "compression error";
+        case result_error_decompression_error:
+            return "decompression error";
+        case result_error_round_trip_error:
+            return "round trip error";
+    }
+    /* Reached only for a value outside result_error_t. Kept outside the
+     * switch (rather than as a default case) so the compiler can still warn
+     * about unhandled enumerators.
+     */
+    return "unknown error";
+}
diff --git a/src/zstd/tests/regression/result.h b/src/zstd/tests/regression/result.h
new file mode 100644
index 000000000..0085c2adf
--- /dev/null
+++ b/src/zstd/tests/regression/result.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef RESULT_H
+#define RESULT_H
+
+#include <stddef.h>
+
+/**
+ * The error type enum.
+ */
+typedef enum {
+    result_error_ok, /**< No error. */
+    result_error_skip, /**< This method was skipped. */
+    result_error_system_error, /**< Some internal error happened. */
+    result_error_compression_error, /**< Compression failed. */
+    result_error_decompression_error, /**< Decompression failed. */
+    result_error_round_trip_error, /**< Data failed to round trip. */
+} result_error_t;
+
+/**
+ * The success type.
+ */
+typedef struct {
+    size_t total_size; /**< The total compressed size. */
+} result_data_t;
+
+/**
+ * The result type.
+ * Do not access the member variables directly, use the helper functions.
+ */
+typedef struct {
+    result_error_t internal_error;
+    result_data_t internal_data;
+} result_t;
+
+/**
+ * Create a result of the error type.
+ */
+static result_t result_error(result_error_t error);
+/**
+ * Create a result of the success type.
+ */
+static result_t result_data(result_data_t data);
+
+/**
+ * Check if the result is an error or skip.
+ */
+static int result_is_error(result_t result);
+/**
+ * Check if the result error is skip.
+ */
+static int result_is_skip(result_t result);
+/**
+ * Get the result error or okay.
+ */
+static result_error_t result_get_error(result_t result);
+/**
+ * Get the result data. The result MUST be checked with result_is_error() first.
+ */
+static result_data_t result_get_data(result_t result);
+
+static result_t result_error(result_error_t error) {
+    result_t result = {
+        .internal_error = error,
+    };
+    return result;
+}
+
+static result_t result_data(result_data_t data) {
+    result_t result = {
+        .internal_error = result_error_ok,
+        .internal_data = data,
+    };
+    return result;
+}
+
+static int result_is_error(result_t result) {
+    return result_get_error(result) != result_error_ok;
+}
+
+static int result_is_skip(result_t result) {
+    return result_get_error(result) == result_error_skip;
+}
+
+static result_error_t result_get_error(result_t result) {
+    return result.internal_error;
+}
+
+char const* result_get_error_string(result_t result);
+
+static result_data_t result_get_data(result_t result) {
+    return result.internal_data;
+}
+
+#endif
diff --git a/src/zstd/tests/regression/results.csv b/src/zstd/tests/regression/results.csv
new file mode 100644
index 000000000..4db42a488
--- /dev/null
+++ b/src/zstd/tests/regression/results.csv
@@ -0,0 +1,636 @@
+Data, Config, Method, Total compressed size
+silesia.tar, level -5, compress simple, 6738558
+silesia.tar, level -3, compress simple, 6446362
+silesia.tar, level -1, compress simple, 6186038
+silesia.tar, level 0, compress simple, 4861374
+silesia.tar, level 1, compress simple, 5334825
+silesia.tar, level 3, compress simple, 4861374
+silesia.tar, level 4, compress simple, 4799583
+silesia.tar, level 5, compress simple, 4722271
+silesia.tar, level 6, compress simple, 4672231
+silesia.tar, level 7, compress simple, 4606657
+silesia.tar, level 9, compress simple, 4554099
+silesia.tar, level 13, compress simple, 4491706
+silesia.tar, level 16, compress simple, 4381265
+silesia.tar, level 19, compress simple, 4281551
+silesia.tar, uncompressed literals, compress simple, 4861374
+silesia.tar, uncompressed literals optimal, compress simple, 4281551
+silesia.tar, huffman literals, compress simple, 6186038
+silesia, level -5, compress cctx, 6737567
+silesia, level -3, compress cctx, 6444663 +silesia, level -1, compress cctx, 6178442 +silesia, level 0, compress cctx, 4849491 +silesia, level 1, compress cctx, 5313144 +silesia, level 3, compress cctx, 4849491 +silesia, level 4, compress cctx, 4786913 +silesia, level 5, compress cctx, 4710178 +silesia, level 6, compress cctx, 4659996 +silesia, level 7, compress cctx, 4596234 +silesia, level 9, compress cctx, 4543862 +silesia, level 13, compress cctx, 4482073 +silesia, level 16, compress cctx, 4377389 +silesia, level 19, compress cctx, 4293262 +silesia, long distance mode, compress cctx, 4849491 +silesia, multithreaded, compress cctx, 4849491 +silesia, multithreaded long distance mode, compress cctx, 4849491 +silesia, small window log, compress cctx, 7078156 +silesia, small hash log, compress cctx, 6554898 +silesia, small chain log, compress cctx, 4931093 +silesia, explicit params, compress cctx, 4794609 +silesia, uncompressed literals, compress cctx, 4849491 +silesia, uncompressed literals optimal, compress cctx, 4293262 +silesia, huffman literals, compress cctx, 6178442 +silesia, multithreaded with advanced params, compress cctx, 4849491 +github, level -5, compress cctx, 205285 +github, level -5 with dict, compress cctx, 47294 +github, level -3, compress cctx, 190643 +github, level -3 with dict, compress cctx, 48047 +github, level -1, compress cctx, 175568 +github, level -1 with dict, compress cctx, 43527 +github, level 0, compress cctx, 136311 +github, level 0 with dict, compress cctx, 41534 +github, level 1, compress cctx, 142450 +github, level 1 with dict, compress cctx, 42157 +github, level 3, compress cctx, 136311 +github, level 3 with dict, compress cctx, 41534 +github, level 4, compress cctx, 136144 +github, level 4 with dict, compress cctx, 41725 +github, level 5, compress cctx, 135106 +github, level 5 with dict, compress cctx, 38934 +github, level 6, compress cctx, 135108 +github, level 6 with dict, compress cctx, 38628 +github, level 7, compress cctx, 
135108 +github, level 7 with dict, compress cctx, 38741 +github, level 9, compress cctx, 135108 +github, level 9 with dict, compress cctx, 39335 +github, level 13, compress cctx, 133717 +github, level 13 with dict, compress cctx, 39923 +github, level 16, compress cctx, 133717 +github, level 16 with dict, compress cctx, 37568 +github, level 19, compress cctx, 133717 +github, level 19 with dict, compress cctx, 37567 +github, long distance mode, compress cctx, 141101 +github, multithreaded, compress cctx, 141101 +github, multithreaded long distance mode, compress cctx, 141101 +github, small window log, compress cctx, 141101 +github, small hash log, compress cctx, 138943 +github, small chain log, compress cctx, 139239 +github, explicit params, compress cctx, 140924 +github, uncompressed literals, compress cctx, 136311 +github, uncompressed literals optimal, compress cctx, 133717 +github, huffman literals, compress cctx, 175568 +github, multithreaded with advanced params, compress cctx, 141101 +silesia, level -5, zstdcli, 6882514 +silesia, level -3, zstdcli, 6568406 +silesia, level -1, zstdcli, 6183433 +silesia, level 0, zstdcli, 4849539 +silesia, level 1, zstdcli, 5314157 +silesia, level 3, zstdcli, 4849539 +silesia, level 4, zstdcli, 4786961 +silesia, level 5, zstdcli, 4710226 +silesia, level 6, zstdcli, 4660044 +silesia, level 7, zstdcli, 4596282 +silesia, level 9, zstdcli, 4543910 +silesia, level 13, zstdcli, 4482121 +silesia, level 16, zstdcli, 4377437 +silesia, level 19, zstdcli, 4293310 +silesia, long distance mode, zstdcli, 4839698 +silesia, multithreaded, zstdcli, 4849539 +silesia, multithreaded long distance mode, zstdcli, 4839698 +silesia, small window log, zstdcli, 7104616 +silesia, small hash log, zstdcli, 6554946 +silesia, small chain log, zstdcli, 4931141 +silesia, explicit params, zstdcli, 4797048 +silesia, uncompressed literals, zstdcli, 5128008 +silesia, uncompressed literals optimal, zstdcli, 4325482 +silesia, huffman literals, zstdcli, 5331158 
+silesia, multithreaded with advanced params, zstdcli, 5128008 +silesia.tar, level -5, zstdcli, 6738906 +silesia.tar, level -3, zstdcli, 6448409 +silesia.tar, level -1, zstdcli, 6186908 +silesia.tar, level 0, zstdcli, 4861462 +silesia.tar, level 1, zstdcli, 5336255 +silesia.tar, level 3, zstdcli, 4861462 +silesia.tar, level 4, zstdcli, 4800482 +silesia.tar, level 5, zstdcli, 4723312 +silesia.tar, level 6, zstdcli, 4673616 +silesia.tar, level 7, zstdcli, 4608346 +silesia.tar, level 9, zstdcli, 4554702 +silesia.tar, level 13, zstdcli, 4491710 +silesia.tar, level 16, zstdcli, 4381269 +silesia.tar, level 19, zstdcli, 4281555 +silesia.tar, no source size, zstdcli, 4861458 +silesia.tar, long distance mode, zstdcli, 4853140 +silesia.tar, multithreaded, zstdcli, 4861462 +silesia.tar, multithreaded long distance mode, zstdcli, 4853140 +silesia.tar, small window log, zstdcli, 7095284 +silesia.tar, small hash log, zstdcli, 6587841 +silesia.tar, small chain log, zstdcli, 4943269 +silesia.tar, explicit params, zstdcli, 4822318 +silesia.tar, uncompressed literals, zstdcli, 5129548 +silesia.tar, uncompressed literals optimal, zstdcli, 4320914 +silesia.tar, huffman literals, zstdcli, 5347560 +silesia.tar, multithreaded with advanced params, zstdcli, 5129548 +github, level -5, zstdcli, 207285 +github, level -5 with dict, zstdcli, 48718 +github, level -3, zstdcli, 192643 +github, level -3 with dict, zstdcli, 47395 +github, level -1, zstdcli, 177568 +github, level -1 with dict, zstdcli, 45170 +github, level 0, zstdcli, 138311 +github, level 0 with dict, zstdcli, 43148 +github, level 1, zstdcli, 144450 +github, level 1 with dict, zstdcli, 43682 +github, level 3, zstdcli, 138311 +github, level 3 with dict, zstdcli, 43148 +github, level 4, zstdcli, 138144 +github, level 4 with dict, zstdcli, 43251 +github, level 5, zstdcli, 137106 +github, level 5 with dict, zstdcli, 40938 +github, level 6, zstdcli, 137108 +github, level 6 with dict, zstdcli, 40632 +github, level 7, zstdcli, 137108 
+github, level 7 with dict, zstdcli, 40766 +github, level 9, zstdcli, 137108 +github, level 9 with dict, zstdcli, 41326 +github, level 13, zstdcli, 135717 +github, level 13 with dict, zstdcli, 41716 +github, level 16, zstdcli, 135717 +github, level 16 with dict, zstdcli, 39577 +github, level 19, zstdcli, 135717 +github, level 19 with dict, zstdcli, 39576 +github, long distance mode, zstdcli, 138311 +github, multithreaded, zstdcli, 138311 +github, multithreaded long distance mode, zstdcli, 138311 +github, small window log, zstdcli, 138311 +github, small hash log, zstdcli, 137467 +github, small chain log, zstdcli, 138314 +github, explicit params, zstdcli, 136140 +github, uncompressed literals, zstdcli, 167915 +github, uncompressed literals optimal, zstdcli, 158824 +github, huffman literals, zstdcli, 144450 +github, multithreaded with advanced params, zstdcli, 167915 +silesia, level -5, advanced one pass, 6737567 +silesia, level -3, advanced one pass, 6444663 +silesia, level -1, advanced one pass, 6178442 +silesia, level 0, advanced one pass, 4849491 +silesia, level 1, advanced one pass, 5313144 +silesia, level 3, advanced one pass, 4849491 +silesia, level 4, advanced one pass, 4786913 +silesia, level 5, advanced one pass, 4710178 +silesia, level 6, advanced one pass, 4659996 +silesia, level 7, advanced one pass, 4596234 +silesia, level 9, advanced one pass, 4543862 +silesia, level 13, advanced one pass, 4482073 +silesia, level 16, advanced one pass, 4377389 +silesia, level 19, advanced one pass, 4293262 +silesia, no source size, advanced one pass, 4849491 +silesia, long distance mode, advanced one pass, 4839650 +silesia, multithreaded, advanced one pass, 4849491 +silesia, multithreaded long distance mode, advanced one pass, 4839650 +silesia, small window log, advanced one pass, 7089646 +silesia, small hash log, advanced one pass, 6554898 +silesia, small chain log, advanced one pass, 4931093 +silesia, explicit params, advanced one pass, 4797035 +silesia, uncompressed 
literals, advanced one pass, 5127960 +silesia, uncompressed literals optimal, advanced one pass, 4325434 +silesia, huffman literals, advanced one pass, 5326210 +silesia, multithreaded with advanced params, advanced one pass, 5127960 +silesia.tar, level -5, advanced one pass, 6738558 +silesia.tar, level -3, advanced one pass, 6446362 +silesia.tar, level -1, advanced one pass, 6186038 +silesia.tar, level 0, advanced one pass, 4861374 +silesia.tar, level 1, advanced one pass, 5334825 +silesia.tar, level 3, advanced one pass, 4861374 +silesia.tar, level 4, advanced one pass, 4799583 +silesia.tar, level 5, advanced one pass, 4722271 +silesia.tar, level 6, advanced one pass, 4672231 +silesia.tar, level 7, advanced one pass, 4606657 +silesia.tar, level 9, advanced one pass, 4554099 +silesia.tar, level 13, advanced one pass, 4491706 +silesia.tar, level 16, advanced one pass, 4381265 +silesia.tar, level 19, advanced one pass, 4281551 +silesia.tar, no source size, advanced one pass, 4861374 +silesia.tar, long distance mode, advanced one pass, 4848046 +silesia.tar, multithreaded, advanced one pass, 4860726 +silesia.tar, multithreaded long distance mode, advanced one pass, 4847343 +silesia.tar, small window log, advanced one pass, 7095237 +silesia.tar, small hash log, advanced one pass, 6587833 +silesia.tar, small chain log, advanced one pass, 4943266 +silesia.tar, explicit params, advanced one pass, 4808543 +silesia.tar, uncompressed literals, advanced one pass, 5129447 +silesia.tar, uncompressed literals optimal, advanced one pass, 4320910 +silesia.tar, huffman literals, advanced one pass, 5347283 +silesia.tar, multithreaded with advanced params, advanced one pass, 5129766 +github, level -5, advanced one pass, 205285 +github, level -5 with dict, advanced one pass, 46718 +github, level -3, advanced one pass, 190643 +github, level -3 with dict, advanced one pass, 45395 +github, level -1, advanced one pass, 175568 +github, level -1 with dict, advanced one pass, 43170 +github, 
level 0, advanced one pass, 136311 +github, level 0 with dict, advanced one pass, 41148 +github, level 1, advanced one pass, 142450 +github, level 1 with dict, advanced one pass, 41682 +github, level 3, advanced one pass, 136311 +github, level 3 with dict, advanced one pass, 41148 +github, level 4, advanced one pass, 136144 +github, level 4 with dict, advanced one pass, 41251 +github, level 5, advanced one pass, 135106 +github, level 5 with dict, advanced one pass, 38938 +github, level 6, advanced one pass, 135108 +github, level 6 with dict, advanced one pass, 38632 +github, level 7, advanced one pass, 135108 +github, level 7 with dict, advanced one pass, 38766 +github, level 9, advanced one pass, 135108 +github, level 9 with dict, advanced one pass, 39326 +github, level 13, advanced one pass, 133717 +github, level 13 with dict, advanced one pass, 39716 +github, level 16, advanced one pass, 133717 +github, level 16 with dict, advanced one pass, 37577 +github, level 19, advanced one pass, 133717 +github, level 19 with dict, advanced one pass, 37576 +github, no source size, advanced one pass, 136311 +github, long distance mode, advanced one pass, 136311 +github, multithreaded, advanced one pass, 136311 +github, multithreaded long distance mode, advanced one pass, 136311 +github, small window log, advanced one pass, 136311 +github, small hash log, advanced one pass, 135467 +github, small chain log, advanced one pass, 136314 +github, explicit params, advanced one pass, 137670 +github, uncompressed literals, advanced one pass, 165915 +github, uncompressed literals optimal, advanced one pass, 156824 +github, huffman literals, advanced one pass, 142450 +github, multithreaded with advanced params, advanced one pass, 165915 +silesia, level -5, advanced one pass small out, 6737567 +silesia, level -3, advanced one pass small out, 6444663 +silesia, level -1, advanced one pass small out, 6178442 +silesia, level 0, advanced one pass small out, 4849491 +silesia, level 1, advanced 
one pass small out, 5313144 +silesia, level 3, advanced one pass small out, 4849491 +silesia, level 4, advanced one pass small out, 4786913 +silesia, level 5, advanced one pass small out, 4710178 +silesia, level 6, advanced one pass small out, 4659996 +silesia, level 7, advanced one pass small out, 4596234 +silesia, level 9, advanced one pass small out, 4543862 +silesia, level 13, advanced one pass small out, 4482073 +silesia, level 16, advanced one pass small out, 4377389 +silesia, level 19, advanced one pass small out, 4293262 +silesia, no source size, advanced one pass small out, 4849491 +silesia, long distance mode, advanced one pass small out, 4839650 +silesia, multithreaded, advanced one pass small out, 4849491 +silesia, multithreaded long distance mode, advanced one pass small out, 4839650 +silesia, small window log, advanced one pass small out, 7089646 +silesia, small hash log, advanced one pass small out, 6554898 +silesia, small chain log, advanced one pass small out, 4931093 +silesia, explicit params, advanced one pass small out, 4797035 +silesia, uncompressed literals, advanced one pass small out, 5127960 +silesia, uncompressed literals optimal, advanced one pass small out, 4325434 +silesia, huffman literals, advanced one pass small out, 5326210 +silesia, multithreaded with advanced params, advanced one pass small out, 5127960 +silesia.tar, level -5, advanced one pass small out, 6738558 +silesia.tar, level -3, advanced one pass small out, 6446362 +silesia.tar, level -1, advanced one pass small out, 6186038 +silesia.tar, level 0, advanced one pass small out, 4861374 +silesia.tar, level 1, advanced one pass small out, 5334825 +silesia.tar, level 3, advanced one pass small out, 4861374 +silesia.tar, level 4, advanced one pass small out, 4799583 +silesia.tar, level 5, advanced one pass small out, 4722271 +silesia.tar, level 6, advanced one pass small out, 4672231 +silesia.tar, level 7, advanced one pass small out, 4606657 +silesia.tar, level 9, advanced one 
pass small out, 4554099 +silesia.tar, level 13, advanced one pass small out, 4491706 +silesia.tar, level 16, advanced one pass small out, 4381265 +silesia.tar, level 19, advanced one pass small out, 4281551 +silesia.tar, no source size, advanced one pass small out, 4861374 +silesia.tar, long distance mode, advanced one pass small out, 4848046 +silesia.tar, multithreaded, advanced one pass small out, 4860726 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4847343 +silesia.tar, small window log, advanced one pass small out, 7095237 +silesia.tar, small hash log, advanced one pass small out, 6587833 +silesia.tar, small chain log, advanced one pass small out, 4943266 +silesia.tar, explicit params, advanced one pass small out, 4808543 +silesia.tar, uncompressed literals, advanced one pass small out, 5129447 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4320910 +silesia.tar, huffman literals, advanced one pass small out, 5347283 +silesia.tar, multithreaded with advanced params, advanced one pass small out, 5129766 +github, level -5, advanced one pass small out, 205285 +github, level -5 with dict, advanced one pass small out, 46718 +github, level -3, advanced one pass small out, 190643 +github, level -3 with dict, advanced one pass small out, 45395 +github, level -1, advanced one pass small out, 175568 +github, level -1 with dict, advanced one pass small out, 43170 +github, level 0, advanced one pass small out, 136311 +github, level 0 with dict, advanced one pass small out, 41148 +github, level 1, advanced one pass small out, 142450 +github, level 1 with dict, advanced one pass small out, 41682 +github, level 3, advanced one pass small out, 136311 +github, level 3 with dict, advanced one pass small out, 41148 +github, level 4, advanced one pass small out, 136144 +github, level 4 with dict, advanced one pass small out, 41251 +github, level 5, advanced one pass small out, 135106 +github, level 5 with dict, advanced one 
pass small out, 38938 +github, level 6, advanced one pass small out, 135108 +github, level 6 with dict, advanced one pass small out, 38632 +github, level 7, advanced one pass small out, 135108 +github, level 7 with dict, advanced one pass small out, 38766 +github, level 9, advanced one pass small out, 135108 +github, level 9 with dict, advanced one pass small out, 39326 +github, level 13, advanced one pass small out, 133717 +github, level 13 with dict, advanced one pass small out, 39716 +github, level 16, advanced one pass small out, 133717 +github, level 16 with dict, advanced one pass small out, 37577 +github, level 19, advanced one pass small out, 133717 +github, level 19 with dict, advanced one pass small out, 37576 +github, no source size, advanced one pass small out, 136311 +github, long distance mode, advanced one pass small out, 136311 +github, multithreaded, advanced one pass small out, 136311 +github, multithreaded long distance mode, advanced one pass small out, 136311 +github, small window log, advanced one pass small out, 136311 +github, small hash log, advanced one pass small out, 135467 +github, small chain log, advanced one pass small out, 136314 +github, explicit params, advanced one pass small out, 137670 +github, uncompressed literals, advanced one pass small out, 165915 +github, uncompressed literals optimal, advanced one pass small out, 156824 +github, huffman literals, advanced one pass small out, 142450 +github, multithreaded with advanced params, advanced one pass small out, 165915 +silesia, level -5, advanced streaming, 6882466 +silesia, level -3, advanced streaming, 6568358 +silesia, level -1, advanced streaming, 6183385 +silesia, level 0, advanced streaming, 4849491 +silesia, level 1, advanced streaming, 5314109 +silesia, level 3, advanced streaming, 4849491 +silesia, level 4, advanced streaming, 4786913 +silesia, level 5, advanced streaming, 4710178 +silesia, level 6, advanced streaming, 4659996 +silesia, level 7, advanced streaming, 
4596234 +silesia, level 9, advanced streaming, 4543862 +silesia, level 13, advanced streaming, 4482073 +silesia, level 16, advanced streaming, 4377389 +silesia, level 19, advanced streaming, 4293262 +silesia, no source size, advanced streaming, 4849455 +silesia, long distance mode, advanced streaming, 4839650 +silesia, multithreaded, advanced streaming, 4849491 +silesia, multithreaded long distance mode, advanced streaming, 4839650 +silesia, small window log, advanced streaming, 7105714 +silesia, small hash log, advanced streaming, 6554898 +silesia, small chain log, advanced streaming, 4931093 +silesia, explicit params, advanced streaming, 4797048 +silesia, uncompressed literals, advanced streaming, 5127960 +silesia, uncompressed literals optimal, advanced streaming, 4325434 +silesia, huffman literals, advanced streaming, 5331110 +silesia, multithreaded with advanced params, advanced streaming, 5127960 +silesia.tar, level -5, advanced streaming, 6982738 +silesia.tar, level -3, advanced streaming, 6641264 +silesia.tar, level -1, advanced streaming, 6190789 +silesia.tar, level 0, advanced streaming, 4861376 +silesia.tar, level 1, advanced streaming, 5336879 +silesia.tar, level 3, advanced streaming, 4861376 +silesia.tar, level 4, advanced streaming, 4799583 +silesia.tar, level 5, advanced streaming, 4722276 +silesia.tar, level 6, advanced streaming, 4672240 +silesia.tar, level 7, advanced streaming, 4606657 +silesia.tar, level 9, advanced streaming, 4554106 +silesia.tar, level 13, advanced streaming, 4491707 +silesia.tar, level 16, advanced streaming, 4381284 +silesia.tar, level 19, advanced streaming, 4281511 +silesia.tar, no source size, advanced streaming, 4861372 +silesia.tar, long distance mode, advanced streaming, 4848046 +silesia.tar, multithreaded, advanced streaming, 4861458 +silesia.tar, multithreaded long distance mode, advanced streaming, 4853136 +silesia.tar, small window log, advanced streaming, 7112148 +silesia.tar, small hash log, advanced streaming, 
6587834 +silesia.tar, small chain log, advanced streaming, 4943271 +silesia.tar, explicit params, advanced streaming, 4808570 +silesia.tar, uncompressed literals, advanced streaming, 5129450 +silesia.tar, uncompressed literals optimal, advanced streaming, 4320841 +silesia.tar, huffman literals, advanced streaming, 5352306 +silesia.tar, multithreaded with advanced params, advanced streaming, 5129544 +github, level -5, advanced streaming, 205285 +github, level -5 with dict, advanced streaming, 46718 +github, level -3, advanced streaming, 190643 +github, level -3 with dict, advanced streaming, 45395 +github, level -1, advanced streaming, 175568 +github, level -1 with dict, advanced streaming, 43170 +github, level 0, advanced streaming, 136311 +github, level 0 with dict, advanced streaming, 41148 +github, level 1, advanced streaming, 142450 +github, level 1 with dict, advanced streaming, 41682 +github, level 3, advanced streaming, 136311 +github, level 3 with dict, advanced streaming, 41148 +github, level 4, advanced streaming, 136144 +github, level 4 with dict, advanced streaming, 41251 +github, level 5, advanced streaming, 135106 +github, level 5 with dict, advanced streaming, 38938 +github, level 6, advanced streaming, 135108 +github, level 6 with dict, advanced streaming, 38632 +github, level 7, advanced streaming, 135108 +github, level 7 with dict, advanced streaming, 38766 +github, level 9, advanced streaming, 135108 +github, level 9 with dict, advanced streaming, 39326 +github, level 13, advanced streaming, 133717 +github, level 13 with dict, advanced streaming, 39716 +github, level 16, advanced streaming, 133717 +github, level 16 with dict, advanced streaming, 37577 +github, level 19, advanced streaming, 133717 +github, level 19 with dict, advanced streaming, 37576 +github, no source size, advanced streaming, 136311 +github, long distance mode, advanced streaming, 136311 +github, multithreaded, advanced streaming, 136311 +github, multithreaded long distance 
mode, advanced streaming, 136311 +github, small window log, advanced streaming, 136311 +github, small hash log, advanced streaming, 135467 +github, small chain log, advanced streaming, 136314 +github, explicit params, advanced streaming, 137670 +github, uncompressed literals, advanced streaming, 165915 +github, uncompressed literals optimal, advanced streaming, 156824 +github, huffman literals, advanced streaming, 142450 +github, multithreaded with advanced params, advanced streaming, 165915 +silesia, level -5, old streaming, 6882466 +silesia, level -3, old streaming, 6568358 +silesia, level -1, old streaming, 6183385 +silesia, level 0, old streaming, 4849491 +silesia, level 1, old streaming, 5314109 +silesia, level 3, old streaming, 4849491 +silesia, level 4, old streaming, 4786913 +silesia, level 5, old streaming, 4710178 +silesia, level 6, old streaming, 4659996 +silesia, level 7, old streaming, 4596234 +silesia, level 9, old streaming, 4543862 +silesia, level 13, old streaming, 4482073 +silesia, level 16, old streaming, 4377389 +silesia, level 19, old streaming, 4293262 +silesia, no source size, old streaming, 4849455 +silesia, uncompressed literals, old streaming, 4849491 +silesia, uncompressed literals optimal, old streaming, 4293262 +silesia, huffman literals, old streaming, 6183385 +silesia.tar, level -5, old streaming, 6982738 +silesia.tar, level -3, old streaming, 6641264 +silesia.tar, level -1, old streaming, 6190789 +silesia.tar, level 0, old streaming, 4861376 +silesia.tar, level 1, old streaming, 5336879 +silesia.tar, level 3, old streaming, 4861376 +silesia.tar, level 4, old streaming, 4799583 +silesia.tar, level 5, old streaming, 4722276 +silesia.tar, level 6, old streaming, 4672240 +silesia.tar, level 7, old streaming, 4606657 +silesia.tar, level 9, old streaming, 4554106 +silesia.tar, level 13, old streaming, 4491707 +silesia.tar, level 16, old streaming, 4381284 +silesia.tar, level 19, old streaming, 4281511 +silesia.tar, no source size, old 
streaming, 4861372 +silesia.tar, uncompressed literals, old streaming, 4861376 +silesia.tar, uncompressed literals optimal, old streaming, 4281511 +silesia.tar, huffman literals, old streaming, 6190789 +github, level -5, old streaming, 205285 +github, level -5 with dict, old streaming, 46718 +github, level -3, old streaming, 190643 +github, level -3 with dict, old streaming, 45395 +github, level -1, old streaming, 175568 +github, level -1 with dict, old streaming, 43170 +github, level 0, old streaming, 136311 +github, level 0 with dict, old streaming, 41148 +github, level 1, old streaming, 142450 +github, level 1 with dict, old streaming, 41682 +github, level 3, old streaming, 136311 +github, level 3 with dict, old streaming, 41148 +github, level 4, old streaming, 136144 +github, level 4 with dict, old streaming, 41251 +github, level 5, old streaming, 135106 +github, level 5 with dict, old streaming, 38938 +github, level 6, old streaming, 135108 +github, level 6 with dict, old streaming, 38632 +github, level 7, old streaming, 135108 +github, level 7 with dict, old streaming, 38766 +github, level 9, old streaming, 135108 +github, level 9 with dict, old streaming, 39326 +github, level 13, old streaming, 133717 +github, level 13 with dict, old streaming, 39716 +github, level 16, old streaming, 133717 +github, level 16 with dict, old streaming, 37577 +github, level 19, old streaming, 133717 +github, level 19 with dict, old streaming, 37576 +github, no source size, old streaming, 140631 +github, uncompressed literals, old streaming, 136311 +github, uncompressed literals optimal, old streaming, 133717 +github, huffman literals, old streaming, 175568 +silesia, level -5, old streaming advanced, 6882466 +silesia, level -3, old streaming advanced, 6568358 +silesia, level -1, old streaming advanced, 6183385 +silesia, level 0, old streaming advanced, 4849491 +silesia, level 1, old streaming advanced, 5314109 +silesia, level 3, old streaming advanced, 4849491 +silesia, level 4, 
old streaming advanced, 4786913 +silesia, level 5, old streaming advanced, 4710178 +silesia, level 6, old streaming advanced, 4659996 +silesia, level 7, old streaming advanced, 4596234 +silesia, level 9, old streaming advanced, 4543862 +silesia, level 13, old streaming advanced, 4482073 +silesia, level 16, old streaming advanced, 4377389 +silesia, level 19, old streaming advanced, 4293262 +silesia, no source size, old streaming advanced, 4849455 +silesia, long distance mode, old streaming advanced, 4849491 +silesia, multithreaded, old streaming advanced, 4849491 +silesia, multithreaded long distance mode, old streaming advanced, 4849491 +silesia, small window log, old streaming advanced, 7105714 +silesia, small hash log, old streaming advanced, 6554898 +silesia, small chain log, old streaming advanced, 4931093 +silesia, explicit params, old streaming advanced, 4797048 +silesia, uncompressed literals, old streaming advanced, 4849491 +silesia, uncompressed literals optimal, old streaming advanced, 4293262 +silesia, huffman literals, old streaming advanced, 6183385 +silesia, multithreaded with advanced params, old streaming advanced, 4849491 +silesia.tar, level -5, old streaming advanced, 6982738 +silesia.tar, level -3, old streaming advanced, 6641264 +silesia.tar, level -1, old streaming advanced, 6190789 +silesia.tar, level 0, old streaming advanced, 4861376 +silesia.tar, level 1, old streaming advanced, 5336879 +silesia.tar, level 3, old streaming advanced, 4861376 +silesia.tar, level 4, old streaming advanced, 4799583 +silesia.tar, level 5, old streaming advanced, 4722276 +silesia.tar, level 6, old streaming advanced, 4672240 +silesia.tar, level 7, old streaming advanced, 4606657 +silesia.tar, level 9, old streaming advanced, 4554106 +silesia.tar, level 13, old streaming advanced, 4491707 +silesia.tar, level 16, old streaming advanced, 4381284 +silesia.tar, level 19, old streaming advanced, 4281511 +silesia.tar, no source size, old streaming advanced, 4861372 
+silesia.tar, long distance mode, old streaming advanced, 4861376 +silesia.tar, multithreaded, old streaming advanced, 4861376 +silesia.tar, multithreaded long distance mode, old streaming advanced, 4861376 +silesia.tar, small window log, old streaming advanced, 7112151 +silesia.tar, small hash log, old streaming advanced, 6587834 +silesia.tar, small chain log, old streaming advanced, 4943271 +silesia.tar, explicit params, old streaming advanced, 4808570 +silesia.tar, uncompressed literals, old streaming advanced, 4861376 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4281511 +silesia.tar, huffman literals, old streaming advanced, 6190789 +silesia.tar, multithreaded with advanced params, old streaming advanced, 4861376 +github, level -5, old streaming advanced, 216734 +github, level -5 with dict, old streaming advanced, 49562 +github, level -3, old streaming advanced, 192160 +github, level -3 with dict, old streaming advanced, 44956 +github, level -1, old streaming advanced, 181108 +github, level -1 with dict, old streaming advanced, 42383 +github, level 0, old streaming advanced, 141090 +github, level 0 with dict, old streaming advanced, 41113 +github, level 1, old streaming advanced, 143682 +github, level 1 with dict, old streaming advanced, 42430 +github, level 3, old streaming advanced, 141090 +github, level 3 with dict, old streaming advanced, 41113 +github, level 4, old streaming advanced, 141090 +github, level 4 with dict, old streaming advanced, 41084 +github, level 5, old streaming advanced, 139391 +github, level 5 with dict, old streaming advanced, 39159 +github, level 6, old streaming advanced, 139394 +github, level 6 with dict, old streaming advanced, 38749 +github, level 7, old streaming advanced, 138675 +github, level 7 with dict, old streaming advanced, 38746 +github, level 9, old streaming advanced, 138675 +github, level 9 with dict, old streaming advanced, 38987 +github, level 13, old streaming advanced, 138675 +github, level 
13 with dict, old streaming advanced, 39724 +github, level 16, old streaming advanced, 138675 +github, level 16 with dict, old streaming advanced, 40771 +github, level 19, old streaming advanced, 133717 +github, level 19 with dict, old streaming advanced, 37576 +github, no source size, old streaming advanced, 140631 +github, long distance mode, old streaming advanced, 141090 +github, multithreaded, old streaming advanced, 141090 +github, multithreaded long distance mode, old streaming advanced, 141090 +github, small window log, old streaming advanced, 141090 +github, small hash log, old streaming advanced, 141578 +github, small chain log, old streaming advanced, 139258 +github, explicit params, old streaming advanced, 140930 +github, uncompressed literals, old streaming advanced, 141090 +github, uncompressed literals optimal, old streaming advanced, 133717 +github, huffman literals, old streaming advanced, 181108 +github, multithreaded with advanced params, old streaming advanced, 141090 +github, level -5 with dict, old streaming cdcit, 46718 +github, level -3 with dict, old streaming cdcit, 45395 +github, level -1 with dict, old streaming cdcit, 43170 +github, level 0 with dict, old streaming cdcit, 41148 +github, level 1 with dict, old streaming cdcit, 41682 +github, level 3 with dict, old streaming cdcit, 41148 +github, level 4 with dict, old streaming cdcit, 41251 +github, level 5 with dict, old streaming cdcit, 38938 +github, level 6 with dict, old streaming cdcit, 38632 +github, level 7 with dict, old streaming cdcit, 38766 +github, level 9 with dict, old streaming cdcit, 39326 +github, level 13 with dict, old streaming cdcit, 39716 +github, level 16 with dict, old streaming cdcit, 37577 +github, level 19 with dict, old streaming cdcit, 37576 +github, level -5 with dict, old streaming advanced cdict, 49562 +github, level -3 with dict, old streaming advanced cdict, 44956 +github, level -1 with dict, old streaming advanced cdict, 42383 +github, level 0 with 
dict, old streaming advanced cdict, 41113 +github, level 1 with dict, old streaming advanced cdict, 42430 +github, level 3 with dict, old streaming advanced cdict, 41113 +github, level 4 with dict, old streaming advanced cdict, 41084 +github, level 5 with dict, old streaming advanced cdict, 39158 +github, level 6 with dict, old streaming advanced cdict, 38748 +github, level 7 with dict, old streaming advanced cdict, 38744 +github, level 9 with dict, old streaming advanced cdict, 38986 +github, level 13 with dict, old streaming advanced cdict, 39724 +github, level 16 with dict, old streaming advanced cdict, 40771 +github, level 19 with dict, old streaming advanced cdict, 37576 diff --git a/src/zstd/tests/regression/test.c b/src/zstd/tests/regression/test.c new file mode 100644 index 000000000..ff2cdba30 --- /dev/null +++ b/src/zstd/tests/regression/test.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <assert.h> +#include <getopt.h> +#include <stdio.h> +#include <string.h> + +#include "config.h" +#include "data.h" +#include "method.h" + +static int g_max_name_len = 0; + +/** Check if a name contains a comma or is too long. */ +static int is_name_bad(char const* name) { + if (name == NULL) + return 1; + int const len = strlen(name); + if (len > g_max_name_len) + g_max_name_len = len; + for (; *name != '\0'; ++name) + if (*name == ',') + return 1; + return 0; +} + +/** Check if any of the names contain a comma. 
*/ +static int are_names_bad() { + for (size_t method = 0; methods[method] != NULL; ++method) + if (is_name_bad(methods[method]->name)) { + fprintf(stderr, "method name %s is bad\n", methods[method]->name); + return 1; + } + for (size_t datum = 0; data[datum] != NULL; ++datum) + if (is_name_bad(data[datum]->name)) { + fprintf(stderr, "data name %s is bad\n", data[datum]->name); + return 1; + } + for (size_t config = 0; configs[config] != NULL; ++config) + if (is_name_bad(configs[config]->name)) { + fprintf(stderr, "config name %s is bad\n", configs[config]->name); + return 1; + } + return 0; +} + +/** + * Option parsing using getopt. + * When you add a new option update: long_options, long_extras, and + * short_options. + */ + +/** Option variables filled by parse_args. */ +static char const* g_output = NULL; +static char const* g_diff = NULL; +static char const* g_cache = NULL; +static char const* g_zstdcli = NULL; +static char const* g_config = NULL; +static char const* g_data = NULL; +static char const* g_method = NULL; + +typedef enum { + required_option, + optional_option, + help_option, +} option_type; + +/** + * Extra state that we need to keep per-option that we can't store in getopt. + */ +struct option_extra { + int id; /**< The short option name, used as an id. */ + char const* help; /**< The help message. */ + option_type opt_type; /**< The option type: required, optional, or help. */ + char const** value; /**< The value to set or NULL if no_argument. */ +}; + +/** The options. 
*/ +static struct option long_options[] = { + {"cache", required_argument, NULL, 'c'}, + {"output", required_argument, NULL, 'o'}, + {"zstd", required_argument, NULL, 'z'}, + {"config", required_argument, NULL, 128}, + {"data", required_argument, NULL, 129}, + {"method", required_argument, NULL, 130}, + {"diff", required_argument, NULL, 'd'}, + {"help", no_argument, NULL, 'h'}, +}; + +static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + +/** The extra info for the options. Must be in the same order as the options. */ +static struct option_extra long_extras[] = { + {'c', "the cache directory", required_option, &g_cache}, + {'o', "write the results here", required_option, &g_output}, + {'z', "zstd cli tool", required_option, &g_zstdcli}, + {128, "use this config", optional_option, &g_config}, + {129, "use this data", optional_option, &g_data}, + {130, "use this method", optional_option, &g_method}, + {'d', "compare the results to this file", optional_option, &g_diff}, + {'h', "display this message", help_option, NULL}, +}; + +/** The short options. Must correspond to the options. */ +static char const short_options[] = "c:d:ho:z:"; + +/** Return the help string for the option type. */ +static char const* required_message(option_type opt_type) { + switch (opt_type) { + case required_option: + return "[required]"; + case optional_option: + return "[optional]"; + case help_option: + return ""; + default: + assert(0); + return NULL; + } +} + +/** Print the help for the program. 
*/ +static void print_help(void) { + fprintf(stderr, "regression test runner\n"); + size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + for (size_t i = 0; i < nargs; ++i) { + if (long_options[i].val < 128) { + /* Long / short - help [option type] */ + fprintf( + stderr, + "--%s / -%c \t- %s %s\n", + long_options[i].name, + long_options[i].val, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } else { + /* Short / long - help [option type] */ + fprintf( + stderr, + "--%s \t- %s %s\n", + long_options[i].name, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } + } +} + +/** Parse the arguments. Return 0 on success. Print help on failure. */ +static int parse_args(int argc, char** argv) { + int option_index = 0; + int c; + + while (1) { + c = getopt_long(argc, argv, short_options, long_options, &option_index); + if (c == -1) + break; + + int found = 0; + for (size_t i = 0; i < nargs; ++i) { + if (c == long_extras[i].id && long_extras[i].value != NULL) { + *long_extras[i].value = optarg; + found = 1; + break; + } + } + if (found) + continue; + + switch (c) { + case 'h': + case '?': + default: + print_help(); + return 1; + } + } + + int bad = 0; + for (size_t i = 0; i < nargs; ++i) { + if (long_extras[i].opt_type != required_option) + continue; + if (long_extras[i].value == NULL) + continue; + if (*long_extras[i].value != NULL) + continue; + fprintf( + stderr, + "--%s is a required argument but is not set\n", + long_options[i].name); + bad = 1; + } + if (bad) { + fprintf(stderr, "\n"); + print_help(); + return 1; + } + + return 0; +} + +/** Helper macro to print to stderr and a file. */ +#define tprintf(file, ...) \ + do { \ + fprintf(file, __VA_ARGS__); \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +/** Helper macro to flush stderr and a file. 
*/ +#define tflush(file) \ + do { \ + fflush(file); \ + fflush(stderr); \ + } while (0) + +void tprint_names( + FILE* results, + char const* data_name, + char const* config_name, + char const* method_name) { + int const data_padding = g_max_name_len - strlen(data_name); + int const config_padding = g_max_name_len - strlen(config_name); + int const method_padding = g_max_name_len - strlen(method_name); + + tprintf( + results, + "%s, %*s%s, %*s%s, %*s", + data_name, + data_padding, + "", + config_name, + config_padding, + "", + method_name, + method_padding, + ""); +} + +/** + * Run all the regression tests and record the results table to results and + * stderr progressively. + */ +static int run_all(FILE* results) { + tprint_names(results, "Data", "Config", "Method"); + tprintf(results, "Total compressed size\n"); + for (size_t method = 0; methods[method] != NULL; ++method) { + if (g_method != NULL && strcmp(methods[method]->name, g_method)) + continue; + for (size_t datum = 0; data[datum] != NULL; ++datum) { + if (g_data != NULL && strcmp(data[datum]->name, g_data)) + continue; + /* Create the state common to all configs */ + method_state_t* state = methods[method]->create(data[datum]); + for (size_t config = 0; configs[config] != NULL; ++config) { + if (g_config != NULL && strcmp(configs[config]->name, g_config)) + continue; + if (config_skip_data(configs[config], data[datum])) + continue; + /* Print the result for the (method, data, config) tuple. 
*/ + result_t const result = + methods[method]->compress(state, configs[config]); + if (result_is_skip(result)) + continue; + tprint_names( + results, + data[datum]->name, + configs[config]->name, + methods[method]->name); + if (result_is_error(result)) { + tprintf(results, "%s\n", result_get_error_string(result)); + } else { + tprintf( + results, + "%llu\n", + (unsigned long long)result_get_data(result).total_size); + } + tflush(results); + } + methods[method]->destroy(state); + } + } + return 0; +} + +/** memcmp() the old results file and the new results file. */ +static int diff_results(char const* actual_file, char const* expected_file) { + data_buffer_t const actual = data_buffer_read(actual_file); + data_buffer_t const expected = data_buffer_read(expected_file); + int ret = 1; + + if (actual.data == NULL) { + fprintf(stderr, "failed to open results '%s' for diff\n", actual_file); + goto out; + } + if (expected.data == NULL) { + fprintf( + stderr, + "failed to open previous results '%s' for diff\n", + expected_file); + goto out; + } + + ret = data_buffer_compare(actual, expected); + if (ret != 0) { + fprintf( + stderr, + "actual results '%s' does not match expected results '%s'\n", + actual_file, + expected_file); + } else { + fprintf(stderr, "actual results match expected results\n"); + } +out: + data_buffer_free(actual); + data_buffer_free(expected); + return ret; +} + +int main(int argc, char** argv) { + /* Parse args and validate modules. */ + int ret = parse_args(argc, argv); + if (ret != 0) + return ret; + + if (are_names_bad()) + return 1; + + /* Initialize modules. */ + method_set_zstdcli(g_zstdcli); + ret = data_init(g_cache); + if (ret != 0) { + fprintf(stderr, "data_init() failed with error=%s\n", strerror(ret)); + return 1; + } + + /* Run the regression tests. 
*/ + ret = 1; + FILE* results = fopen(g_output, "w"); + if (results == NULL) { + fprintf(stderr, "Failed to open the output file\n"); + goto out; + } + ret = run_all(results); + fclose(results); + + if (ret != 0) + goto out; + + if (g_diff) + /* Diff the new results with the previous results. */ + ret = diff_results(g_output, g_diff); + +out: + data_finish(); + return ret; +} diff --git a/src/zstd/tests/roundTripCrash.c b/src/zstd/tests/roundTripCrash.c new file mode 100644 index 000000000..c117d2c26 --- /dev/null +++ b/src/zstd/tests/roundTripCrash.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + This program takes a file in input, + performs a zstd round-trip test (compression - decompress) + compares the result with original + and generates a crash (double free) on corruption detection. +*/ + +/*=========================================== +* Dependencies +*==========================================*/ +#include <stddef.h> /* size_t */ +#include <stdlib.h> /* malloc, free, exit */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strcmp */ +#include <sys/types.h> /* stat */ +#include <sys/stat.h> /* stat */ +#include "xxhash.h" + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? 
(a) : (b) ) + +static void crash(int errorCode){ + /* abort if AFL/libfuzzer, exit otherwise */ + #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* could also use __AFL_COMPILER */ + abort(); + #else + exit(errorCode); + #endif +} + +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + fprintf(stderr, \ + "Error=> %s: %s", \ + #f, ZSTD_getErrorName(err)); \ + crash(1); \ +} } + +/** roundTripTest() : +* Compresses `srcBuff` into `compressedBuff`, +* then decompresses `compressedBuff` into `resultBuff`. +* Compression level used is derived from first content byte. +* @return : result of decompression, which should be == `srcSize` +* or an error code if either compression or decompression fails. +* Note : `compressedBuffCapacity` should be `>= ZSTD_compressBound(srcSize)` +* for compression to be guaranteed to work */ +static size_t roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcBuffSize) +{ + static const int maxClevel = 19; + size_t const hashLength = MIN(128, srcBuffSize); + unsigned const h32 = XXH32(srcBuff, hashLength, 0); + int const cLevel = h32 % maxClevel; + size_t const cSize = ZSTD_compress(compressedBuff, compressedBuffCapacity, srcBuff, srcBuffSize, cLevel); + if (ZSTD_isError(cSize)) { + fprintf(stderr, "Compression error : %s \n", ZSTD_getErrorName(cSize)); + return cSize; + } + return ZSTD_decompress(resultBuff, resultBuffCapacity, compressedBuff, cSize); +} + +/** cctxParamRoundTripTest() : + * Same as roundTripTest() except allows experimenting with ZSTD_CCtx_params. 
*/ +static size_t cctxParamRoundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcBuffSize) +{ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams(); + ZSTD_inBuffer inBuffer = { srcBuff, srcBuffSize, 0 }; + ZSTD_outBuffer outBuffer = { compressedBuff, compressedBuffCapacity, 0 }; + + static const int maxClevel = 19; + size_t const hashLength = MIN(128, srcBuffSize); + unsigned const h32 = XXH32(srcBuff, hashLength, 0); + int const cLevel = h32 % maxClevel; + + /* Set parameters */ + CHECK_Z( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, cLevel) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 2) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_overlapLog, 5) ); + + + /* Apply parameters */ + CHECK_Z( ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams) ); + + CHECK_Z (ZSTD_compressStream2(cctx, &outBuffer, &inBuffer, ZSTD_e_end) ); + + ZSTD_freeCCtxParams(cctxParams); + ZSTD_freeCCtx(cctx); + + return ZSTD_decompress(resultBuff, resultBuffCapacity, compressedBuff, outBuffer.pos); +} + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* ip1 = (const char*)buff1; + const char* ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos<buffSize; pos++) + if (ip1[pos]!=ip2[pos]) + break; + + return pos; +} + +static void roundTripCheck(const void* srcBuff, size_t srcBuffSize, int testCCtxParams) +{ + size_t const cBuffSize = ZSTD_compressBound(srcBuffSize); + void* cBuff = malloc(cBuffSize); + void* rBuff = malloc(cBuffSize); + + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! \n"); + exit (1); + } + + { size_t const result = testCCtxParams ? 
+ cctxParamRoundTripTest(rBuff, cBuffSize, cBuff, cBuffSize, srcBuff, srcBuffSize) + : roundTripTest(rBuff, cBuffSize, cBuff, cBuffSize, srcBuff, srcBuffSize); + if (ZSTD_isError(result)) { + fprintf(stderr, "roundTripTest error : %s \n", ZSTD_getErrorName(result)); + crash(1); + } + if (result != srcBuffSize) { + fprintf(stderr, "Incorrect regenerated size : %u != %u\n", (unsigned)result, (unsigned)srcBuffSize); + crash(1); + } + if (checkBuffers(srcBuff, rBuff, srcBuffSize) != srcBuffSize) { + fprintf(stderr, "Silent decoding corruption !!!"); + crash(1); + } + } + + free(cBuff); + free(rBuff); +} + + +static size_t getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ +#endif + return (size_t)statbuf.st_size; +} + + +static int isDirectory(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +/** loadFile() : +* requirement : `buffer` size >= `fileSize` */ +static void loadFile(void* buffer, const char* fileName, size_t fileSize) +{ + FILE* const f = fopen(fileName, "rb"); + if (isDirectory(fileName)) { + fprintf(stderr, "Ignoring %s directory \n", fileName); + exit(2); + } + if (f==NULL) { + fprintf(stderr, "Impossible to open %s \n", fileName); + exit(3); + } + { size_t const readSize = fread(buffer, 1, fileSize, f); + if (readSize != fileSize) { + fprintf(stderr, "Error reading %s \n", fileName); + exit(5); + } } + fclose(f); +} + + +static void fileCheck(const char* fileName, int testCCtxParams) +{ + size_t const fileSize = 
getFileSize(fileName); + void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */); + if (!buffer) { + fprintf(stderr, "not enough memory \n"); + exit(4); + } + loadFile(buffer, fileName, fileSize); + roundTripCheck(buffer, fileSize, testCCtxParams); + free (buffer); +} + +int main(int argCount, const char** argv) { + int argNb = 1; + int testCCtxParams = 0; + if (argCount < 2) { + fprintf(stderr, "Error : no argument : need input file \n"); + exit(9); + } + + if (!strcmp(argv[argNb], "--cctxParams")) { + testCCtxParams = 1; + argNb++; + } + + fileCheck(argv[argNb], testCCtxParams); + fprintf(stderr, "no pb detected\n"); + return 0; +} diff --git a/src/zstd/tests/seqgen.c b/src/zstd/tests/seqgen.c new file mode 100644 index 000000000..29c0c4054 --- /dev/null +++ b/src/zstd/tests/seqgen.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2017-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "seqgen.h" +#include "mem.h" +#include <string.h> + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +static const size_t kMatchBytes = 128; + +#define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static BYTE SEQ_randByte(unsigned* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = SEQ_rotl32(rand32, 13); + *src = rand32; + return (BYTE)(rand32 >> 5); +} + +SEQ_stream SEQ_initStream(unsigned seed) +{ + SEQ_stream stream; + stream.state = 0; + XXH64_reset(&stream.xxh, 0); + stream.seed = seed; + return stream; +} + +/* Generates a single guard byte, then match length + 1 of a different byte, + * then another guard byte. 
+ */ +static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value, + SEQ_outBuffer* out) +{ + typedef enum { + ml_first_byte = 0, + ml_match_bytes, + ml_last_byte, + } ml_state; + BYTE* const ostart = (BYTE*)out->dst; + BYTE* const oend = ostart + out->size; + BYTE* op = ostart + out->pos; + + /* Resume from the state saved by the previous call; each case falls through to the next once it has finished. */ + switch ((ml_state)stream->state) { + case ml_first_byte: + /* Generate a single byte and pick a different byte for the match */ + if (op >= oend) { + /* No room in the output: report one byte still pending and stay in this state */ + stream->bytesLeft = 1; + break; + } + *op = SEQ_randByte(&stream->seed) & 0xFF; + do { + stream->saved = SEQ_randByte(&stream->seed) & 0xFF; + } while (*op == stream->saved); + ++op; + /* State transition */ + stream->state = ml_match_bytes; + stream->bytesLeft = value + 1; + /* fall-through */ + case ml_match_bytes: { + /* Copy matchLength + 1 bytes to the output buffer */ + size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); + if (setLength > 0) { + memset(op, stream->saved, setLength); + op += setLength; + stream->bytesLeft -= setLength; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = ml_last_byte; + } + /* fall-through */ + case ml_last_byte: + /* Generate the trailing guard byte; it is re-drawn until it differs from the match byte */ + if (op >= oend) { + /* No room in the output: report one byte still pending and stay in this state */ + stream->bytesLeft = 1; + break; + } + do { + *op = SEQ_randByte(&stream->seed) & 0xFF; + } while (*op == stream->saved); + ++op; + /* State transition */ + /* fall-through */ + default: + stream->state = 0; + stream->bytesLeft = 0; + break; + } + /* Hash only the bytes appended during this call, then publish the new write position */ + XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); + out->pos = op - ostart; + return stream->bytesLeft; +} + +/* Saves the current seed then generates kMatchBytes random bytes >= 128. + * Generates literal length - kMatchBytes random bytes < 128. + * Generates another kMatchBytes using the saved seed to generate a match. + * This way the match is easy to find for the compressors. 
+ */ +static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) +{ + typedef enum { + ll_start = 0, + ll_run_bytes, + ll_literals, + ll_run_match, + } ll_state; + BYTE* const ostart = (BYTE*)out->dst; + BYTE* const oend = ostart + out->size; + BYTE* op = ostart + out->pos; + + /* Resume from the state saved by the previous call; each case falls through to the next once it has finished. */ + switch ((ll_state)stream->state) { + case ll_start: + stream->state = ll_run_bytes; + /* Remember the seed so ll_run_match can replay the same byte sequence */ + stream->saved = stream->seed; + stream->bytesLeft = MIN(kMatchBytes, value); + /* fall-through */ + case ll_run_bytes: + /* Run bytes have the high bit set (| 0x80), i.e. they are >= 128 */ + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->seed) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = ll_literals; + stream->bytesLeft = value - MIN(kMatchBytes, value); + /* fall-through */ + case ll_literals: + /* Literal bytes have the high bit cleared (& 0x7F), i.e. they are < 128 */ + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->seed) & 0x7F; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = ll_run_match; + stream->bytesLeft = MIN(kMatchBytes, value); + /* fall-through */ + case ll_run_match: { + /* Replay the generator from the saved seed to reproduce the run bytes */ + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->saved) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + } + /* fall-through */ + default: + stream->state = 0; + stream->bytesLeft = 0; + break; + } + /* Hash only the bytes appended during this call, then publish the new write position */ + XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); + out->pos = op - ostart; + return stream->bytesLeft; +} + +/* Saves the current seed then generates kMatchBytes random bytes >= 128. + * Generates offset - kMatchBytes of zeros to get a large offset without + * polluting the hash tables. + * Generates another kMatchBytes using the saved seed to generate a match with the + * required offset. 
+ */ +static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) +{ + typedef enum { + of_start = 0, + of_run_bytes, + of_offset, + of_run_match, + } of_state; + BYTE* const ostart = (BYTE*)out->dst; + BYTE* const oend = ostart + out->size; + BYTE* op = ostart + out->pos; + + /* Resume from the state saved by the previous call; each case falls through to the next once it has finished. */ + switch ((of_state)stream->state) { + case of_start: + stream->state = of_run_bytes; + /* Remember the seed so of_run_match can replay the same byte sequence */ + stream->saved = stream->seed; + stream->bytesLeft = MIN(value, kMatchBytes); + /* fall-through */ + case of_run_bytes: { + /* Run bytes have the high bit set (| 0x80), i.e. they are >= 128 */ + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->seed) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = of_offset; + stream->bytesLeft = value - MIN(value, kMatchBytes); + } + /* fall-through */ + case of_offset: { + /* Write the zero filler (value minus the run bytes) to the output buffer */ + size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); + if (setLength > 0) { + memset(op, 0, setLength); + op += setLength; + stream->bytesLeft -= setLength; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = of_run_match; + stream->bytesLeft = MIN(value, kMatchBytes); + } + /* fall-through */ + case of_run_match: { + /* Replay the generator from the saved seed to reproduce the run bytes */ + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->saved) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + } + /* fall-through */ + default: + stream->state = 0; + stream->bytesLeft = 0; + break; + } + /* Hash only the bytes appended during this call, then publish the new write position */ + XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); + out->pos = op - ostart; + return stream->bytesLeft; +} + +/* Returns the number of bytes left to generate. + * Must pass the same type/value until it returns 0. 
+ */ +size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out) +{ + switch (type) { + case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out); + case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out); + case SEQ_gen_of: return SEQ_gen_offset(stream, value, out); + case SEQ_gen_max: /* fall-through */ + default: return 0; + } +} + +/* Returns the xxhash of the data produced so far */ +XXH64_hash_t SEQ_digest(SEQ_stream const* stream) +{ + return XXH64_digest(&stream->xxh); +} diff --git a/src/zstd/tests/seqgen.h b/src/zstd/tests/seqgen.h new file mode 100644 index 000000000..808099ba9 --- /dev/null +++ b/src/zstd/tests/seqgen.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef SEQGEN_H +#define SEQGEN_H + +#define XXH_STATIC_LINKING_ONLY + +#include "xxhash.h" +#include <stddef.h> /* size_t */ + +typedef enum { + SEQ_gen_ml = 0, + SEQ_gen_ll, + SEQ_gen_of, + SEQ_gen_max /* Must be the last value */ +} SEQ_gen_type; + +/* Internal state, do not use */ +typedef struct { + XXH64_state_t xxh; /* xxh state for all the data produced so far (seed=0) */ + unsigned seed; + int state; /* enum to control state machine (clean=0) */ + unsigned saved; + size_t bytesLeft; +} SEQ_stream; + +SEQ_stream SEQ_initStream(unsigned seed); + +typedef struct { + void* dst; + size_t size; + size_t pos; +} SEQ_outBuffer; + +/* Returns non-zero until the current type/value has been generated. + * Must pass the same type/value until it returns 0. 
+ * + * Recommended to pick a value in the middle of the range you want, since there + * may be some noise that causes actual results to be slightly different. + * We try to be more accurate for smaller values. + * + * NOTE: Very small values don't work well (< 6). + */ +size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, + SEQ_outBuffer* out); + +/* Returns the xxhash of the data produced so far */ +XXH64_hash_t SEQ_digest(SEQ_stream const* stream); + +#endif /* SEQGEN_H */ diff --git a/src/zstd/tests/test-license.py b/src/zstd/tests/test-license.py new file mode 100755 index 000000000..522884dba --- /dev/null +++ b/src/zstd/tests/test-license.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +# ################################################################ +# Copyright (c) 2016-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +import datetime +import enum +import glob +import os +import sys + +YEAR = datetime.datetime.now().year + +YEAR_STR = str(YEAR) + +ROOT = os.path.join(os.path.dirname(__file__), "..") + +RELDIRS = [ + "doc", + "examples", + "lib", + "programs", + "tests", +] + +DIRS = [os.path.join(ROOT, d) for d in RELDIRS] + +class File(enum.Enum): + C = 1 + H = 2 + MAKE = 3 + PY = 4 + +SUFFIX = { + File.C: ".c", + File.H: ".h", + File.MAKE: "Makefile", + File.PY: ".py", +} + +# License should certainly be in the first 10 KB. 
MAX_BYTES = 10000
MAX_LINES = 50

LICENSE_LINES = [
    "This source code is licensed under both the BSD-style license (found in the",
    "LICENSE file in the root directory of this source tree) and the GPLv2 (found",
    "in the COPYING file in the root directory of this source tree).",
    "You may select, at your option, one of the above-listed licenses.",
]

COPYRIGHT_EXCEPTIONS = {
    # From zstdmt
    "threading.c",
    "threading.h",
    # From divsufsort
    "divsufsort.c",
    "divsufsort.h",
}

LICENSE_EXCEPTIONS = {
    # From divsufsort
    "divsufsort.c",
    "divsufsort.h",
}


def valid_copyright(lines):
    """Validate the first line of *lines* that mentions "Copyright".

    Returns a ``(ok, message)`` tuple; *message* is empty when *ok* is True.
    Only the first copyright line is inspected.
    """
    for line in lines:
        line = line.strip()
        if "Copyright" not in line:
            continue
        if "present" in line:
            return (False, f"Copyright line '{line}' contains 'present'!")
        if "Facebook, Inc" not in line:
            return (False, f"Copyright line '{line}' does not contain 'Facebook, Inc'")
        if YEAR_STR not in line:
            return (False, f"Copyright line '{line}' does not contain {YEAR}")
        if " (c) " not in line:
            return (False, f"Copyright line '{line}' does not contain ' (c) '!")
        return (True, "")
    return (False, "Copyright not found!")


def valid_license(lines):
    """Check that *lines* contains the full LICENSE_LINES block.

    The block must start at the first line containing LICENSE_LINES[0] and
    continue on consecutive lines. Returns a ``(ok, message)`` tuple.

    A header that is cut off by the end of *lines* is reported as an invalid
    license (with an empty "Actual") instead of raising IndexError.
    """
    for start, line in enumerate(lines):
        if LICENSE_LINES[0] not in line:
            continue
        for offset, expected in enumerate(LICENSE_LINES):
            index = start + offset
            # The caller truncates the file, so the block may run off the end.
            actual = lines[index] if index < len(lines) else ""
            if expected not in actual:
                message = f"""Invalid license line found starting on line {index}!
Expected: '{expected}'
Actual: '{actual}'"""
                return (False, message)
        return (True, "")
    return (False, "License not found!")


def valid_file(filename):
    """Return True if *filename* has a valid copyright and license header.

    Only the first MAX_BYTES bytes / MAX_LINES lines are inspected. Files
    whose basename is listed in COPYRIGHT_EXCEPTIONS / LICENSE_EXCEPTIONS
    skip the corresponding check. Every failure is printed.
    """
    # errors="replace": a stray non-UTF-8 byte must not abort the whole scan.
    with open(filename, "r", encoding="utf-8", errors="replace") as f:
        lines = f.readlines(MAX_BYTES)
    lines = lines[:MAX_LINES]

    basename = os.path.basename(filename)
    ok = True
    if basename not in COPYRIGHT_EXCEPTIONS:
        c_ok, c_msg = valid_copyright(lines)
        if not c_ok:
            print(f"{filename}: {c_msg}")
            ok = False
    if basename not in LICENSE_EXCEPTIONS:
        l_ok, l_msg = valid_license(lines)
        if not l_ok:
            print(f"{filename}: {l_msg}")
            ok = False
    return ok


def main():
    """Check every matching file under DIRS; return the number of invalid files."""
    invalid_files = []
    for directory in DIRS:
        for suffix in SUFFIX.values():
            files = set(glob.glob(f"{directory}/*{suffix}"))
            files |= set(glob.glob(f"{directory}/**/*{suffix}"))
            for filename in files:
                if not valid_file(filename):
                    invalid_files.append(filename)
    if invalid_files:
        print(f"Invalid files: {invalid_files}")
    else:
        print("Pass!")
    return len(invalid_files)

if __name__ == "__main__":
    # POSIX keeps only the low 8 bits of the status, so a count that is a
    # multiple of 256 would otherwise be reported as success.
    sys.exit(min(main(), 255))
\ No newline at end of file diff --git a/src/zstd/tests/test-zstd-versions.py b/src/zstd/tests/test-zstd-versions.py new file mode 100755 index 000000000..fa2171752 --- /dev/null +++ b/src/zstd/tests/test-zstd-versions.py @@ -0,0 +1,277 @@ +#!/usr/bin/env python3 +"""Test zstd interoperability between versions""" + +# ################################################################ +# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +import filecmp +import glob +import hashlib +import os +import shutil +import sys +import subprocess +from subprocess import Popen, PIPE + +repo_url = 'https://github.com/facebook/zstd.git' +tmp_dir_name = 'tests/versionsTest' +make_cmd = 'make' +git_cmd = 'git' +test_dat_src = 'README.md' +test_dat = 'test_dat' +head = 'vdevel' +dict_source = 'dict_source' +dict_files = './zstd/programs/*.c ./zstd/lib/common/*.c ./zstd/lib/compress/*.c ./zstd/lib/decompress/*.c ./zstd/lib/dictBuilder/*.c ./zstd/lib/legacy/*.c ' +dict_files += './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h ./zstd/lib/dictBuilder/*.h ./zstd/lib/legacy/*.h' + + +def execute(command, print_output=False, print_error=True, param_shell=False): + popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell) + stdout_lines, stderr_lines = popen.communicate() + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + print(stdout_lines) + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if not print_output and print_error: + print(stderr_lines) + return popen.returncode + + 
def proc(cmd_args, pipe=True, dummy=False):
    """Run *cmd_args* and return ``(stdout, stderr)`` from ``communicate()``.

    With ``pipe=False`` the child inherits our streams and both values are
    None. With ``dummy=True`` nothing is run and None is returned.
    """
    if dummy:
        return
    if pipe:
        subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE)
    else:
        subproc = Popen(cmd_args)
    return subproc.communicate()


def make(args, pipe=True):
    """Run ``make`` with *args*; see proc() for the return value."""
    return proc([make_cmd] + args, pipe)


def git(args, pipe=True):
    """Run ``git`` with *args*; see proc() for the return value."""
    return proc([git_cmd] + args, pipe)


def get_git_tags():
    """Return the release tags (``vX.Y.Z``) of the repository in the cwd."""
    stdout, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    tags = stdout.decode('utf-8').split()
    return tags


def create_dict(tag, dict_source_path):
    """Train ``dict.<tag>`` from the .c/.h files in *dict_source_path*.

    Does nothing (apart from a message) when the dictionary already exists.
    """
    dict_name = 'dict.' + tag
    if not os.path.isfile(dict_name):
        cFiles = glob.glob(dict_source_path + "/*.c")
        hFiles = glob.glob(dict_source_path + "/*.h")
        # Pass an argument list instead of a shell string so that paths
        # containing spaces or shell metacharacters are handled verbatim.
        if tag == 'v0.5.0':
            command = ['./dictBuilder.' + tag]
        else:
            command = ['./zstd.' + tag, '-f', '--train']
        command += cFiles + hFiles + ['-o', dict_name]
        result = execute(command, print_output=False)
        if result == 0:
            print(dict_name + ' created')
        else:
            print('ERROR: creating of ' + dict_name + ' failed')
    else:
        print(dict_name + ' already exists')


# (zstd level flag, label used in the output file name)
_COMPRESSION_LEVELS = (
    ('-f', '01'),
    ('-5f', '05'),
    ('-9f', '09'),
    ('-15f', '15'),
    ('-18f', '18'),
)


def _compress_at_all_levels(tag, sample, suffix, extra_args=()):
    """Compress *sample* with ``./zstd.<tag>`` at every level in the table.

    Each successful output is renamed to
    ``<sample>_<level>_64_<tag>_<suffix>.zst``; failed levels are skipped.
    """
    for flag, label in _COMPRESSION_LEVELS:
        command = ['./zstd.' + tag, *extra_args, flag, sample]
        # subprocess.DEVNULL avoids leaking an open os.devnull handle.
        if subprocess.call(command, stderr=subprocess.DEVNULL) == 0:
            os.rename(sample + '.zst',
                      sample + '_' + label + '_64_' + tag + '_' + suffix + '.zst')


def dict_compress_sample(tag, sample):
    """Compress *sample* at several levels using dictionary ``dict.<tag>``."""
    dict_name = 'dict.' + tag
    _compress_at_all_levels(tag, sample, 'dictio', ('-D', dict_name))
    print(tag + " : dict compression completed")


def compress_sample(tag, sample):
    """Compress *sample* at several levels without a dictionary."""
    _compress_at_all_levels(tag, sample, 'nodict')
    print(tag + " : compression completed")


# http://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hex SHA-1 digest of the file at *filepath*."""
    with open(filepath, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest()


def remove_duplicates():
    """Delete ``*.zst`` files in the cwd whose content duplicates an earlier one."""
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False: the default treats files with equal size and
            # mtime as identical without reading them.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))


def _decompress_and_check(params, file_dec):
    """Run the decompression command and compare *file_dec* with test_dat.

    Prints the verdict and returns 0 on a verified round trip, 1 otherwise.
    """
    if execute(params) != 0:
        print('command does not work')
        return 1
    # Byte-wise comparison; a same-size, same-mtime output must not pass
    # just because its stat signature matches.
    if not filecmp.cmp(file_dec, test_dat, shallow=False):
        print('ERR !! ')
        return 1
    print('OK ')
    return 0


def decompress_zst(tag):
    """Decompress every ``*_nodict.zst`` with ``./zstd.<tag>``.

    Returns 1 if any file failed to decompress or differs from test_dat,
    else 0.
    """
    dec_error = 0
    list_zst = sorted(glob.glob('*_nodict.zst'))
    for file_zst in list_zst:
        print(file_zst, end=' ')
        print(tag, end=' ')
        file_dec = file_zst + '_d64_' + tag + '.dec'
        if tag <= 'v0.5.0':
            params = ['./zstd.' + tag, '-df', file_zst, file_dec]
        else:
            params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec]
        if _decompress_and_check(params, file_dec):
            dec_error = 1
    return dec_error


def decompress_dict(tag):
    """Decompress every ``*_dictio.zst`` with ``./zstd.<tag>`` and its dictionary.

    The dictionary is chosen from the tag embedded in the file name.
    Returns 1 if any file failed to decompress or differs from test_dat,
    else 0.
    """
    dec_error = 0
    list_zst = sorted(glob.glob('*_dictio.zst'))
    for file_zst in list_zst:
        dict_tag = file_zst[0:len(file_zst)-11]  # remove "_dictio.zst"
        if head in dict_tag: # find vdevel
            dict_tag = head
        else:
            dict_tag = dict_tag[dict_tag.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ')
        file_dec = file_zst + '_d64_' + tag + '.dec'
        if tag <= 'v0.5.0':
            params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec]
        else:
            params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec]
        if _decompress_and_check(params, file_dec):
            dec_error = 1
    return dec_error


if __name__ == '__main__':
    error_code = 0
    base_dir = os.getcwd() + '/..'
# /path/to/zstd + tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/tests/versionsTest + clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/tests/versionsTest/zstd + dict_source_path = tmp_dir + '/' + dict_source # /path/to/zstd/tests/versionsTest/dict_source + programs_dir = base_dir + '/programs' # /path/to/zstd/programs + os.makedirs(tmp_dir, exist_ok=True) + + # since Travis clones limited depth, we should clone full repository + if not os.path.isdir(clone_dir): + git(['clone', repo_url, clone_dir]) + + shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat) + + # Retrieve all release tags + print('Retrieve all release tags :') + os.chdir(clone_dir) + alltags = get_git_tags() + [head] + tags = [t for t in alltags if t >= 'v0.5.0'] + print(tags) + + # Build all release zstd + for tag in tags: + os.chdir(base_dir) + dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/zstd.<TAG> + if not os.path.isfile(dst_zstd) or tag == head: + if tag != head: + r_dir = '{}/{}'.format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/<TAG> + os.makedirs(r_dir, exist_ok=True) + os.chdir(clone_dir) + git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False) + if tag == 'v0.5.0': + os.chdir(r_dir + '/dictBuilder') # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder + make(['clean', 'dictBuilder'], False) + shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag)) + os.chdir(r_dir + '/programs') # /path/to/zstd/tests/versionsTest/<TAG>/programs + make(['clean', 'zstd'], False) + else: + os.chdir(programs_dir) + make(['zstd'], False) + shutil.copy2('zstd', dst_zstd) + + # remove any remaining *.zst and *.dec from previous test + os.chdir(tmp_dir) + for compressed in glob.glob("*.zst"): + os.remove(compressed) + for dec in glob.glob("*.dec"): + os.remove(dec) + + # copy *.c and *.h to a temporary directory ("dict_source") + if not os.path.isdir(dict_source_path): + os.mkdir(dict_source_path) + print('cp ' + dict_files 
+ ' ' + dict_source_path) + execute('cp ' + dict_files + ' ' + dict_source_path, param_shell=True) + + print('Compress test.dat by all released zstd') + + error_code = 0 + for tag in tags: + print(tag) + if tag >= 'v0.5.0': + create_dict(tag, dict_source_path) + dict_compress_sample(tag, test_dat) + remove_duplicates() + error_code += decompress_dict(tag) + compress_sample(tag, test_dat) + remove_duplicates() + error_code += decompress_zst(tag) + + print('') + print('Enumerate different compressed files') + zstds = sorted(glob.glob('*.zst')) + for zstd in zstds: + print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd)) + + if error_code != 0: + print('====== ERROR !!! =======') + + sys.exit(error_code) diff --git a/src/zstd/tests/zbufftest.c b/src/zstd/tests/zbufftest.c new file mode 100644 index 000000000..cd3706af4 --- /dev/null +++ b/src/zstd/tests/zbufftest.c @@ -0,0 +1,625 @@ +/* + * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <string.h> /* strcmp */ +#include "timefn.h" /* UTIL_time_t */ +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel */ +#include "zstd.h" /* ZSTD_compressBound */ +#define ZBUFF_STATIC_LINKING_ONLY /* ZBUFF_createCCtx_advanced */ +#include "zbuff.h" /* ZBUFF_isError */ +#include "datagen.h" /* RDG_genBuffer */ +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64_* */ +#include "util.h" +#include "assert.h" + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const U32 nbTestsDefault = 10000; +#define COMPRESSIBLE_NOISE_LENGTH (10 MB) +#define FUZ_COMPRESSIBILITY_DEFAULT 50 +static const U32 prime1 = 2654435761U; +static const U32 prime2 = 2246822519U; + + + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +static const U64 g_refreshRate = SEC_TO_MICRO / 6; +static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define DISPLAYUPDATE(l, ...) 
if (g_displayLevel>=l) { \ + if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } } + +static U64 g_clockTime = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX(a,b) ((a)>(b)?(a):(b)) +/*! FUZ_rand() : + @return : a 27 bits random value, from a 32-bits `seed`. + `seed` is also modified */ +# define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static unsigned int FUZ_rand(unsigned int* seedPtr) +{ + U32 rand32 = *seedPtr; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *seedPtr = rand32; + return rand32 >> 5; +} + + +/* +static unsigned FUZ_highbit32(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + for ( ; v32 ; v32>>=1) nbBits++; + return nbBits; +} +*/ + +static void* ZBUFF_allocFunction(void* opaque, size_t size) +{ + void* address = malloc(size); + (void)opaque; + /* DISPLAYLEVEL(4, "alloc %p, %d opaque=%p \n", address, (int)size, opaque); */ + return address; +} + +static void ZBUFF_freeFunction(void* opaque, void* address) +{ + (void)opaque; + /* if (address) DISPLAYLEVEL(4, "free %p opaque=%p \n", address, opaque); */ + free(address); +} + +static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem) +{ + int testResult = 0; + size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; + void* CNBuffer = malloc(CNBufferSize); + size_t const skippableFrameSize = 11; + size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH); + void* compressedBuffer = malloc(compressedBufferSize); + size_t const decodedBufferSize = CNBufferSize; + void* decodedBuffer = malloc(decodedBufferSize); + size_t cSize, readSize, readSkipSize, genSize; + U32 testNb=0; + 
ZBUFF_CCtx* zc = ZBUFF_createCCtx_advanced(customMem); + ZBUFF_DCtx* zd = ZBUFF_createDCtx_advanced(customMem); + + /* Create compressible test buffer */ + if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) { + DISPLAY("Not enough memory, aborting\n"); + goto _output_error; + } + RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed); + + /* generate skippable frame */ + MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START); + MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize); + cSize = skippableFrameSize + 8; + + /* Basic compression test */ + DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZBUFF_compressInitDictionary(zc, CNBuffer, 128 KB, 1); + readSize = CNBufferSize; + genSize = compressedBufferSize; + { size_t const r = ZBUFF_compressContinue(zc, ((char*)compressedBuffer)+cSize, &genSize, CNBuffer, &readSize); + if (ZBUFF_isError(r)) goto _output_error; } + if (readSize != CNBufferSize) goto _output_error; /* entire input should be consumed */ + cSize += genSize; + genSize = compressedBufferSize - cSize; + { size_t const r = ZBUFF_compressEnd(zc, ((char*)compressedBuffer)+cSize, &genSize); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize += genSize; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* skippable frame test */ + DISPLAYLEVEL(4, "test%3i : decompress skippable frame : ", testNb++); + ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB); + readSkipSize = cSize; + genSize = CNBufferSize; + { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, compressedBuffer, &readSkipSize); + if (r != 0) goto _output_error; } + if (genSize != 0) goto _output_error; /* skippable frame len is 0 */ + DISPLAYLEVEL(4, "OK \n"); + + /* Basic decompression test */ + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + 
ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB); + readSize = cSize - readSkipSize; + genSize = CNBufferSize; + { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, ((char*)compressedBuffer)+readSkipSize, &readSize); + if (r != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (genSize != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (readSize+readSkipSize != cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedCInSize : ", testNb++); { assert(ZBUFF_recommendedCInSize() != 0); } DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedCOutSize : ", testNb++); { assert(ZBUFF_recommendedCOutSize() != 0); } DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedDInSize : ", testNb++); { assert(ZBUFF_recommendedDInSize() != 0); } DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedDOutSize : ", testNb++); { assert(ZBUFF_recommendedDOutSize() != 0); } DISPLAYLEVEL(4, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error; + } } + DISPLAYLEVEL(4, "OK \n"); + + /* Byte-by-byte decompression test */ + DISPLAYLEVEL(4, "test%3i : decompress byte-by-byte : ", testNb++); + { size_t r, pIn=0, pOut=0; + do + { ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB); + r = 1; + while (r) { + size_t inS = 1; + size_t outS = 1; + r = ZBUFF_decompressContinue(zd, ((BYTE*)decodedBuffer)+pOut, &outS, ((BYTE*)compressedBuffer)+pIn, &inS); + pIn += inS; + pOut += outS; + } + readSize = pIn; + genSize = pOut; + } while (genSize==0); + } + if (genSize != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (readSize != 
cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(4, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error; + } } + DISPLAYLEVEL(4, "OK \n"); + +_end: + ZBUFF_freeCCtx(zc); + ZBUFF_freeDCtx(zd); + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + return testResult; + +_output_error: + testResult = 1; + DISPLAY("Error detected in Unit tests ! \n"); + goto _end; +} + + +static size_t findDiff(const void* buf1, const void* buf2, size_t max) +{ + const BYTE* b1 = (const BYTE*)buf1; + const BYTE* b2 = (const BYTE*)buf2; + size_t u; + for (u=0; u<max; u++) { + if (b1[u] != b2[u]) break; + } + return u; +} + +static size_t FUZ_rLogLength(U32* seed, U32 logLength) +{ + size_t const lengthMask = ((size_t)1 << logLength) - 1; + return (lengthMask+1) + (FUZ_rand(seed) & lengthMask); +} + +static size_t FUZ_randomLength(U32* seed, U32 maxLog) +{ + U32 const logLength = FUZ_rand(seed) % maxLog; + return FUZ_rLogLength(seed, logLength); +} + +#define CHECK(cond, ...) 
if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } + +static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility) +{ + static const U32 maxSrcLog = 24; + static const U32 maxSampleLog = 19; + BYTE* cNoiseBuffer[5]; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* copyBuffer; + size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog); + BYTE* cBuffer; + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* dstBuffer; + size_t dstBufferSize = srcBufferSize; + U32 result = 0; + U32 testNb = 0; + U32 coreSeed = seed; + ZBUFF_CCtx* zc; + ZBUFF_DCtx* zd; + UTIL_time_t startClock = UTIL_getTime(); + + /* allocations */ + zc = ZBUFF_createCCtx(); + zd = ZBUFF_createDCtx(); + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + copyBuffer= (BYTE*)malloc (copyBufferSize); + dstBuffer = (BYTE*)malloc (dstBufferSize); + cBuffer = (BYTE*)malloc (cBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd, + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + + /* 
catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + const BYTE* dict; + size_t maxTestSize, dictSize; + size_t cSize, totalTestSize, totalCSize, totalGenSize; + size_t errorCode; + U32 n, nbChunks; + XXH64_state_t xxhState; + U64 crcOrig; + + /* init */ + DISPLAYUPDATE(2, "\r%6u", testNb); + if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u ", nbTests); + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime1; + + /* states full reset (unsynchronized) */ + /* some issues only happen when reusing states in a specific sequence of parameters */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { ZBUFF_freeCCtx(zc); zc = ZBUFF_createCCtx(); } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { ZBUFF_freeDCtx(zd); zd = ZBUFF_createDCtx(); } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1; + maxTestSize = FUZ_rLogLength(&lseed, testLog); + dictSize = (FUZ_rand(&lseed)==1) ? 
FUZ_randomLength(&lseed, maxSampleLog) : 0; + /* random dictionary selection */ + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + { size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, ZSTD_CONTENTSIZE_UNKNOWN); + CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError)); + } } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + nbChunks = (FUZ_rand(&lseed) & 127) + 2; + for (n=0, cSize=0, totalTestSize=0 ; (n<nbChunks) && (totalTestSize < maxTestSize) ; n++) { + /* compress random chunk into random size dst buffer */ + { size_t readChunkSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - readChunkSize); + + size_t const compressionError = ZBUFF_compressContinue(zc, cBuffer+cSize, &dstBuffSize, srcBuffer+srcStart, &readChunkSize); + CHECK (ZBUFF_isError(compressionError), "compression error : %s", ZBUFF_getErrorName(compressionError)); + + XXH64_update(&xxhState, srcBuffer+srcStart, readChunkSize); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, readChunkSize); + cSize += dstBuffSize; + totalTestSize += readChunkSize; + } + + /* random flush operation, to mess around */ + if ((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + size_t const flushError = ZBUFF_compressFlush(zc, cBuffer+cSize, &dstBuffSize); + CHECK (ZBUFF_isError(flushError), "flush error : %s", ZBUFF_getErrorName(flushError)); + cSize += dstBuffSize; + } } + + /* final frame 
epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + U32 const enoughDstSize = dstBuffSize >= remainingToFlush; + remainingToFlush = ZBUFF_compressEnd(zc, cBuffer+cSize, &dstBuffSize); + CHECK (ZBUFF_isError(remainingToFlush), "flush error : %s", ZBUFF_getErrorName(remainingToFlush)); + CHECK (enoughDstSize && remainingToFlush, "ZBUFF_compressEnd() not fully flushed (%u remaining), but enough space available", (U32)remainingToFlush); + cSize += dstBuffSize; + } } + crcOrig = XXH64_digest(&xxhState); + + /* multi - fragments decompression test */ + ZBUFF_decompressInitDictionary(zd, dict, dictSize); + errorCode = 1; + for (totalCSize = 0, totalGenSize = 0 ; errorCode ; ) { + size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize); + CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode)); + totalGenSize += dstBuffSize; + totalCSize += readCSrcSize; + } + CHECK (errorCode != 0, "frame not fully decoded"); + CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size") + CHECK (totalCSize != cSize, "compressed data should be fully read") + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t 
const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + ZBUFF_decompressInit(zd); + totalCSize = 0; + totalGenSize = 0; + while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) ) { + size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize); + if (ZBUFF_isError(decompressError)) break; /* error correctly detected */ + totalGenSize += dstBuffSize; + totalCSize += readCSrcSize; + } } + DISPLAY("\r%u fuzzer tests completed \n", testNb); + +_cleanup: + ZBUFF_freeCCtx(zc); + ZBUFF_freeDCtx(zd); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + +/*-******************************************************* +* Command line +*********************************************************/ +static int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} 
+ + +int main(int argc, const char** argv) +{ + U32 seed=0; + int seedset=0; + int argNb; + int nbTests = nbTestsDefault; + int testNb = 0; + int proba = FUZ_COMPRESSIBILITY_DEFAULT; + int result=0; + U32 mainPause = 0; + const char* programName = argv[0]; + ZSTD_customMem customMem = { ZBUFF_allocFunction, ZBUFF_freeFunction, NULL }; + ZSTD_customMem customNULL = { NULL, NULL, NULL }; + + /* Check command line */ + for(argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Parsing commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + argument++; + + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + case 'v': + argument++; + g_displayLevel=4; + break; + case 'q': + argument++; + g_displayLevel--; + break; + case 'p': /* pause at the end */ + argument++; + mainPause = 1; + break; + + case 'i': + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + nbTests *= 10; + nbTests += *argument - '0'; + argument++; + } + break; + + case 'T': + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + g_clockTime *= 10; + g_clockTime += *argument - '0'; + argument++; + } + if (*argument=='m') g_clockTime *=60, argument++; + if (*argument=='n') argument++; + g_clockTime *= SEC_TO_MICRO; + break; + + case 's': + argument++; + seed=0; + seedset=1; + while ((*argument>='0') && (*argument<='9')) { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + + case 't': + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) { + testNb *= 10; + testNb += *argument - '0'; + argument++; + } + break; + + case 'P': /* compressibility % */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if (proba>100) proba=100; + break; + + default: + 
return FUZ_usage(programName); + } + } } } /* for(argNb=1; argNb<argc; argNb++) */ + + /* Get Seed */ + DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + DISPLAY("Seed = %u\n", seed); + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + + if (nbTests<=0) nbTests=1; + + if (testNb==0) { + result = basicUnitTests(0, ((double)proba) / 100, customNULL); /* constant seed for predictability */ + if (!result) { + DISPLAYLEVEL(4, "Unit tests using customMem :\n") + result = basicUnitTests(0, ((double)proba) / 100, customMem); /* use custom memory allocation functions */ + } } + + if (!result) + result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + + if (mainPause) { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + return result; +} diff --git a/src/zstd/tests/zstreamtest.c b/src/zstd/tests/zstreamtest.c new file mode 100644 index 000000000..79d5a8281 --- /dev/null +++ b/src/zstd/tests/zstreamtest.c @@ -0,0 +1,2654 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*-************************************ + * Compiler specific + **************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +#endif + + +/*-************************************ + * Includes + **************************************/ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <string.h> /* strcmp */ +#include <assert.h> /* assert */ +#include "timefn.h" /* UTIL_time_t, UTIL_getTime */ +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */ +#include "zstd.h" /* ZSTD_compressBound */ +#include "zstd_errors.h" /* ZSTD_error_srcSize_wrong */ +#include "zstdmt_compress.h" +#include "zdict.h" /* ZDICT_trainFromBuffer */ +#include "datagen.h" /* RDG_genBuffer */ +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH64_* */ +#include "seqgen.h" +#include "util.h" +#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ + + +/*-************************************ + * Constants + **************************************/ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const int nbTestsDefault = 10000; +static const U32 g_cLevelMax_smallTests = 10; +#define COMPRESSIBLE_NOISE_LENGTH (10 MB) +#define FUZ_COMPRESSIBILITY_DEFAULT 50 +static const U32 prime32 = 2654435761U; + + +/*-************************************ + * Display Macros + **************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (g_displayLevel>=l) { \ + DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } +static U32 g_displayLevel = 2; + +static const U64 g_refreshRate = SEC_TO_MICRO / 6; +static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } } + +static U64 g_clockTime = 0; + + +/*-******************************************************* + * Check macros + *********************************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX(a,b) ((a)>(b)?(a):(b)) +/*! FUZ_rand() : + @return : a 27 bits random value, from a 32-bits `seed`. + `seed` is also modified */ +#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static U32 FUZ_rand(U32* seedPtr) +{ + static const U32 prime2 = 2246822519U; + U32 rand32 = *seedPtr; + rand32 *= prime32; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *seedPtr = rand32; + return rand32 >> 5; +} + +#define CHECK(cond, ...) { \ + if (cond) { \ + DISPLAY("Error => "); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u, line %u) \n", \ + (unsigned)seed, testNb, __LINE__); \ + goto _output_error; \ +} } + +#define CHECK_Z(f) { \ + size_t const err = f; \ + CHECK(ZSTD_isError(err), "%s : %s ", \ + #f, ZSTD_getErrorName(err)); \ +} + +#define CHECK_RET(ret, cond, ...) 
{ \ + if (cond) { \ + DISPLAY("Error %llu => ", (unsigned long long)ret); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (line %u)\n", __LINE__); \ + return ret; \ +} } + +#define CHECK_RET_Z(f) { \ + size_t const err = f; \ + CHECK_RET(err, ZSTD_isError(err), "%s : %s ", \ + #f, ZSTD_getErrorName(err)); \ +} + + +/*====================================================== + * Basic Unit tests + *======================================================*/ + +typedef struct { + void* start; + size_t size; + size_t filled; +} buffer_t; + +static const buffer_t kBuffNull = { NULL, 0 , 0 }; + +static void FUZ_freeDictionary(buffer_t dict) +{ + free(dict.start); +} + +static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blockSize, size_t requestedDictSize) +{ + buffer_t dict = kBuffNull; + size_t const nbBlocks = (srcSize + (blockSize-1)) / blockSize; + size_t* const blockSizes = (size_t*)malloc(nbBlocks * sizeof(size_t)); + if (!blockSizes) return kBuffNull; + dict.start = malloc(requestedDictSize); + if (!dict.start) { free(blockSizes); return kBuffNull; } + { size_t nb; + for (nb=0; nb<nbBlocks-1; nb++) blockSizes[nb] = blockSize; + blockSizes[nbBlocks-1] = srcSize - (blockSize * (nbBlocks-1)); + } + { size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks); + free(blockSizes); + if (ZDICT_isError(dictSize)) { FUZ_freeDictionary(dict); return kBuffNull; } + dict.size = requestedDictSize; + dict.filled = dictSize; + return dict; + } +} + +/* Round trips data and updates xxh with the decompressed data produced */ +static size_t SEQ_roundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + XXH64_state_t* xxh, void* data, size_t size, + ZSTD_EndDirective endOp) +{ + static BYTE compressed[1024]; + static BYTE uncompressed[1024]; + + ZSTD_inBuffer cin = {data, size, 0}; + size_t cret; + + do { + ZSTD_outBuffer cout = { compressed, sizeof(compressed), 0 }; + ZSTD_inBuffer din = { compressed, 0, 0 }; + ZSTD_outBuffer 
dout = { uncompressed, 0, 0 }; + + cret = ZSTD_compressStream2(cctx, &cout, &cin, endOp); + if (ZSTD_isError(cret)) + return cret; + + din.size = cout.pos; + while (din.pos < din.size || (endOp == ZSTD_e_end && cret == 0)) { + size_t dret; + + dout.pos = 0; + dout.size = sizeof(uncompressed); + dret = ZSTD_decompressStream(dctx, &dout, &din); + if (ZSTD_isError(dret)) + return dret; + XXH64_update(xxh, dout.dst, dout.pos); + if (dret == 0) + break; + } + } while (cin.pos < cin.size || (endOp != ZSTD_e_continue && cret != 0)); + return 0; +} + +/* Generates some data and round trips it */ +static size_t SEQ_generateRoundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + XXH64_state_t* xxh, SEQ_stream* seq, + SEQ_gen_type type, unsigned value) +{ + static BYTE data[1024]; + size_t gen; + + do { + SEQ_outBuffer sout = {data, sizeof(data), 0}; + size_t ret; + gen = SEQ_gen(seq, type, value, &sout); + + ret = SEQ_roundTrip(cctx, dctx, xxh, sout.dst, sout.pos, ZSTD_e_continue); + if (ZSTD_isError(ret)) + return ret; + } while (gen != 0); + + return 0; +} + +static size_t getCCtxParams(ZSTD_CCtx* zc, ZSTD_parameters* savedParams) +{ + int value; + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_windowLog, (int*)&savedParams->cParams.windowLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_hashLog, (int*)&savedParams->cParams.hashLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_chainLog, (int*)&savedParams->cParams.chainLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_searchLog, (int*)&savedParams->cParams.searchLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_minMatch, (int*)&savedParams->cParams.minMatch)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetLength, (int*)&savedParams->cParams.targetLength)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_strategy, &value)); + savedParams->cParams.strategy = value; + + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_checksumFlag, &savedParams->fParams.checksumFlag)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, 
ZSTD_c_contentSizeFlag, &savedParams->fParams.contentSizeFlag)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_dictIDFlag, &value)); + savedParams->fParams.noDictIDFlag = !value; + return 0; +} + +static U32 badParameters(ZSTD_CCtx* zc, ZSTD_parameters const savedParams) +{ + ZSTD_parameters params; + if (ZSTD_isError(getCCtxParams(zc, ¶ms))) return 10; + CHECK_RET(1, params.cParams.windowLog != savedParams.cParams.windowLog, "windowLog"); + CHECK_RET(2, params.cParams.hashLog != savedParams.cParams.hashLog, "hashLog"); + CHECK_RET(3, params.cParams.chainLog != savedParams.cParams.chainLog, "chainLog"); + CHECK_RET(4, params.cParams.searchLog != savedParams.cParams.searchLog, "searchLog"); + CHECK_RET(5, params.cParams.minMatch != savedParams.cParams.minMatch, "minMatch"); + CHECK_RET(6, params.cParams.targetLength != savedParams.cParams.targetLength, "targetLength"); + + CHECK_RET(7, params.fParams.checksumFlag != savedParams.fParams.checksumFlag, "checksumFlag"); + CHECK_RET(8, params.fParams.contentSizeFlag != savedParams.fParams.contentSizeFlag, "contentSizeFlag"); + CHECK_RET(9, params.fParams.noDictIDFlag != savedParams.fParams.noDictIDFlag, "noDictIDFlag"); + return 0; +} + +static int basicUnitTests(U32 seed, double compressibility) +{ + size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; + void* CNBuffer = malloc(CNBufferSize); + size_t const skippableFrameSize = 200 KB; + size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH); + void* compressedBuffer = malloc(compressedBufferSize); + size_t const decodedBufferSize = CNBufferSize; + void* decodedBuffer = malloc(decodedBufferSize); + size_t cSize; + int testResult = 0; + int testNb = 1; + U32 coreSeed = 0; /* this name to conform with CHECK_Z macro display */ + ZSTD_CStream* zc = ZSTD_createCStream(); + ZSTD_DStream* zd = ZSTD_createDStream(); + ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2); + + ZSTD_inBuffer inBuff, inBuff2; + ZSTD_outBuffer outBuff; 
+ buffer_t dictionary = kBuffNull; + size_t const dictSize = 128 KB; + unsigned dictID = 0; + + /* Create compressible test buffer */ + if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) { + DISPLAY("Not enough memory, aborting \n"); + goto _output_error; + } + RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed); + + /* Create dictionary */ + DISPLAYLEVEL(3, "creating dictionary for unit tests \n"); + dictionary = FUZ_createDictionary(CNBuffer, CNBufferSize / 3, 16 KB, 48 KB); + if (!dictionary.start) { + DISPLAY("Error creating dictionary, aborting \n"); + goto _output_error; + } + dictID = ZDICT_getDictID(dictionary.start, dictionary.filled); + + /* Basic compression test */ + DISPLAYLEVEL(3, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + CHECK_Z( ZSTD_initCStream(zc, 1 /* cLevel */) ); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + DISPLAYLEVEL(3, "OK (%u bytes)\n", (unsigned)outBuff.pos); + + /* generate skippable frame */ + MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START); + MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize); + cSize = skippableFrameSize + 8; + + /* Basic compression test using dict */ + DISPLAYLEVEL(3, "test%3i : skipframe + compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + CHECK_Z( ZSTD_initCStream_usingDict(zc, CNBuffer, dictSize, 1 /* cLevel */) ); + outBuff.dst = (char*)(compressedBuffer)+cSize; + assert(compressedBufferSize > cSize); + outBuff.size = compressedBufferSize - cSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = 
CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize += outBuff.pos; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", + (unsigned)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* context size functions */ + DISPLAYLEVEL(3, "test%3i : estimate CStream size : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictSize); + size_t const cstreamSize = ZSTD_estimateCStreamSize_usingCParams(cParams); + size_t const cdictSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); /* uses ZSTD_initCStream_usingDict() */ + if (ZSTD_isError(cstreamSize)) goto _output_error; + if (ZSTD_isError(cdictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (unsigned)(cstreamSize + cdictSize)); + } + + /* context size functions */ + DISPLAYLEVEL(3, "test%3i : estimate CStream size using CCtxParams : ", testNb++); + { ZSTD_CCtx_params* const params = ZSTD_createCCtxParams(); + size_t cstreamSize, cctxSize; + CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 19) ); + cstreamSize = ZSTD_estimateCStreamSize_usingCCtxParams(params); + CHECK_Z(cstreamSize); + cctxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + CHECK_Z(cctxSize); + if (cstreamSize <= cctxSize + 2 * ZSTD_BLOCKSIZE_MAX) goto _output_error; + ZSTD_freeCCtxParams(params); + DISPLAYLEVEL(3, "OK \n"); + } + + DISPLAYLEVEL(3, "test%3i : check actual CStream size : ", testNb++); + { size_t const s = ZSTD_sizeof_CStream(zc); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (unsigned)s); + } + + /* Attempt bad compression parameters */ + DISPLAYLEVEL(3, "test%3i : use bad compression parameters : ", testNb++); + { size_t r; + ZSTD_parameters 
params = ZSTD_getParams(1, 0, 0); + params.cParams.minMatch = 2; + r = ZSTD_initCStream_advanced(zc, NULL, 0, params, 0); + if (!ZSTD_isError(r)) goto _output_error; + DISPLAYLEVEL(3, "init error : %s \n", ZSTD_getErrorName(r)); + } + + /* skippable frame test */ + DISPLAYLEVEL(3, "test%3i : decompress skippable frame : ", testNb++); + CHECK_Z( ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize) ); + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + DISPLAYLEVEL(5, " ( ZSTD_decompressStream => %u ) ", (unsigned)r); + if (r != 0) goto _output_error; + } + if (outBuff.pos != 0) goto _output_error; /* skippable frame output len is 0 */ + DISPLAYLEVEL(3, "OK \n"); + + /* Basic decompression test */ + inBuff2 = inBuff; + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + CHECK_Z( ZSTD_DCtx_setParameter(zd, ZSTD_d_windowLogMax, ZSTD_WINDOWLOG_LIMIT_DEFAULT+1) ); /* large limit */ + { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(3, "OK \n"); + + /* Re-use without init */ + DISPLAYLEVEL(3, "test%3i : decompress again without init (re-use previous settings): ", testNb++); + outBuff.pos = 0; + { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff2); + if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same 
amount */ + if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(3, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + /* context size functions */ + DISPLAYLEVEL(3, "test%3i : estimate DStream size : ", testNb++); + { ZSTD_frameHeader fhi; + const void* cStart = (char*)compressedBuffer + (skippableFrameSize + 8); + size_t const gfhError = ZSTD_getFrameHeader(&fhi, cStart, cSize); + if (gfhError!=0) goto _output_error; + DISPLAYLEVEL(5, " (windowSize : %u) ", (unsigned)fhi.windowSize); + { size_t const s = ZSTD_estimateDStreamSize(fhi.windowSize) + /* uses ZSTD_initDStream_usingDict() */ + + ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (unsigned)s); + } } + + DISPLAYLEVEL(3, "test%3i : check actual DStream size : ", testNb++); + { size_t const s = ZSTD_sizeof_DStream(zd); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (unsigned)s); + } + + /* Decompression by small increment */ + DISPLAYLEVEL(3, "test%3i : decompress byte-by-byte : ", testNb++); + { /* skippable frame */ + size_t r = 1; + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + inBuff.src = compressedBuffer; + outBuff.dst = decodedBuffer; + inBuff.pos = 0; + outBuff.pos = 0; + while (r) { /* skippable frame */ + size_t const inSize = (FUZ_rand(&coreSeed) & 15) + 1; + size_t const outSize = (FUZ_rand(&coreSeed) & 15) + 1; + inBuff.size = inBuff.pos + inSize; + outBuff.size = outBuff.pos + outSize; + r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(r)) DISPLAYLEVEL(4, "ZSTD_decompressStream on skippable frame error : %s \n", ZSTD_getErrorName(r)); + if (ZSTD_isError(r)) goto 
_output_error; + } + /* normal frame */ + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + r=1; + while (r) { + size_t const inSize = FUZ_rand(&coreSeed) & 15; + size_t const outSize = (FUZ_rand(&coreSeed) & 15) + (!inSize); /* avoid having both sizes at 0 => would trigger a no_forward_progress error */ + inBuff.size = inBuff.pos + inSize; + outBuff.size = outBuff.pos + outSize; + r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(r)) DISPLAYLEVEL(4, "ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(r)); + if (ZSTD_isError(r)) goto _output_error; + } + } + if (outBuff.pos != CNBufferSize) DISPLAYLEVEL(4, "outBuff.pos != CNBufferSize : should have regenerated same amount ! \n"); + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != cSize) DISPLAYLEVEL(4, "inBuff.pos != cSize : should have real all input ! \n"); + if (inBuff.pos != cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(3, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + /* Decompression forward progress */ + DISPLAYLEVEL(3, "test%3i : generate error when ZSTD_decompressStream() doesn't progress : ", testNb++); + { /* skippable frame */ + size_t r = 0; + int decNb = 0; + int const maxDec = 100; + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + + outBuff.dst = decodedBuffer; + outBuff.pos = 0; + outBuff.size = CNBufferSize-1; /* 1 byte missing */ + + for (decNb=0; decNb<maxDec; decNb++) { + if (r==0) ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(r)) break; + } + if (!ZSTD_isError(r)) DISPLAYLEVEL(4, "ZSTD_decompressStream should have triggered a 
no_forward_progress error \n"); + if (!ZSTD_isError(r)) goto _output_error; /* should have triggered no_forward_progress error */ + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : NULL buffers : ", testNb++); + inBuff.src = NULL; + inBuff.size = 0; + inBuff.pos = 0; + outBuff.dst = NULL; + outBuff.size = 0; + outBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + CHECK(inBuff.pos != inBuff.size, "Entire input should be consumed"); + CHECK_Z( ZSTD_endStream(zc, &outBuff) ); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + { size_t const r = ZSTD_endStream(zc, &outBuff); + CHECK(r != 0, "Error or some data not flushed (ret=%zu)", r); + } + inBuff.src = outBuff.dst; + inBuff.size = outBuff.pos; + inBuff.pos = 0; + outBuff.dst = NULL; + outBuff.size = 0; + outBuff.pos = 0; + CHECK_Z( ZSTD_initDStream(zd) ); + { size_t const ret = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ret != 0) goto _output_error; + } + DISPLAYLEVEL(3, "OK\n"); + /* _srcSize compression test */ + DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + CHECK_Z( ZSTD_initCStream_srcSize(zc, 1, CNBufferSize) ); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + CHECK(inBuff.pos != inBuff.size, "Entire input should be consumed"); + { size_t const r = ZSTD_endStream(zc, &outBuff); + CHECK(r != 0, "Error or some data not flushed (ret=%zu)", r); + } + { unsigned long long origSize = ZSTD_findDecompressedSize(outBuff.dst, outBuff.pos); + CHECK(origSize == ZSTD_CONTENTSIZE_UNKNOWN, "Unknown!"); + CHECK((size_t)origSize != CNBufferSize, "Exact original size must be present (got %llu)", origSize); + } + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, 
(double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* wrong _srcSize compression test */ + DISPLAYLEVEL(3, "test%3i : too large srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1); + ZSTD_initCStream_srcSize(zc, 1, CNBufferSize+1); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; /* must fail : wrong srcSize */ + DISPLAYLEVEL(3, "OK (error detected : %s) \n", ZSTD_getErrorName(r)); } + + /* wrong _srcSize compression test */ + DISPLAYLEVEL(3, "test%3i : too small srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1); + ZSTD_initCStream_srcSize(zc, 1, CNBufferSize-1); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + { size_t const r = ZSTD_compressStream(zc, &outBuff, &inBuff); + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; /* must fail : wrong srcSize */ + DISPLAYLEVEL(3, "OK (error detected : %s) \n", ZSTD_getErrorName(r)); + } + + DISPLAYLEVEL(3, "test%3i : wrong srcSize !contentSizeFlag : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1); + { ZSTD_parameters params = ZSTD_getParams(1, CNBufferSize, 0); + params.fParams.contentSizeFlag = 0; + CHECK_Z(ZSTD_initCStream_advanced(zc, NULL, 0, params, CNBufferSize - MIN(CNBufferSize, 200 KB))); + outBuff.dst = (char*)compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + { size_t const r = ZSTD_compressStream(zc, &outBuff, &inBuff); + if 
(ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; /* must fail : wrong srcSize */ + DISPLAYLEVEL(3, "OK (error detected : %s) \n", ZSTD_getErrorName(r)); + } } + + /* Complex context re-use scenario */ + DISPLAYLEVEL(3, "test%3i : context re-use : ", testNb++); + ZSTD_freeCStream(zc); + zc = ZSTD_createCStream(); + if (zc==NULL) goto _output_error; /* memory allocation issue */ + /* use 1 */ + { size_t const inSize = 513; + DISPLAYLEVEL(5, "use1 "); + ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize); /* needs btopt + search3 to trigger hashLog3 */ + inBuff.src = CNBuffer; + inBuff.size = inSize; + inBuff.pos = 0; + outBuff.dst = (char*)(compressedBuffer)+cSize; + outBuff.size = ZSTD_compressBound(inSize); + outBuff.pos = 0; + DISPLAYLEVEL(5, "compress1 "); + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + DISPLAYLEVEL(5, "end1 "); + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + } + /* use 2 */ + { size_t const inSize = 1025; /* will not continue, because tables auto-adjust and are therefore different size */ + DISPLAYLEVEL(5, "use2 "); + ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize); /* needs btopt + search3 to trigger hashLog3 */ + inBuff.src = CNBuffer; + inBuff.size = inSize; + inBuff.pos = 0; + outBuff.dst = (char*)(compressedBuffer)+cSize; + outBuff.size = ZSTD_compressBound(inSize); + outBuff.pos = 0; + DISPLAYLEVEL(5, "compress2 "); + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + DISPLAYLEVEL(5, "end2 "); + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + } + DISPLAYLEVEL(3, "OK \n"); + + /* Decompression single pass with empty 
frame */ + cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, NULL, 0, 1); + CHECK_Z(cSize); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() single pass on empty frame : ", testNb++); + { ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t const dctxSize = ZSTD_sizeof_DCtx(dctx); + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1)); + + outBuff.dst = decodedBuffer; + outBuff.pos = 0; + outBuff.size = CNBufferSize; + + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK_Z(r); + CHECK(r != 0, "Entire frame must be decompressed"); + CHECK(outBuff.pos != 0, "Wrong size!"); + CHECK(memcmp(CNBuffer, outBuff.dst, CNBufferSize) != 0, "Corruption!"); + } + CHECK(dctxSize != ZSTD_sizeof_DCtx(dctx), "No buffers allocated"); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + /* Decompression with ZSTD_d_stableOutBuffer */ + cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1); + CHECK_Z(cSize); + { ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t const dctxSize0 = ZSTD_sizeof_DCtx(dctx); + size_t dctxSize1; + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1)); + + outBuff.dst = decodedBuffer; + outBuff.pos = 0; + outBuff.size = CNBufferSize; + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() single pass : ", testNb++); + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK_Z(r); + CHECK(r != 0, "Entire frame must be decompressed"); + CHECK(outBuff.pos != CNBufferSize, "Wrong size!"); + CHECK(memcmp(CNBuffer, outBuff.dst, CNBufferSize) != 0, "Corruption!"); + } + CHECK(dctxSize0 != ZSTD_sizeof_DCtx(dctx), "No buffers allocated"); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() stable out buffer : ", testNb++); + outBuff.pos = 0; + inBuff.pos = 0; + inBuff.size = 0; + while 
(inBuff.pos < cSize) { + inBuff.size += MIN(cSize - inBuff.pos, 1 + (FUZ_rand(&coreSeed) & 15)); + CHECK_Z(ZSTD_decompressStream(dctx, &outBuff, &inBuff)); + } + CHECK(outBuff.pos != CNBufferSize, "Wrong size!"); + CHECK(memcmp(CNBuffer, outBuff.dst, CNBufferSize) != 0, "Corruption!"); + dctxSize1 = ZSTD_sizeof_DCtx(dctx); + CHECK(!(dctxSize0 < dctxSize1), "Input buffer allocated"); + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() stable out buffer too small : ", testNb++); + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only); + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1)); + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + outBuff.pos = 0; + outBuff.size = CNBufferSize - 1; + { size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK(ZSTD_getErrorCode(r) != ZSTD_error_dstSize_tooSmall, "Must error but got %s", ZSTD_getErrorName(r)); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() stable out buffer modified : ", testNb++); + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only); + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1)); + inBuff.src = compressedBuffer; + inBuff.size = cSize - 1; + inBuff.pos = 0; + outBuff.pos = 0; + outBuff.size = CNBufferSize; + CHECK_Z(ZSTD_decompressStream(dctx, &outBuff, &inBuff)); + ++inBuff.size; + outBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK(ZSTD_getErrorCode(r) != ZSTD_error_dstBuffer_wrong, "Must error but got %s", ZSTD_getErrorName(r)); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() buffered output : ", testNb++); + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only); + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 0)); + outBuff.pos = 0; + inBuff.pos = 0; + inBuff.size = 0; + while (inBuff.pos < cSize) { + inBuff.size += MIN(cSize - inBuff.pos, 1 + (FUZ_rand(&coreSeed) & 15)); + 
CHECK_Z(ZSTD_decompressStream(dctx, &outBuff, &inBuff)); + } + CHECK(outBuff.pos != CNBufferSize, "Wrong size!"); + CHECK(memcmp(CNBuffer, outBuff.dst, CNBufferSize) != 0, "Corruption!"); + CHECK(!(dctxSize1 < ZSTD_sizeof_DCtx(dctx)), "Output buffer allocated"); + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_freeDCtx(dctx); + } + + /* CDict scenario */ + DISPLAYLEVEL(3, "test%3i : digested dictionary : ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1 /*byRef*/ ); + size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict); + DISPLAYLEVEL(5, "ZSTD_initCStream_usingCDict result : %u ", (unsigned)initError); + if (ZSTD_isError(initError)) goto _output_error; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + DISPLAYLEVEL(5, "- starting ZSTD_compressStream "); + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + DISPLAYLEVEL(5, "- ZSTD_endStream result : %u ", (unsigned)r); + if (r != 0) goto _output_error; /* error, or some data not flushed */ + } + cSize = outBuff.pos; + ZSTD_freeCDict(cdict); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBufferSize*100); + } + + DISPLAYLEVEL(3, "test%3i : check CStream size : ", testNb++); + { size_t const s = ZSTD_sizeof_CStream(zc); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (unsigned)s); + } + + DISPLAYLEVEL(4, "test%3i : check Dictionary ID : ", testNb++); + { unsigned const dID = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (dID != dictID) goto _output_error; + DISPLAYLEVEL(4, "OK (%u) \n", dID); + } + + /* DDict scenario */ + DISPLAYLEVEL(3, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (unsigned)CNBufferSize); + { 
ZSTD_DDict* const ddict = ZSTD_createDDict(dictionary.start, dictionary.filled); + size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict); + if (ZSTD_isError(initError)) goto _output_error; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (r != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ + ZSTD_freeDDict(ddict); + DISPLAYLEVEL(3, "OK \n"); + } + + /* Memory restriction */ + DISPLAYLEVEL(3, "test%3i : maxWindowSize < frame requirement : ", testNb++); + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + CHECK_Z( ZSTD_DCtx_setParameter(zd, ZSTD_d_windowLogMax, 10) ); /* too small limit */ + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (!ZSTD_isError(r)) goto _output_error; /* must fail : frame requires > 100 bytes */ + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); } + ZSTD_DCtx_reset(zd, ZSTD_reset_session_and_parameters); /* leave zd in good shape for next tests */ + + DISPLAYLEVEL(3, "test%3i : dictionary source size and level : ", testNb++); + { ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + int const maxLevel = 16; /* first level with zstd_opt */ + int level; + assert(maxLevel < ZSTD_maxCLevel()); + CHECK_Z( ZSTD_DCtx_loadDictionary_byReference(dctx, dictionary.start, dictionary.filled) ); + for (level = 1; level <= maxLevel; ++level) { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, level); + size_t const maxSize = MIN(1 
MB, CNBufferSize); + size_t size; + for (size = 512; size <= maxSize; size <<= 1) { + U64 const crcOrig = XXH64(CNBuffer, size, 0); + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_parameters savedParams; + getCCtxParams(cctx, &savedParams); + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = size; + inBuff.pos = 0; + CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict)); + CHECK_Z(ZSTD_compressStream2(cctx, &outBuff, &inBuff, ZSTD_e_end)); + CHECK(badParameters(cctx, savedParams), "Bad CCtx params"); + if (inBuff.pos != inBuff.size) goto _output_error; + { ZSTD_outBuffer decOut = {decodedBuffer, size, 0}; + ZSTD_inBuffer decIn = {outBuff.dst, outBuff.pos, 0}; + CHECK_Z( ZSTD_decompressStream(dctx, &decOut, &decIn) ); + if (decIn.pos != decIn.size) goto _output_error; + if (decOut.pos != size) goto _output_error; + { U64 const crcDec = XXH64(decOut.dst, decOut.pos, 0); + if (crcDec != crcOrig) goto _output_error; + } } + ZSTD_freeCCtx(cctx); + } + ZSTD_freeCDict(cdict); + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK\n"); + + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(zc, dictionary.start, dictionary.filled) ); + cSize = ZSTD_compress2(zc, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBufferSize, 100 KB)); + CHECK_Z(cSize); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() with dictionary : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + /* We should fail to decompress without a dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + size_t const ret = ZSTD_decompressStream(dctx, &out, &in); + if (!ZSTD_isError(ret)) goto _output_error; + } + /* We should succeed to decompress with the dictionary. 
*/ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictionary.start, dictionary.filled) ); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* The dictionary should presist across calls. */ + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* The dictionary should not be cleared by ZSTD_reset_session_only. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* When we reset the context the dictionary is cleared. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + size_t const ret = ZSTD_decompressStream(dctx, &out, &in); + if (!ZSTD_isError(ret)) goto _output_error; + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_resetDStream() with dictionary : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + /* We should succeed to decompress with the dictionary. 
*/ + ZSTD_resetDStream(dctx); + CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictionary.start, dictionary.filled) ); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* The dictionary should not be cleared by ZSTD_resetDStream(). */ + ZSTD_resetDStream(dctx); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* The dictionary should be cleared by ZSTD_initDStream(). */ + CHECK_Z( ZSTD_initDStream(dctx) ); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + size_t const ret = ZSTD_decompressStream(dctx, &out, &in); + if (!ZSTD_isError(ret)) goto _output_error; + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() with ddict : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_DDict* ddict = ZSTD_createDDict(dictionary.start, dictionary.filled); + /* We should succeed to decompress with the ddict. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddict) ); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* The ddict should presist across calls. */ + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* When we reset the context the ddict is cleared. 
*/ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + size_t const ret = ZSTD_decompressStream(dctx, &out, &in); + if (!ZSTD_isError(ret)) goto _output_error; + } + ZSTD_freeDCtx(dctx); + ZSTD_freeDDict(ddict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with prefix : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + /* We should succeed to decompress with the prefix. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_refPrefix_advanced(dctx, dictionary.start, dictionary.filled, ZSTD_dct_auto) ); + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + if (ZSTD_decompressStream(dctx, &out, &in) != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + } + /* The prefix should be cleared after the first compression. */ + { ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0}; + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + size_t const ret = ZSTD_decompressStream(dctx, &out, &in); + if (!ZSTD_isError(ret)) goto _output_error; + } + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_initDStream*() with dictionary : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_DDict* ddict = ZSTD_createDDict(dictionary.start, dictionary.filled); + size_t ret; + /* We should succeed to decompress with the dictionary. */ + CHECK_Z( ZSTD_initDStream_usingDict(dctx, dictionary.start, dictionary.filled) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, decodedBufferSize, compressedBuffer, cSize) ); + /* The dictionary should presist across calls. */ + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, decodedBufferSize, compressedBuffer, cSize) ); + /* We should succeed to decompress with the ddict. 
*/ + CHECK_Z( ZSTD_initDStream_usingDDict(dctx, ddict) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, decodedBufferSize, compressedBuffer, cSize) ); + /* The ddict should presist across calls. */ + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, decodedBufferSize, compressedBuffer, cSize) ); + /* When we reset the context the ddict is cleared. */ + CHECK_Z( ZSTD_initDStream(dctx) ); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, decodedBufferSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + ZSTD_freeDCtx(dctx); + ZSTD_freeDDict(ddict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_usingCDict_advanced with masked dictID : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictionary.filled); + ZSTD_frameParameters const fParams = { 1 /* contentSize */, 1 /* checksum */, 1 /* noDictID */}; + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); + size_t const initError = ZSTD_initCStream_usingCDict_advanced(zc, cdict, fParams, CNBufferSize); + if (ZSTD_isError(initError)) goto _output_error; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize = outBuff.pos; + ZSTD_freeCDict(cdict); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBufferSize*100); + } + + DISPLAYLEVEL(3, "test%3i : try retrieving dictID from frame : ", testNb++); + { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (did != 0) goto _output_error; + } + 
DISPLAYLEVEL(3, "OK (not detected) \n"); + + DISPLAYLEVEL(3, "test%3i : decompress without dictionary : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize); + if (!ZSTD_isError(r)) goto _output_error; /* must fail : dictionary not used */ + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); + } + + DISPLAYLEVEL(3, "test%3i : compress with ZSTD_CCtx_refPrefix : ", testNb++); + CHECK_Z( ZSTD_CCtx_refPrefix(zc, dictionary.start, dictionary.filled) ); + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + cSize = outBuff.pos; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBufferSize*100); + + DISPLAYLEVEL(3, "test%3i : decompress with ZSTD_DCtx_refPrefix : ", testNb++); + CHECK_Z( ZSTD_DCtx_refPrefix(zd, dictionary.start, dictionary.filled) ); + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_decompressStream(zd, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* must regenerate whole input */ + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress without dictionary (should fail): ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize); + if (!ZSTD_isError(r)) goto _output_error; /* must fail : dictionary not used */ + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); + } + + DISPLAYLEVEL(3, "test%3i : compress again with ZSTD_compressStream2 : ", testNb++); + outBuff.dst = compressedBuffer; + outBuff.size = 
compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + cSize = outBuff.pos; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBufferSize*100); + + DISPLAYLEVEL(3, "test%3i : decompress without dictionary (should work): ", testNb++); + CHECK_Z( ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize) ); + DISPLAYLEVEL(3, "OK \n"); + + /* Empty srcSize */ + DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_advanced with pledgedSrcSize=0 and dict : ", testNb++); + { ZSTD_parameters params = ZSTD_getParams(5, 0, 0); + params.fParams.contentSizeFlag = 1; + CHECK_Z( ZSTD_initCStream_advanced(zc, dictionary.start, dictionary.filled, params, 0 /* pledgedSrcSize==0 means "empty" when params.fParams.contentSizeFlag is set */) ); + } /* cstream advanced shall write content size = 0 */ + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = 0; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (ZSTD_endStream(zc, &outBuff) != 0) goto _output_error; + cSize = outBuff.pos; + if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly : ", testNb++); + { ZSTD_parameters params = ZSTD_getParams(5, 0, 0); + params.fParams.contentSizeFlag = 1; + CHECK_Z( ZSTD_initCStream_advanced(zc, NULL, 0, params, 0) ); + } /* cstream advanced shall write content size = 0 */ + inBuff.src = CNBuffer; + inBuff.size = 0; + inBuff.pos = 0; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (ZSTD_endStream(zc, &outBuff) != 0) 
goto _output_error; + cSize = outBuff.pos; + if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error; + + ZSTD_resetCStream(zc, 0); /* resetCStream should treat 0 as unknown */ + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = 0; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (ZSTD_endStream(zc, &outBuff) != 0) goto _output_error; + cSize = outBuff.pos; + if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + /* Basic multithreading compression test */ + DISPLAYLEVEL(3, "test%3i : compress %u bytes with multiple threads : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + { ZSTD_parameters const params = ZSTD_getParams(1, 0, 0); + int jobSize; + CHECK_Z( ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize)); + CHECK(jobSize != 0, "job size non-zero"); + CHECK_Z( ZSTDMT_initCStream_advanced(mtctx, CNBuffer, dictSize, params, CNBufferSize) ); + CHECK_Z( ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize)); + CHECK(jobSize != 0, "job size non-zero"); + } + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + { size_t const compressResult = ZSTDMT_compressStream_generic(mtctx, &outBuff, &inBuff, ZSTD_e_end); + if (compressResult != 0) goto _output_error; /* compression must be completed in a single round */ + } + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const compressedSize = ZSTD_findFrameCompressedSize(compressedBuffer, outBuff.pos); + if (compressedSize != outBuff.pos) goto _output_error; /* must be a full valid frame */ + } + DISPLAYLEVEL(3, "OK \n"); + + /* Complex multithreading + dictionary test */ + { U32 const nbWorkers = 2; + size_t const jobSize = 4 * 1 MB; + size_t 
const srcSize = jobSize * nbWorkers; /* we want each job to have predictable size */ + size_t const segLength = 2 KB; + size_t const offset = 600 KB; /* must be larger than window defined in cdict */ + size_t const start = jobSize + (offset-1); + const BYTE* const srcToCopy = (const BYTE*)CNBuffer + start; + BYTE* const dst = (BYTE*)CNBuffer + start - offset; + DISPLAYLEVEL(3, "test%3i : compress %u bytes with multiple threads + dictionary : ", testNb++, (unsigned)srcSize); + CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 3) ); + CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, nbWorkers) ); + CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_jobSize, jobSize) ); + assert(start > offset); + assert(start + segLength < COMPRESSIBLE_NOISE_LENGTH); + memcpy(dst, srcToCopy, segLength); /* create a long repetition at long distance for job 2 */ + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = srcSize; assert(srcSize < COMPRESSIBLE_NOISE_LENGTH); + inBuff.pos = 0; + } + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, 4 KB, dictionary.filled); /* intentionally lies on estimatedSrcSize, to push cdict into targeting a small window size */ + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem); + DISPLAYLEVEL(5, "cParams.windowLog = %u : ", cParams.windowLog); + CHECK_Z( ZSTD_CCtx_refCDict(zc, cdict) ); + CHECK_Z( ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end) ); + CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) ); /* do not keep a reference to cdict, as its lifetime ends */ + ZSTD_freeCDict(cdict); + } + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + cSize = outBuff.pos; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : decompress large frame created from multiple threads + dictionary : ", testNb++); + { ZSTD_DStream* 
const dstream = ZSTD_createDCtx(); + ZSTD_frameHeader zfh; + ZSTD_getFrameHeader(&zfh, compressedBuffer, cSize); + DISPLAYLEVEL(5, "frame windowsize = %u : ", (unsigned)zfh.windowSize); + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + inBuff.src = compressedBuffer; + inBuff.pos = 0; + CHECK_Z( ZSTD_initDStream_usingDict(dstream, dictionary.start, dictionary.filled) ); + inBuff.size = 1; /* avoid shortcut to single-pass mode */ + CHECK_Z( ZSTD_decompressStream(dstream, &outBuff, &inBuff) ); + inBuff.size = cSize; + CHECK_Z( ZSTD_decompressStream(dstream, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + ZSTD_freeDStream(dstream); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : check dictionary FSE tables can represent every code : ", testNb++); + { unsigned const kMaxWindowLog = 24; + unsigned value; + ZSTD_compressionParameters cParams = ZSTD_getCParams(3, 1U << kMaxWindowLog, 1024); + ZSTD_CDict* cdict; + ZSTD_DDict* ddict; + SEQ_stream seq = SEQ_initStream(0x87654321); + SEQ_gen_type type; + XXH64_state_t xxh; + + XXH64_reset(&xxh, 0); + cParams.windowLog = kMaxWindowLog; + cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem); + ddict = ZSTD_createDDict(dictionary.start, dictionary.filled); + + if (!cdict || !ddict) goto _output_error; + + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + ZSTD_resetDStream(zd); + CHECK_Z(ZSTD_CCtx_refCDict(zc, cdict)); + CHECK_Z(ZSTD_initDStream_usingDDict(zd, ddict)); + CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_windowLogMax, kMaxWindowLog)); + /* Test all values < 300 */ + for (value = 0; value < 300; ++value) { + for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) { + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value)); + } + } + /* Test values 2^8 to 2^17 */ + for (value = (1 << 8); value < (1 << 17); value <<= 1) { + for 
(type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) { + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value)); + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value + (value >> 2))); + } + } + /* Test offset values up to the max window log */ + for (value = 8; value <= kMaxWindowLog; ++value) { + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, SEQ_gen_of, (1U << value) - 1)); + } + + CHECK_Z(SEQ_roundTrip(zc, zd, &xxh, NULL, 0, ZSTD_e_end)); + CHECK(SEQ_digest(&seq) != XXH64_digest(&xxh), "SEQ XXH64 does not match"); + + ZSTD_freeCDict(cdict); + ZSTD_freeDDict(ddict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_srcSize sets requestedParams : ", testNb++); + { int level; + CHECK_Z(ZSTD_initCStream_srcSize(zc, 11, ZSTD_CONTENTSIZE_UNKNOWN)); + CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_compressionLevel, &level)); + CHECK(level != 11, "Compression level does not match"); + ZSTD_resetCStream(zc, ZSTD_CONTENTSIZE_UNKNOWN); + CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_compressionLevel, &level)); + CHECK(level != 11, "Compression level does not match"); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_advanced sets requestedParams : ", testNb++); + { ZSTD_parameters const params = ZSTD_getParams(9, 0, 0); + CHECK_Z(ZSTD_initCStream_advanced(zc, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN)); + CHECK(badParameters(zc, params), "Compression parameters do not match"); + ZSTD_resetCStream(zc, ZSTD_CONTENTSIZE_UNKNOWN); + CHECK(badParameters(zc, params), "Compression parameters do not match"); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_c_srcSizeHint bounds : ", testNb++); + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, INT_MAX)); + { int srcSizeHint; + CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_srcSizeHint, &srcSizeHint)); + CHECK(!(srcSizeHint == INT_MAX), "srcSizeHint doesn't match"); + } + 
CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error"); + DISPLAYLEVEL(3, "OK \n"); + + /* Overlen overwriting window data bug */ + DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++); + { /* This test has a window size of 1024 bytes and consists of 3 blocks: + 1. 'a' repeated 517 times + 2. 'b' repeated 516 times + 3. a compressed block with no literals and 3 sequence commands: + litlength = 0, offset = 24, match length = 24 + litlength = 0, offset = 24, match length = 3 (this one creates an overlength write of length 2*WILDCOPY_OVERLENGTH - 3) + litlength = 0, offset = 1021, match length = 3 (this one will try to read from overwritten data if the buffer is too small) */ + + const char* testCase = + "\x28\xB5\x2F\xFD\x04\x00\x4C\x00\x00\x10\x61\x61\x01\x00\x00\x2A" + "\x80\x05\x44\x00\x00\x08\x62\x01\x00\x00\x2A\x20\x04\x5D\x00\x00" + "\x00\x03\x40\x00\x00\x64\x60\x27\xB0\xE0\x0C\x67\x62\xCE\xE0"; + ZSTD_DStream* const zds = ZSTD_createDStream(); + if (zds==NULL) goto _output_error; + + CHECK_Z( ZSTD_initDStream(zds) ); + inBuff.src = testCase; + inBuff.size = 47; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + + while (inBuff.pos < inBuff.size) { + CHECK_Z( ZSTD_decompressStream(zds, &outBuff, &inBuff) ); + } + + ZSTD_freeDStream(zds); + } + DISPLAYLEVEL(3, "OK \n"); + + /* Small Sequence Section bug */ + DISPLAYLEVEL(3, "test%3i : decompress blocks with small sequences section : ", testNb++); + { /* This test consists of 3 blocks. Each block has one sequence. + The sequence has literal length of 10, match length of 10 and offset of 10. + The sequence value and compression mode for the blocks are following: + The order of values are ll, ml, of. 
+ - First block : (10, 7, 13) (rle, rle, rle) + - size of sequences section: 6 bytes (1 byte for nbSeq, 1 byte for encoding mode, 3 bytes for rle, 1 byte bitstream) + - Second block : (10, 7, 1) (repeat, repeat, rle) + - size of sequences section: 4 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 bytes for rle, 1 byte bitstream) + - Third block : (10, 7, 1) (repeat, repeat, repeat) + - size of sequences section: 3 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte bitstream) */ + + unsigned char compressed[] = { + 0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x3c, 0x35, 0x01, 0x00, 0xf0, 0x85, 0x08, + 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b, + 0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, + 0x69, 0x94, 0x89, 0x1c, 0x03, 0x44, 0x0a, 0x07, 0x00, 0xb4, 0x04, 0x80, + 0x40, 0x0a, 0xa4 + }; + unsigned int compressedSize = 51; + unsigned char decompressed[] = { + 0x85, 0x08, 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x85, 0x08, + 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b, + 0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0x4c, 0x6b, 0xa9, 0x8b, 0xbc, 0xc5, + 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, + 0x89, 0x1c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, 0x89, 0x1c + }; + unsigned int decompressedSize = 60; + + ZSTD_DStream* const zds = ZSTD_createDStream(); + if (zds==NULL) goto _output_error; + + CHECK_Z( ZSTD_initDStream(zds) ); + inBuff.src = compressed; + inBuff.size = compressedSize; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + + CHECK(ZSTD_decompressStream(zds, &outBuff, &inBuff) != 0, + "Decompress did not reach the end of frame"); + CHECK(inBuff.pos != inBuff.size, "Decompress did not fully consume input"); + CHECK(outBuff.pos != decompressedSize, "Decompressed size does not match"); + CHECK(memcmp(outBuff.dst, decompressed, decompressedSize) != 0, + "Decompressed data does not match"); + + 
ZSTD_freeDStream(zds); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : raw block can be streamed: ", testNb++); + { size_t const inputSize = 10000; + size_t const compCapacity = ZSTD_compressBound(inputSize); + BYTE* const input = (BYTE*)malloc(inputSize); + BYTE* const comp = (BYTE*)malloc(compCapacity); + BYTE* const decomp = (BYTE*)malloc(inputSize); + + CHECK(input == NULL || comp == NULL || decomp == NULL, "failed to alloc buffers"); + + RDG_genBuffer(input, inputSize, 0.0, 0.0, seed); + { size_t const compSize = ZSTD_compress(comp, compCapacity, input, inputSize, -(int)inputSize); + ZSTD_inBuffer in = { comp, 0, 0 }; + ZSTD_outBuffer out = { decomp, 0, 0 }; + CHECK_Z(compSize); + CHECK_Z( ZSTD_DCtx_reset(zd, ZSTD_reset_session_and_parameters) ); + while (in.size < compSize) { + in.size = MIN(in.size + 100, compSize); + while (in.pos < in.size) { + size_t const outPos = out.pos; + if (out.pos == out.size) { + out.size = MIN(out.size + 10, inputSize); + } + CHECK_Z( ZSTD_decompressStream(zd, &out, &in) ); + CHECK(!(out.pos > outPos), "We are not streaming (no output generated)"); + } + } + CHECK(in.pos != compSize, "Not all input consumed!"); + CHECK(out.pos != inputSize, "Not all output produced!"); + } + CHECK(memcmp(input, decomp, inputSize), "round trip failed!"); + + free(input); + free(comp); + free(decomp); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : dictionary + uncompressible block + reusing tables checks offset table validity: ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict_advanced( + dictionary.start, dictionary.filled, + ZSTD_dlm_byRef, ZSTD_dct_fullDict, + ZSTD_getCParams(3, 0, dictionary.filled), + ZSTD_defaultCMem); + const size_t inbufsize = 2 * 128 * 1024; /* 2 blocks */ + const size_t outbufsize = ZSTD_compressBound(inbufsize); + size_t inbufpos = 0; + size_t cursegmentlen; + BYTE *inbuf = (BYTE *)malloc(inbufsize); + BYTE *outbuf = (BYTE *)malloc(outbufsize); + BYTE *checkbuf = (BYTE 
*)malloc(inbufsize); + size_t ret; + + CHECK(cdict == NULL, "failed to alloc cdict"); + CHECK(inbuf == NULL, "failed to alloc input buffer"); + + /* first block is uncompressible */ + cursegmentlen = 128 * 1024; + RDG_genBuffer(inbuf + inbufpos, cursegmentlen, 0., 0., seed); + inbufpos += cursegmentlen; + + /* second block is compressible */ + cursegmentlen = 128 * 1024 - 256; + RDG_genBuffer(inbuf + inbufpos, cursegmentlen, 0.05, 0., seed); + inbufpos += cursegmentlen; + + /* and includes a very long backref */ + cursegmentlen = 128; + memcpy(inbuf + inbufpos, (BYTE*)dictionary.start + 256, cursegmentlen); + inbufpos += cursegmentlen; + + /* and includes a very long backref */ + cursegmentlen = 128; + memcpy(inbuf + inbufpos, (BYTE*)dictionary.start + 128, cursegmentlen); + inbufpos += cursegmentlen; + + ret = ZSTD_compress_usingCDict(zc, outbuf, outbufsize, inbuf, inbufpos, cdict); + CHECK_Z(ret); + + ret = ZSTD_decompress_usingDict(zd, checkbuf, inbufsize, outbuf, ret, dictionary.start, dictionary.filled); + CHECK_Z(ret); + + CHECK(memcmp(inbuf, checkbuf, inbufpos), "start and finish buffers don't match"); + + ZSTD_freeCDict(cdict); + free(inbuf); + free(outbuf); + free(checkbuf); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : dictionary + small blocks + reusing tables checks offset table validity: ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict_advanced( + dictionary.start, dictionary.filled, + ZSTD_dlm_byRef, ZSTD_dct_fullDict, + ZSTD_getCParams(3, 0, dictionary.filled), + ZSTD_defaultCMem); + ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0}; + int remainingInput = 256 * 1024; + int offset; + + CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); + CHECK_Z(ZSTD_CCtx_refCDict(zc, cdict)); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, 1)); + /* Write a bunch of 6 byte blocks */ + while (remainingInput > 0) { + char testBuffer[6] = "\xAA\xAA\xAA\xAA\xAA\xAA"; + const size_t kSmallBlockSize = 
sizeof(testBuffer); + ZSTD_inBuffer in = {testBuffer, kSmallBlockSize, 0}; + + CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_flush)); + CHECK(in.pos != in.size, "input not fully consumed"); + remainingInput -= kSmallBlockSize; + } + /* Write several very long offset matches into the dictionary */ + for (offset = 1024; offset >= 0; offset -= 128) { + ZSTD_inBuffer in = {(BYTE*)dictionary.start + offset, 128, 0}; + ZSTD_EndDirective flush = offset > 0 ? ZSTD_e_continue : ZSTD_e_end; + CHECK_Z(ZSTD_compressStream2(zc, &out, &in, flush)); + CHECK(in.pos != in.size, "input not fully consumed"); + } + /* Ensure decompression works */ + CHECK_Z(ZSTD_decompress_usingDict(zd, decodedBuffer, CNBufferSize, out.dst, out.pos, dictionary.start, dictionary.filled)); + + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + +_end: + FUZ_freeDictionary(dictionary); + ZSTD_freeCStream(zc); + ZSTD_freeDStream(zd); + ZSTDMT_freeCCtx(mtctx); + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + return testResult; + +_output_error: + testResult = 1; + DISPLAY("Error detected in Unit tests ! 
\n"); + goto _end; +} + + +/* ====== Fuzzer tests ====== */ + +static size_t findDiff(const void* buf1, const void* buf2, size_t max) +{ + const BYTE* b1 = (const BYTE*)buf1; + const BYTE* b2 = (const BYTE*)buf2; + size_t u; + for (u=0; u<max; u++) { + if (b1[u] != b2[u]) break; + } + if (u==max) { + DISPLAY("=> No difference detected within %u bytes \n", (unsigned)max); + return u; + } + DISPLAY("Error at position %u / %u \n", (unsigned)u, (unsigned)max); + if (u>=3) + DISPLAY(" %02X %02X %02X ", + b1[u-3], b1[u-2], b1[u-1]); + DISPLAY(" :%02X: %02X %02X %02X %02X %02X \n", + b1[u], b1[u+1], b1[u+2], b1[u+3], b1[u+4], b1[u+5]); + if (u>=3) + DISPLAY(" %02X %02X %02X ", + b2[u-3], b2[u-2], b2[u-1]); + DISPLAY(" :%02X: %02X %02X %02X %02X %02X \n", + b2[u], b2[u+1], b2[u+2], b2[u+3], b2[u+4], b2[u+5]); + return u; +} + +static size_t FUZ_rLogLength(U32* seed, U32 logLength) +{ + size_t const lengthMask = ((size_t)1 << logLength) - 1; + return (lengthMask+1) + (FUZ_rand(seed) & lengthMask); +} + +static size_t FUZ_randomLength(U32* seed, U32 maxLog) +{ + U32 const logLength = FUZ_rand(seed) % maxLog; + return FUZ_rLogLength(seed, logLength); +} + +/* Return value in range minVal <= v <= maxVal */ +static U32 FUZ_randomClampedLength(U32* seed, U32 minVal, U32 maxVal) +{ + U32 const mod = maxVal < minVal ? 1 : (maxVal + 1) - minVal; + return (U32)((FUZ_rand(seed) % mod) + minVal); +} + +static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressibility, int bigTests) +{ + U32 const maxSrcLog = bigTests ? 
24 : 22; + static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* cNoiseBuffer[5]; + size_t const copyBufferSize = srcBufferSize + (1<<maxSampleLog); + BYTE* const copyBuffer = (BYTE*)malloc (copyBufferSize); + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* const cBuffer = (BYTE*)malloc (cBufferSize); + size_t const dstBufferSize = srcBufferSize; + BYTE* const dstBuffer = (BYTE*)malloc (dstBufferSize); + U32 result = 0; + unsigned testNb = 0; + U32 coreSeed = seed; + ZSTD_CStream* zc = ZSTD_createCStream(); /* will be re-created sometimes */ + ZSTD_DStream* zd = ZSTD_createDStream(); /* will be re-created sometimes */ + ZSTD_DStream* const zd_noise = ZSTD_createDStream(); + UTIL_time_t const startClock = UTIL_getTime(); + const BYTE* dict = NULL; /* can keep same dict on 2 consecutive tests */ + size_t dictSize = 0; + U32 oldTestLog = 0; + U32 const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel() : g_cLevelMax_smallTests; + + /* allocations */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise , + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + 
memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + size_t totalTestSize, totalGenSize, cSize; + XXH64_state_t xxhState; + U64 crcOrig; + U32 resetAllowed = 1; + size_t maxTestSize; + + /* init */ + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime32; + if (nbTests >= testNb) { + DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); + } else { + DISPLAYUPDATE(2, "\r%6u ", testNb); + } + + /* states full reset (deliberately not synchronized) */ + /* some issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + ZSTD_freeCStream(zc); + zc = ZSTD_createCStream(); + CHECK(zc==NULL, "ZSTD_createCStream : allocation error"); + resetAllowed=0; + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + CHECK(zd==NULL, "ZSTD_createDStream : allocation error"); + CHECK_Z( ZSTD_initDStream_usingDict(zd, NULL, 0) ); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ && resetAllowed) { + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + maxTestSize = MIN(maxTestSize, srcBufferSize-16); + { U64 const 
pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + CHECK_Z( ZSTD_resetCStream(zc, pledgedSrcSize) ); + } + } else { + U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevelCandidate = ( FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / 3))) + + 1; + U32 const cLevel = MIN(cLevelCandidate, cLevelMax); + maxTestSize = FUZ_rLogLength(&lseed, testLog); + oldTestLog = testLog; + /* random dictionary selection */ + dictSize = ((FUZ_rand(&lseed)&7)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1; + CHECK_Z ( ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) ); + } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + U32 n; + for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) { + /* compress random chunks into randomly sized dst buffers */ + { size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN(maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + 
memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* random flush operation, to mess around */ + if ((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + CHECK_Z( ZSTD_flushStream(zc, &outBuff) ); + } } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + remainingToFlush = ZSTD_endStream(zc, &outBuff); + CHECK (ZSTD_isError(remainingToFlush), "end error : %s", ZSTD_getErrorName(remainingToFlush)); + } } + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + } + + /* multi - fragments decompression test */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + CHECK_Z ( ZSTD_resetDStream(zd) ); + } else { + CHECK_Z ( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = outBuff.pos + dstBuffSize; + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_getErrorCode(decompressionResult) == ZSTD_error_checksum_wrong) { + DISPLAY("checksum error : \n"); + findDiff(copyBuffer, dstBuffer, totalTestSize); + } + CHECK( ZSTD_isError(decompressionResult), 
"decompression error : %s", + ZSTD_getErrorName(decompressionResult) ); + } + CHECK (decompressionResult != 0, "frame not fully decoded"); + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", + (unsigned)outBuff.pos, (unsigned)totalTestSize); + CHECK (inBuff.pos != cSize, "compressed data should be fully read") + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + CHECK_Z( ZSTD_initDStream(zd_noise) ); /* note : no dictionary */ + { ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + while (outBuff.pos < dstBufferSize) { + size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); + outBuff.size = outBuff.pos + adjustedDstSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; + { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* No forward progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; + } } } } + 
DISPLAY("\r%u fuzzer tests completed \n", testNb); + +_cleanup: + ZSTD_freeCStream(zc); + ZSTD_freeDStream(zd); + ZSTD_freeDStream(zd_noise); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + +/* fuzzing ZSTDMT_* interface */ +static int fuzzerTests_MT(U32 seed, int nbTests, int startTest, + double compressibility, int bigTests) +{ + const U32 maxSrcLog = bigTests ? 24 : 22; + static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* cNoiseBuffer[5]; + size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog); + BYTE* const copyBuffer = (BYTE*)malloc (copyBufferSize); + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* const cBuffer = (BYTE*)malloc (cBufferSize); + size_t const dstBufferSize = srcBufferSize; + BYTE* const dstBuffer = (BYTE*)malloc (dstBufferSize); + U32 result = 0; + int testNb = 0; + U32 coreSeed = seed; + int nbThreads = 2; + ZSTDMT_CCtx* zc = ZSTDMT_createCCtx(nbThreads); /* will be reset sometimes */ + ZSTD_DStream* zd = ZSTD_createDStream(); /* will be reset sometimes */ + ZSTD_DStream* const zd_noise = ZSTD_createDStream(); + UTIL_time_t const startClock = UTIL_getTime(); + const BYTE* dict=NULL; /* can keep same dict on 2 consecutive tests */ + size_t dictSize = 0; + int const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel()-1 : g_cLevelMax_smallTests; + U32 const nbThreadsMax = bigTests ? 
4 : 2; + + /* allocations */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise , + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + DISPLAYLEVEL(6, "Creating initial context with %i threads \n", nbThreads); + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + size_t totalTestSize, totalGenSize, cSize; + XXH64_state_t xxhState; + U64 crcOrig; + size_t maxTestSize; + + FUZ_rand(&coreSeed); + if (nbTests >= testNb) { + DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); + } else { + DISPLAYUPDATE(2, "\r%6u ", testNb); + } + lseed = coreSeed ^ prime32; + + /* states full reset (deliberately not synchronized) */ + /* some issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + nbThreads = (FUZ_rand(&lseed) % nbThreadsMax) + 1; + 
DISPLAYLEVEL(5, "Creating new context with %u threads \n", nbThreads); + ZSTDMT_freeCCtx(zc); + zc = ZSTDMT_createCCtx(nbThreads); + CHECK(zc==NULL, "ZSTDMT_createCCtx allocation error") + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + CHECK(zd==NULL, "ZSTDMT_createCCtx allocation error") + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + int const cLevelCandidate = ( FUZ_rand(&lseed) + % (ZSTD_maxCLevel() - (MAX(testLog, dictLog) / 2)) ) + + 1; + int const cLevelThreadAdjusted = cLevelCandidate - (nbThreads * 2) + 2; /* reduce cLevel when multiple threads to reduce memory consumption */ + int const cLevelMin = MAX(cLevelThreadAdjusted, 1); /* no negative cLevel yet */ + int const cLevel = MIN(cLevelMin, cLevelMax); + maxTestSize = FUZ_rLogLength(&lseed, testLog); + + if (FUZ_rand(&lseed)&1) { /* simple init */ + int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; + DISPLAYLEVEL(5, "Init with compression level = %i \n", compressionLevel); + CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) ); + } else { /* advanced init */ + /* random dictionary selection */ + dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 
ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); + DISPLAYLEVEL(5, "Init with windowLog = %u, pledgedSrcSize = %u, dictSize = %u \n", + params.cParams.windowLog, (unsigned)pledgedSrcSize, (unsigned)dictSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1; + DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag); + CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapLog, FUZ_rand(&lseed) % 12) ); + CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_jobSize, FUZ_rand(&lseed) % (2*maxTestSize+1)) ); /* custom job size */ + CHECK_Z( ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) ); + } } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + U32 n; + for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) { + /* compress random chunks into randomly sized dst buffers */ + { size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN (maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + DISPLAYLEVEL(6, "Sending %u bytes to compress \n", (unsigned)srcSize); + CHECK_Z( ZSTDMT_compressStream(zc, &outBuff, &inBuff) ); + DISPLAYLEVEL(6, "%u bytes read by ZSTDMT_compressStream \n", (unsigned)inBuff.pos); + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* random flush operation, to mess around */ + if 
((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + size_t const previousPos = outBuff.pos; + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (unsigned)adjustedDstSize); + CHECK_Z( ZSTDMT_flushStream(zc, &outBuff) ); + assert(outBuff.pos >= previousPos); + DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_flushStream \n", (unsigned)(outBuff.pos-previousPos)); + } } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + size_t const previousPos = outBuff.pos; + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (unsigned)adjustedDstSize); + remainingToFlush = ZSTDMT_endStream(zc, &outBuff); + CHECK (ZSTD_isError(remainingToFlush), "ZSTDMT_endStream error : %s", ZSTD_getErrorName(remainingToFlush)); + assert(outBuff.pos >= previousPos); + DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_endStream \n", (unsigned)(outBuff.pos-previousPos)); + DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (unsigned)remainingToFlush); + } } + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + DISPLAYLEVEL(5, "Frame completed : %u bytes compressed into %u bytes \n", + (unsigned)totalTestSize, (unsigned)cSize); + } + + /* multi - fragments decompression test */ + assert(totalTestSize < dstBufferSize); + memset(dstBuffer, 170, totalTestSize); /* init dest area */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + CHECK_Z( ZSTD_resetDStream(zd) ); + } else { + CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + 
ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = outBuff.pos + dstBuffSize; + DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes into outBuff %u bytes \n", + (unsigned)readCSrcSize, (unsigned)dstBuffSize); + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressionResult)) { + DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult)); + findDiff(copyBuffer, dstBuffer, totalTestSize); + } + CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); + DISPLAYLEVEL(6, "total ingested (inBuff.pos) = %u and produced (outBuff.pos) = %u \n", + (unsigned)inBuff.pos, (unsigned)outBuff.pos); + } + CHECK (outBuff.pos != totalTestSize, + "decompressed data : wrong size (%u != %u)", + (unsigned)outBuff.pos, (unsigned)totalTestSize ); + CHECK (inBuff.pos != cSize, + "compressed data should be fully read (%u != %u)", + (unsigned)inBuff.pos, (unsigned)cSize ); + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + 
memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + CHECK_Z( ZSTD_initDStream(zd_noise) ); /* note : no dictionary */ + { ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + while (outBuff.pos < dstBufferSize) { + size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); + outBuff.size = outBuff.pos + adjustedDstSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; + { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* No forward progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; + } } } } + DISPLAY("\r%u fuzzer tests completed \n", testNb); + +_cleanup: + ZSTDMT_freeCCtx(zc); + ZSTD_freeDStream(zd); + ZSTD_freeDStream(zd_noise); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + +/** If useOpaqueAPI, sets param in cctxParams. + * Otherwise, sets the param in zc. */ +static size_t setCCtxParameter(ZSTD_CCtx* zc, ZSTD_CCtx_params* cctxParams, + ZSTD_cParameter param, unsigned value, + int useOpaqueAPI) +{ + if (useOpaqueAPI) { + return ZSTD_CCtxParams_setParameter(cctxParams, param, value); + } else { + return ZSTD_CCtx_setParameter(zc, param, value); + } +} + +/* Tests for ZSTD_compress_generic() API */ +static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest, + double compressibility, int bigTests) +{ + U32 const maxSrcLog = bigTests ? 
24 : 22; + static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* cNoiseBuffer[5]; + size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog); + BYTE* const copyBuffer = (BYTE*)malloc (copyBufferSize); + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* const cBuffer = (BYTE*)malloc (cBufferSize); + size_t const dstBufferSize = srcBufferSize; + BYTE* const dstBuffer = (BYTE*)malloc (dstBufferSize); + U32 result = 0; + int testNb = 0; + U32 coreSeed = seed; + ZSTD_CCtx* zc = ZSTD_createCCtx(); /* will be reset sometimes */ + ZSTD_DStream* zd = ZSTD_createDStream(); /* will be reset sometimes */ + ZSTD_DStream* const zd_noise = ZSTD_createDStream(); + UTIL_time_t const startClock = UTIL_getTime(); + const BYTE* dict = NULL; /* can keep same dict on 2 consecutive tests */ + size_t dictSize = 0; + U32 oldTestLog = 0; + U32 windowLogMalus = 0; /* can survive between 2 loops */ + U32 const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel()-1 : g_cLevelMax_smallTests; + U32 const nbThreadsMax = bigTests ? 
4 : 2; + ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); + + /* allocations */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + /* cctxParams is an allocation too : check it with the others so an out-of-memory condition is reported as such */ + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise || !cctxParams , + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + CHECK_Z( ZSTD_initDStream_usingDict(zd, NULL, 0) ); /* ensure at least one init */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + int opaqueAPI; + const BYTE* srcBuffer; + size_t totalTestSize, totalGenSize, cSize; + XXH64_state_t xxhState; + U64 crcOrig; + U32 resetAllowed = 1; + size_t maxTestSize; + ZSTD_parameters savedParams; + + /* init */ + if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + else { DISPLAYUPDATE(2, "\r%6u ", testNb); } + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime32; + DISPLAYLEVEL(5, " *** Test %u *** \n", testNb); + opaqueAPI = FUZ_rand(&lseed) & 1; + + /* states full reset (deliberately not synchronized) */ + /* some
issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + DISPLAYLEVEL(5, "Creating new context \n"); + ZSTD_freeCCtx(zc); + zc = ZSTD_createCCtx(); + CHECK(zc == NULL, "ZSTD_createCCtx allocation error"); + resetAllowed = 0; + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + CHECK(zd == NULL, "ZSTD_createDStream allocation error"); + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + CHECK_Z( ZSTD_CCtx_loadDictionary(zc, NULL, 0) ); /* cancel previous dict /*/ + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ + && resetAllowed) { + /* just set a compression level */ + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; + { int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; + DISPLAYLEVEL(5, "t%u : compression level : %i \n", testNb, compressionLevel); + CHECK_Z (setCCtxParameter(zc, cctxParams, ZSTD_c_compressionLevel, compressionLevel, opaqueAPI) ); + } + } else { + U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevelCandidate = (FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / 2))) + + 1; + int const cLevel = MIN(cLevelCandidate, cLevelMax); + DISPLAYLEVEL(5, "t%i: base cLevel : %u \n", testNb, cLevel); + maxTestSize = FUZ_rLogLength(&lseed, testLog); + DISPLAYLEVEL(5, "t%i: 
maxTestSize : %u \n", testNb, (unsigned)maxTestSize); + oldTestLog = testLog; + /* random dictionary selection */ + dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + if (!dictSize) dict=NULL; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; + ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, pledgedSrcSize, dictSize); + const U32 windowLogMax = bigTests ? 24 : 20; + const U32 searchLogMax = bigTests ? 15 : 13; + if (dictSize) + DISPLAYLEVEL(5, "t%u: with dictionary of size : %zu \n", testNb, dictSize); + + /* mess with compression parameters */ + cParams.windowLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.windowLog = MIN(windowLogMax, cParams.windowLog); + cParams.hashLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.chainLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.searchLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.searchLog = MIN(searchLogMax, cParams.searchLog); + cParams.minMatch += (FUZ_rand(&lseed) & 3) - 1; + cParams.targetLength = (U32)((cParams.targetLength + 1 ) * (0.5 + ((double)(FUZ_rand(&lseed) & 127) / 128))); + cParams = ZSTD_adjustCParams(cParams, pledgedSrcSize, dictSize); + + if (FUZ_rand(&lseed) & 1) { + DISPLAYLEVEL(5, "t%u: windowLog : %u \n", testNb, cParams.windowLog); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_windowLog, cParams.windowLog, opaqueAPI) ); + assert(cParams.windowLog >= ZSTD_WINDOWLOG_MIN); /* guaranteed by ZSTD_adjustCParams() */ + windowLogMalus = (cParams.windowLog - ZSTD_WINDOWLOG_MIN) / 5; + } + if (FUZ_rand(&lseed) & 1) { + DISPLAYLEVEL(5, "t%u: hashLog : %u \n", testNb, cParams.hashLog); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_hashLog, cParams.hashLog, opaqueAPI) ); + } + if (FUZ_rand(&lseed) & 1) { + DISPLAYLEVEL(5, "t%u: chainLog : %u \n", testNb, cParams.chainLog); + CHECK_Z( setCCtxParameter(zc, cctxParams, 
ZSTD_c_chainLog, cParams.chainLog, opaqueAPI) ); + } + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_searchLog, cParams.searchLog, opaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_minMatch, cParams.minMatch, opaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_targetLength, cParams.targetLength, opaqueAPI) ); + + /* mess with long distance matching parameters */ + if (bigTests) { + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_enableLongDistanceMatching, FUZ_rand(&lseed) & 63, opaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashLog, FUZ_randomClampedLength(&lseed, ZSTD_HASHLOG_MIN, 23), opaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, ZSTD_SRCSIZEHINT_MIN, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) ); + } + + /* mess with frame parameters */ + if (FUZ_rand(&lseed) & 1) { + int const checksumFlag = FUZ_rand(&lseed) & 1; + DISPLAYLEVEL(5, "t%u: frame checksum : %u \n", testNb, checksumFlag); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_checksumFlag, checksumFlag, opaqueAPI) ); + } + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_dictIDFlag, FUZ_rand(&lseed) & 1, opaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( 
setCCtxParameter(zc, cctxParams, ZSTD_c_contentSizeFlag, FUZ_rand(&lseed) & 1, opaqueAPI) ); + if (FUZ_rand(&lseed) & 1) { + DISPLAYLEVEL(5, "t%u: pledgedSrcSize : %u \n", testNb, (unsigned)pledgedSrcSize); + CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) ); + } + + /* multi-threading parameters. Only adjust occasionally for small tests. */ + if (bigTests || (FUZ_rand(&lseed) & 0xF) == 0xF) { + U32 const nbThreadsCandidate = (FUZ_rand(&lseed) & 4) + 1; + U32 const nbThreadsAdjusted = (windowLogMalus < nbThreadsCandidate) ? nbThreadsCandidate - windowLogMalus : 1; + int const nbThreads = MIN(nbThreadsAdjusted, nbThreadsMax); + DISPLAYLEVEL(5, "t%i: nbThreads : %u \n", testNb, nbThreads); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_nbWorkers, nbThreads, opaqueAPI) ); + if (nbThreads > 1) { + U32 const jobLog = FUZ_rand(&lseed) % (testLog+1); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_overlapLog, FUZ_rand(&lseed) % 10, opaqueAPI) ); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_jobSize, (U32)FUZ_rLogLength(&lseed, jobLog), opaqueAPI) ); + } + } + /* Enable rsyncable mode 1 in 4 times. 
*/ + setCCtxParameter(zc, cctxParams, ZSTD_c_rsyncable, (FUZ_rand(&lseed) % 4 == 0), opaqueAPI); + + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) ); + + /* Apply parameters */ + if (opaqueAPI) { + DISPLAYLEVEL(5, "t%u: applying CCtxParams \n", testNb); + CHECK_Z (ZSTD_CCtx_setParametersUsingCCtxParams(zc, cctxParams) ); + } + + if (FUZ_rand(&lseed) & 1) { + if (FUZ_rand(&lseed) & 1) { + CHECK_Z( ZSTD_CCtx_loadDictionary(zc, dict, dictSize) ); + } else { + CHECK_Z( ZSTD_CCtx_loadDictionary_byReference(zc, dict, dictSize) ); + } + } else { + CHECK_Z( ZSTD_CCtx_refPrefix(zc, dict, dictSize) ); + } + } } + + CHECK_Z(getCCtxParams(zc, &savedParams)); + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + for (cSize=0, totalTestSize=0 ; (totalTestSize < maxTestSize) ; ) { + /* compress random chunks into randomly sized dst buffers */ + size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN(maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_EndDirective const flush = (FUZ_rand(&lseed) & 15) ? 
ZSTD_e_continue : ZSTD_e_flush; + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + CHECK_Z( ZSTD_compressStream2(zc, &outBuff, &inBuff, flush) ); + DISPLAYLEVEL(6, "t%u: compress consumed %u bytes (total : %u) ; flush: %u (total : %u) \n", + testNb, (unsigned)inBuff.pos, (unsigned)(totalTestSize + inBuff.pos), (unsigned)flush, (unsigned)outBuff.pos); + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* final frame epilogue */ + { size_t remainingToFlush = 1; + while (remainingToFlush) { + ZSTD_inBuffer inBuff = { NULL, 0, 0 }; + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(6, "t%u: End-flush into dst buffer of size %u \n", testNb, (unsigned)adjustedDstSize); + remainingToFlush = ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end); + DISPLAYLEVEL(6, "t%u: Total flushed so far : %u bytes \n", testNb, (unsigned)outBuff.pos); + CHECK( ZSTD_isError(remainingToFlush), + "ZSTD_compressStream2 w/ ZSTD_e_end error : %s", + ZSTD_getErrorName(remainingToFlush) ); + } } + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + DISPLAYLEVEL(5, "Frame completed : %zu bytes \n", cSize); + } + + CHECK(badParameters(zc, savedParams), "CCtx params are wrong"); + + /* multi - fragments decompression test */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + DISPLAYLEVEL(5, "resetting DCtx (dict:%p) \n", dict); + CHECK_Z( ZSTD_resetDStream(zd) ); + } else { + if (dictSize) + DISPLAYLEVEL(5, "using dictionary of size %zu \n", dictSize); + CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + 
ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = outBuff.pos + dstBuffSize; + DISPLAYLEVEL(6, "decompression presented %u new bytes (pos:%u/%u)\n", + (unsigned)readCSrcSize, (unsigned)inBuff.pos, (unsigned)cSize); + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + DISPLAYLEVEL(6, "so far: consumed = %u, produced = %u \n", + (unsigned)inBuff.pos, (unsigned)outBuff.pos); + if (ZSTD_isError(decompressionResult)) { + DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult)); + findDiff(copyBuffer, dstBuffer, totalTestSize); + } + CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); + CHECK (inBuff.pos > cSize, "ZSTD_decompressStream consumes too much input : %u > %u ", (unsigned)inBuff.pos, (unsigned)cSize); + } + CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (unsigned)inBuff.pos, (unsigned)cSize); + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (unsigned)outBuff.pos, (unsigned)totalTestSize); + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % 
(srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + CHECK_Z( ZSTD_initDStream(zd_noise) ); /* note : no dictionary */ + { ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + while (outBuff.pos < dstBufferSize) { + size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); + outBuff.size = outBuff.pos + adjustedDstSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; + { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* Good so far, but no more progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; + } } } } + DISPLAY("\r%u fuzzer tests completed \n", testNb-1); + +_cleanup: + ZSTD_freeCCtx(zc); + ZSTD_freeDStream(zd); + ZSTD_freeDStream(zd_noise); + ZSTD_freeCCtxParams(cctxParams); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + +/*-******************************************************* +* Command line +*********************************************************/ +static int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Number of tests (default:%u)\n", nbTestsDefault); + DISPLAY( " -T# : Max duration to run for. Overrides number of tests. (e.g. 
-T1m or -T60s for one minute)\n"); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + +typedef enum { simple_api, mt_api, advanced_api } e_api; + +int main(int argc, const char** argv) +{ + U32 seed = 0; + int seedset = 0; + int nbTests = nbTestsDefault; + int testNb = 0; + int proba = FUZ_COMPRESSIBILITY_DEFAULT; + int result = 0; + int mainPause = 0; + int bigTests = (sizeof(size_t) == 8); + e_api selected_api = simple_api; + const char* const programName = argv[0]; + int argNb; + + /* Check command line */ + for(argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + assert(argument != NULL); + + /* Parsing commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + + if (!strcmp(argument, "--mt")) { selected_api=mt_api; testNb += !testNb; continue; } + if (!strcmp(argument, "--newapi")) { selected_api=advanced_api; testNb += !testNb; continue; } + if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; } + + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + + case 'v': + argument++; + g_displayLevel++; + break; + + case 'q': + argument++; + g_displayLevel--; + break; + + case 'p': /* pause at the end */ + argument++; + mainPause = 1; + break; + + case 'i': /* limit tests by nb of iterations (default) */ + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + nbTests *= 10; + nbTests += *argument - '0'; + argument++; + } + break; + + case 'T': /* limit tests by time */ + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + g_clockTime *= 10; + g_clockTime += *argument - '0'; + argument++; + } + if 
(*argument=='m') { /* -T1m == -T60 */ + g_clockTime *=60, argument++; + if (*argument=='n') argument++; /* -T1mn == -T60 */ + } else if (*argument=='s') argument++; /* -T10s == -T10 */ + g_clockTime *= SEC_TO_MICRO; + break; + + case 's': /* manually select seed */ + argument++; + seedset=1; + seed=0; + while ((*argument>='0') && (*argument<='9')) { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + + case 't': /* select starting test number */ + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) { + testNb *= 10; + testNb += *argument - '0'; + argument++; + } + break; + + case 'P': /* compressibility % */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if (proba>100) proba=100; + break; + + default: + return FUZ_usage(programName); + } + } } } /* for(argNb=1; argNb<argc; argNb++) */ + + /* Get Seed */ + DISPLAY("Starting zstream tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + + DISPLAY("Seed = %u\n", (unsigned)seed); + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + + if (nbTests<=0) nbTests=1; + + if (testNb==0) { + result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */ + } + + if (!result) { + switch(selected_api) + { + case simple_api : + result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100, bigTests); + break; + case mt_api : + result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100, bigTests); + break; + case advanced_api : + result = fuzzerTests_newAPI(seed, nbTests, testNb, ((double)proba) / 100, bigTests); + break; + default : + assert(0); /* impossible */ + } + } + + if (mainPause) { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + 
return result; +} |