diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/zstd/tests | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
60 files changed, 14321 insertions, 0 deletions
diff --git a/src/zstd/tests/.gitignore b/src/zstd/tests/.gitignore new file mode 100644 index 00000000..b16e26e3 --- /dev/null +++ b/src/zstd/tests/.gitignore @@ -0,0 +1,61 @@ +# local binary (Makefile) +fullbench +fullbench32 +fuzzer +fuzzer32 +fuzzer-dll +zbufftest +zbufftest32 +zbufftest-dll +zstreamtest +zstreamtest32 +zstreamtest_asan +zstreamtest_tsan +zstreamtest-dll +datagen +paramgrill +paramgrill32 +roundTripCrash +longmatch +symbols +legacy +decodecorpus +pool +poolTests +invalidDictionaries + +# Tmp test directory +zstdtest +speedTest +versionsTest +namespaceTest + +# Local script +startSpeedTest +speedTest.pid + +# Object files +*.o +*.ko + +# Executables +*.exe +*.out +*.app + +# Default result files +dictionary +grillResults.txt +_* +tmp* +*.zst +*.gz +result +out + +# fuzzer +afl + +# Misc files +*.bat +dirTest* diff --git a/src/zstd/tests/Makefile b/src/zstd/tests/Makefile new file mode 100644 index 00000000..651833bb --- /dev/null +++ b/src/zstd/tests/Makefile @@ -0,0 +1,410 @@ +# ################################################################ +# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ +# datagen : Synthetic and parametrable data generator, for tests +# fullbench : Precisely measure speed for each zstd inner functions +# fullbench32: Same as fullbench, but forced to compile in 32-bits mode +# fuzzer : Test tool, to check zstd integrity on target platform +# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode +# paramgrill : parameter tester for zstd +# test-zstd-speed.py : script for testing zstd speed difference between commits +# versionsTest : compatibility test between zstd versions stored on Github (v0.1+) +# zstreamtest : Fuzzer test tool for zstd streaming API +# zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode +# ########################################################################## + +ZSTDDIR = ../lib +PRGDIR = ../programs +PYTHON ?= python3 +TESTARTEFACT := versionsTest namespaceTest + +DEBUGLEVEL ?= 1 +DEBUGFLAGS = -g -DZSTD_DEBUG=$(DEBUGLEVEL) +CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ + -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) +CFLAGS ?= -O3 +CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) + + +ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c +ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c +ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c +ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) +ZBUFF_FILES := $(ZSTDDIR)/deprecated/*.c +ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c + + +# Define *.exe as extension for Windows systems +ifneq (,$(filter Windows%,$(OS))) +EXT =.exe +MULTITHREAD_CPP = -DZSTD_MULTITHREAD +MULTITHREAD_LD = +else +EXT = +MULTITHREAD_CPP = -DZSTD_MULTITHREAD +MULTITHREAD_LD = -pthread +endif +MULTITHREAD = $(MULTITHREAD_CPP) $(MULTITHREAD_LD) + +VOID = /dev/null +ZSTREAM_TESTTIME ?= -T90s +FUZZERTEST ?= -T200s +ZSTDRTTEST = --test-large-data +DECODECORPUS_TESTTIME ?= -T30 + +.PHONY: default all all32 allnothread dll clean test test32 test-all namespaceTest versionsTest + +default: fullbench + +all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus + +all32: fullbench32 fuzzer32 zstreamtest32 + +allnothread: fullbench fuzzer paramgrill datagen decodecorpus + +dll: fuzzer-dll zstreamtest-dll + +zstd: + $(MAKE) -C $(PRGDIR) $@ + +zstd32: + $(MAKE) -C $(PRGDIR) $@ + +zstd-nolegacy: + $(MAKE) -C $(PRGDIR) $@ + +gzstd: + $(MAKE) -C $(PRGDIR) zstd HAVE_ZLIB=1 + +fullbench32: CPPFLAGS += -m32 +fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) +fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) +fullbench fullbench32 : DEBUGFLAGS = # turn off assert() for speed measurements +fullbench fullbench32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c fullbench.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +fullbench-lib: $(PRGDIR)/datagen.c fullbench.c + $(MAKE) -C $(ZSTDDIR) libzstd.a + $(CC) $(FLAGS) $^ -o $@$(EXT) $(ZSTDDIR)/libzstd.a + +fullbench-dll: $(PRGDIR)/datagen.c fullbench.c + $(MAKE) -C $(ZSTDDIR) libzstd + $(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll + +fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) +fuzzer : LDFLAGS += $(MULTITHREAD_LD) +fuzzer32: CFLAGS += -m32 +fuzzer fuzzer32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c fuzzer.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +fuzzer-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd +fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c fuzzer.c + $(MAKE) -C $(ZSTDDIR) libzstd + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +zbufftest : CPPFLAGS += -I$(ZSTDDIR)/deprecated +zbufftest : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings +zbufftest : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/datagen.c zbufftest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +zbufftest32 : CPPFLAGS += -I$(ZSTDDIR)/deprecated +zbufftest32 : CFLAGS += -Wno-deprecated-declarations -m32 +zbufftest32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/datagen.c zbufftest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated +zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings +zbufftest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd +zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c + $(MAKE) -C $(ZSTDDIR) libzstd + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c +zstreamtest : CPPFLAGS += $(MULTITHREAD_CPP) +zstreamtest : LDFLAGS += $(MULTITHREAD_LD) +zstreamtest : $(ZSTREAMFILES) + $(CC) $(FLAGS) $^ -o $@$(EXT) + +zstreamtest32 : CFLAGS += -m32 +zstreamtest32 : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +zstreamtest_asan : CFLAGS += -fsanitize=address +zstreamtest_asan : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +zstreamtest_tsan : CFLAGS += -fsanitize=thread +zstreamtest_tsan : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd +zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zstreamtest.c + $(MAKE) -C $(ZSTDDIR) libzstd + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +paramgrill : DEBUGFLAGS = +paramgrill : $(ZSTD_FILES) $(PRGDIR)/datagen.c paramgrill.c + $(CC) $(FLAGS) $^ -lm -o $@$(EXT) + +datagen : $(PRGDIR)/datagen.c datagencli.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +roundTripCrash : $(ZSTD_FILES) roundTripCrash.c + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +longmatch : $(ZSTD_FILES) longmatch.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +invalidDictionaries : $(ZSTD_FILES) invalidDictionaries.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +legacy : CFLAGS+= -DZSTD_LEGACY_SUPPORT=4 +legacy : CPPFLAGS+= -I$(ZSTDDIR)/legacy +legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +decodecorpus : $(filter-out $(ZSTDDIR)/compress/zstd_compress.c, $(wildcard $(ZSTD_FILES))) $(ZDICT_FILES) decodecorpus.c + $(CC) $(FLAGS) $^ -o $@$(EXT) -lm + +symbols : symbols.c + $(MAKE) -C $(ZSTDDIR) libzstd +ifneq (,$(filter Windows%,$(OS))) + cp $(ZSTDDIR)/dll/libzstd.dll . + $(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 libzstd.dll +else + $(CC) $(FLAGS) $^ -o $@$(EXT) -Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so +endif + +poolTests : poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +namespaceTest: + if $(CC) namespaceTest.c ../lib/common/xxhash.c -o $@ ; then echo compilation should fail; exit 1 ; fi + $(RM) $@ + +versionsTest: clean + $(PYTHON) test-zstd-versions.py + +clean: + $(MAKE) -C $(ZSTDDIR) clean + @$(RM) -fR $(TESTARTEFACT) + @$(RM) -f core *.o tmp* result* *.gcda dictionary *.zst \ + $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \ + fullbench$(EXT) fullbench32$(EXT) \ + fullbench-lib$(EXT) fullbench-dll$(EXT) \ + fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \ + fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT)\ + zstreamtest$(EXT) zstreamtest32$(EXT) \ + datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \ + symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \ + decodecorpus$(EXT) + @echo Cleaning completed + + +#---------------------------------------------------------------------------------- +#make valgrindTest is validated only for Linux, OSX, BSD, Hurd and Solaris targets +#---------------------------------------------------------------------------------- +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +HOST_OS = POSIX + +valgrindTest: VALGRIND = valgrind --leak-check=full --show-leak-kinds=all --error-exitcode=1 +valgrindTest: zstd datagen fuzzer fullbench + @echo "\n ---- valgrind tests : memory analyzer ----" + $(VALGRIND) ./datagen -g50M > $(VOID) + $(VALGRIND) $(PRGDIR)/zstd ; if [ $$? -eq 0 ] ; then echo "zstd without argument should have failed"; false; fi + ./datagen -g80 | $(VALGRIND) $(PRGDIR)/zstd - -c > $(VOID) + ./datagen -g16KB | $(VALGRIND) $(PRGDIR)/zstd -vf - -c > $(VOID) + ./datagen -g2930KB | $(VALGRIND) $(PRGDIR)/zstd -5 -vf - -o tmp + $(VALGRIND) $(PRGDIR)/zstd -vdf tmp -c > $(VOID) + ./datagen -g64MB | $(VALGRIND) $(PRGDIR)/zstd -vf - -c > $(VOID) + @rm tmp + $(VALGRIND) ./fuzzer -T1mn -t1 + $(VALGRIND) ./fullbench -i1 + +endif + + +ifneq (,$(filter MSYS%,$(shell uname))) +HOST_OS = MSYS +endif + + +#----------------------------------------------------------------------------- +# make tests validated only for below targets +#----------------------------------------------------------------------------- +ifneq (,$(filter $(HOST_OS),MSYS POSIX)) + +DIFF:=diff +ifneq (,$(filter $(shell uname),SunOS)) +DIFF:=gdiff +endif + +zstd-playTests: datagen + file $(ZSTD) + ZSTD="$(QEMU_SYS) $(ZSTD)" ./playTests.sh $(ZSTDRTTEST) + +shortest: ZSTDRTTEST= +shortest: test-zstd + +fuzztest: test-fuzzer test-zstream test-decodecorpus + +test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus +ifeq ($(QEMU_SYS),) +test: test-pool +endif + +test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zstream32 + +test-all: test test32 valgrindTest test-decodecorpus-cli + +test-zstd: ZSTD = $(PRGDIR)/zstd +test-zstd: zstd zstd-playTests + +test-zstd32: ZSTD = $(PRGDIR)/zstd32 +test-zstd32: zstd32 zstd-playTests + +test-zstd-nolegacy: ZSTD = $(PRGDIR)/zstd-nolegacy +test-zstd-nolegacy: zstd-nolegacy zstd-playTests + +test-gzstd: gzstd + $(PRGDIR)/zstd -f README.md test-zstd-speed.py + gzip -f README.md test-zstd-speed.py + cat README.md.zst test-zstd-speed.py.gz >zstd_gz.zst + cat README.md.gz test-zstd-speed.py.zst >gz_zstd.gz + $(PRGDIR)/zstd -df README.md.gz -o README2.md + $(PRGDIR)/zstd -df README.md.gz test-zstd-speed.py.gz + $(PRGDIR)/zstd -df zstd_gz.zst gz_zstd.gz + $(DIFF) -q zstd_gz gz_zstd + echo Hello World ZSTD | $(PRGDIR)/zstd -c - >hello.zst + echo Hello World GZIP | gzip -c - >hello.gz + echo Hello World TEXT >hello.txt + cat hello.zst hello.gz hello.txt >hello_zst_gz_txt.gz + $(PRGDIR)/zstd -dcf hello.* + $(PRGDIR)/zstd -dcf - <hello_zst_gz_txt.gz + $(RM) *.gz *.zst README2.md gz_zstd zstd_gz hello.txt + +test-fullbench: fullbench datagen + $(QEMU_SYS) ./fullbench -i1 + $(QEMU_SYS) ./fullbench -i1 -P0 + +test-fullbench32: fullbench32 datagen + $(QEMU_SYS) ./fullbench32 -i1 + $(QEMU_SYS) ./fullbench32 -i1 -P0 + +test-fuzzer: fuzzer + $(QEMU_SYS) ./fuzzer $(FUZZERTEST) $(FUZZER_FLAGS) + +test-fuzzer32: fuzzer32 + $(QEMU_SYS) ./fuzzer32 $(FUZZERTEST) $(FUZZER_FLAGS) + +test-zbuff: zbufftest + $(QEMU_SYS) ./zbufftest $(ZSTREAM_TESTTIME) + +test-zbuff32: zbufftest32 + $(QEMU_SYS) ./zbufftest32 $(ZSTREAM_TESTTIME) + +test-zstream: zstreamtest + $(QEMU_SYS) ./zstreamtest $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + $(QEMU_SYS) ./zstreamtest --mt -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + $(QEMU_SYS) ./zstreamtest --newapi -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + $(QEMU_SYS) ./zstreamtest --opaqueapi -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + +test-zstream32: zstreamtest32 + $(QEMU_SYS) ./zstreamtest32 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS) + +test-longmatch: longmatch + $(QEMU_SYS) ./longmatch + +test-invalidDictionaries: invalidDictionaries + $(QEMU_SYS) ./invalidDictionaries + +test-symbols: symbols + $(QEMU_SYS) ./symbols + +test-legacy: legacy + $(QEMU_SYS) ./legacy + +test-decodecorpus: decodecorpus + $(QEMU_SYS) ./decodecorpus -t $(DECODECORPUS_TESTTIME) + +test-decodecorpus-cli: decodecorpus + @echo "\n ---- decodecorpus basic cli tests ----" + @mkdir testdir + ./decodecorpus -n5 -otestdir -ptestdir + @cd testdir && \ + $(ZSTD) -d z000000.zst -o tmp0 && \ + $(ZSTD) -d z000001.zst -o tmp1 && \ + $(ZSTD) -d z000002.zst -o tmp2 && \ + $(ZSTD) -d z000003.zst -o tmp3 && \ + $(ZSTD) -d z000004.zst -o tmp4 && \ + diff z000000 tmp0 && \ + diff z000001 tmp1 && \ + diff z000002 tmp2 && \ + diff z000003 tmp3 && \ + diff z000004 tmp4 && \ + rm ./* && \ + cd .. + @echo "\n ---- decodecorpus dictionary cli tests ----" + ./decodecorpus -n5 -otestdir -ptestdir --use-dict=1MB + @cd testdir && \ + $(ZSTD) -d z000000.zst -D dictionary -o tmp0 && \ + $(ZSTD) -d z000001.zst -D dictionary -o tmp1 && \ + $(ZSTD) -d z000002.zst -D dictionary -o tmp2 && \ + $(ZSTD) -d z000003.zst -D dictionary -o tmp3 && \ + $(ZSTD) -d z000004.zst -D dictionary -o tmp4 && \ + diff z000000 tmp0 && \ + diff z000001 tmp1 && \ + diff z000002 tmp2 && \ + diff z000003 tmp3 && \ + diff z000004 tmp4 && \ + cd .. + @rm -rf testdir + +test-pool: poolTests + $(QEMU_SYS) ./poolTests + +test-lz4: ZSTD = LD_LIBRARY_PATH=/usr/local/lib $(PRGDIR)/zstd +test-lz4: ZSTD_LZ4 = LD_LIBRARY_PATH=/usr/local/lib ./lz4 +test-lz4: ZSTD_UNLZ4 = LD_LIBRARY_PATH=/usr/local/lib ./unlz4 +test-lz4: zstd decodecorpus + ln -s $(PRGDIR)/zstd lz4 + ln -s $(PRGDIR)/zstd unlz4 + + ./decodecorpus -ptmp + # lz4 -> zstd + lz4 < tmp | \ + $(ZSTD) -d | \ + cmp - tmp + lz4 < tmp | \ + $(ZSTD_UNLZ4) | \ + cmp - tmp + # zstd -> lz4 + $(ZSTD) --format=lz4 < tmp | \ + lz4 -d | \ + cmp - tmp + $(ZSTD_LZ4) < tmp | \ + lz4 -d | \ + cmp - tmp + # zstd -> zstd + $(ZSTD) --format=lz4 < tmp | \ + $(ZSTD) -d | \ + cmp - tmp + # zstd -> zstd + $(ZSTD) < tmp | \ + $(ZSTD) -d | \ + cmp - tmp + + rm tmp lz4 unlz4 + +endif diff --git a/src/zstd/tests/README.md b/src/zstd/tests/README.md new file mode 100644 index 00000000..24a28ab7 --- /dev/null +++ b/src/zstd/tests/README.md @@ -0,0 +1,90 @@ +Programs and scripts for automated testing of Zstandard +======================================================= + +This directory contains the following programs and scripts: +- `datagen` : Synthetic and parametrable data generator, for tests +- `fullbench` : Precisely measure speed for each zstd inner functions +- `fuzzer` : Test tool, to check zstd integrity on target platform +- `paramgrill` : parameter tester for zstd +- `test-zstd-speed.py` : script for testing zstd speed difference between commits +- `test-zstd-versions.py` : compatibility test between zstd versions stored on Github (v0.1+) +- `zbufftest` : Test tool to check ZBUFF (a buffered streaming API) integrity +- `zstreamtest` : Fuzzer test tool for zstd streaming API +- `legacy` : Test tool to test decoding of legacy zstd frames +- `decodecorpus` : Tool to generate valid Zstandard frames, for verifying decoder implementations + + +#### `test-zstd-versions.py` - script for testing zstd interoperability between versions + +This script creates `versionsTest` directory to which zstd repository is cloned. +Then all tagged (released) versions of zstd are compiled. +In the following step interoperability between zstd versions is checked. + + +#### `test-zstd-speed.py` - script for testing zstd speed difference between commits + +This script creates `speedTest` directory to which zstd repository is cloned. +Then it compiles all branches of zstd and performs a speed benchmark for a given list of files (the `testFileNames` parameter). +After `sleepTime` (an optional parameter, default 300 seconds) seconds the script checks repository for new commits. +If a new commit is found it is compiled and a speed benchmark for this commit is performed. +The results of the speed benchmark are compared to the previous results. +If compression or decompression speed for one of zstd levels is lower than `lowerLimit` (an optional parameter, default 0.98) the speed benchmark is restarted. +If second results are also lower than `lowerLimit` the warning e-mail is send to recipients from the list (the `emails` parameter). + +Additional remarks: +- To be sure that speed results are accurate the script should be run on a "stable" target system with no other jobs running in parallel +- Using the script with virtual machines can lead to large variations of speed results +- The speed benchmark is not performed until computers' load average is lower than `maxLoadAvg` (an optional parameter, default 0.75) +- The script sends e-mails using `mutt`; if `mutt` is not available it sends e-mails without attachments using `mail`; if both are not available it only prints a warning + + +The example usage with two test files, one e-mail address, and with an additional message: +``` +./test-zstd-speed.py "silesia.tar calgary.tar" "email@gmail.com" --message "tested on my laptop" --sleepTime 60 +``` + +To run the script in background please use: +``` +nohup ./test-zstd-speed.py testFileNames emails & +``` + +The full list of parameters: +``` +positional arguments: + testFileNames file names list for speed benchmark + emails list of e-mail addresses to send warnings + +optional arguments: + -h, --help show this help message and exit + --message MESSAGE attach an additional message to e-mail + --lowerLimit LOWERLIMIT + send email if speed is lower than given limit + --maxLoadAvg MAXLOADAVG + maximum load average to start testing + --lastCLevel LASTCLEVEL + last compression level for testing + --sleepTime SLEEPTIME + frequency of repository checking in seconds +``` + +#### `decodecorpus` - tool to generate Zstandard frames for decoder testing +Command line tool to generate test .zst files. + +This tool will generate .zst files with checksums, +as well as optionally output the corresponding correct uncompressed data for +extra verfication. + +Example: +``` +./decodecorpus -ptestfiles -otestfiles -n10000 -s5 +``` +will generate 10,000 sample .zst files using a seed of 5 in the `testfiles` directory, +with the zstd checksum field set, +as well as the 10,000 original files for more detailed comparison of decompression results. + +``` +./decodecorpus -t -T1mn +``` +will choose a random seed, and for 1 minute, +generate random test frames and ensure that the +zstd library correctly decompresses them in both simple and streaming modes. diff --git a/src/zstd/tests/datagencli.c b/src/zstd/tests/datagencli.c new file mode 100644 index 00000000..4814974e --- /dev/null +++ b/src/zstd/tests/datagencli.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2015-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Dependencies +**************************************/ +#include "util.h" /* Compiler options */ +#include <stdio.h> /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SIZE_DEFAULT ((64 KB) + 1) +#define SEED_DEFAULT 0 +#define COMPRESSIBILITY_DEFAULT 50 + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; + + +/*-******************************************************* +* Command line +*********************************************************/ +static int usage(const char* programName) +{ + DISPLAY( "Compressible data generator\n"); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", + COMPRESSIBILITY_DEFAULT); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + unsigned probaU32 = COMPRESSIBILITY_DEFAULT; + double litProba = 0.0; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; + const char* const programName = argv[0]; + + int argNb; + for(argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + + if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. Aggregated commands are allowed */ + if (*argument=='-') { + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + return usage(programName); + case 'g': + argument++; + size=0; + while ((*argument>='0') && (*argument<='9')) + size *= 10, size += *argument++ - '0'; + if (*argument=='K') { size <<= 10; argument++; } + if (*argument=='M') { size <<= 20; argument++; } + if (*argument=='G') { size <<= 30; argument++; } + if (*argument=='B') { argument++; } + break; + case 's': + argument++; + seed=0; + while ((*argument>='0') && (*argument<='9')) + seed *= 10, seed += *argument++ - '0'; + break; + case 'P': + argument++; + probaU32 = 0; + while ((*argument>='0') && (*argument<='9')) + probaU32 *= 10, probaU32 += *argument++ - '0'; + if (probaU32>100) probaU32 = 100; + break; + case 'L': /* hidden argument : Literal distribution probability */ + argument++; + litProba=0.; + while ((*argument>='0') && (*argument<='9')) + litProba *= 10, litProba += *argument++ - '0'; + if (litProba>100.) litProba=100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); + } + } } } /* for(argNb=1; argNb<argc; argNb++) */ + + DISPLAYLEVEL(4, "Compressible data Generator \n"); + if (probaU32!=COMPRESSIBILITY_DEFAULT) + DISPLAYLEVEL(3, "Compressibility : %i%%\n", probaU32); + DISPLAYLEVEL(3, "Seed = %u \n", seed); + + RDG_genStdout(size, (double)probaU32/100, litProba, seed); + DISPLAYLEVEL(1, "\n"); + + return 0; +} diff --git a/src/zstd/tests/decodecorpus.c b/src/zstd/tests/decodecorpus.c new file mode 100644 index 00000000..e697e519 --- /dev/null +++ b/src/zstd/tests/decodecorpus.c @@ -0,0 +1,1927 @@ +/* + * Copyright (c) 2017-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <limits.h> +#include <math.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "zstd.h" +#include "zstd_internal.h" +#include "mem.h" +#define ZDICT_STATIC_LINKING_ONLY +#include "zdict.h" + +// Direct access to internal compression functions is required +#include "zstd_compress.c" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64 */ + +#ifndef MIN + #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef MAX_PATH + #ifdef PATH_MAX + #define MAX_PATH PATH_MAX + #else + #define MAX_PATH 256 + #endif +#endif + +/*-************************************ +* DISPLAY Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +#define DISPLAYUPDATE(...) \ + do { \ + if ((clockSpan(g_displayClock) > g_refreshRate) || \ + (g_displayLevel >= 4)) { \ + g_displayClock = clock(); \ + DISPLAY(__VA_ARGS__); \ + if (g_displayLevel >= 4) fflush(stderr); \ + } \ + } while (0) +static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6; +static clock_t g_displayClock = 0; + +static clock_t clockSpan(clock_t cStart) +{ + return clock() - cStart; /* works even when overflow; max span ~ 30mn */ +} + +#define CHECKERR(code) \ + do { \ + if (ZSTD_isError(code)) { \ + DISPLAY("Error occurred while generating data: %s\n", \ + ZSTD_getErrorName(code)); \ + exit(1); \ + } \ + } while (0) + +/*-******************************************************* +* Random function +*********************************************************/ +static unsigned RAND(unsigned* src) +{ +#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = RAND_rotl32(rand32, 13); + *src = rand32; + return RAND_rotl32(rand32, 27); +#undef RAND_rotl32 +} + +#define DISTSIZE (8192) + +/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ +static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i < size; i++) { + op[i] = (BYTE) (RAND(seed) % (maxSymb + 1)); + } +} + +/* Write `size` random bytes into `ptr` */ +static void RAND_buffer(U32* seed, void* ptr, size_t size) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i + 4 <= size; i += 4) { + MEM_writeLE32(op + i, RAND(seed)); + } + for (; i < size; i++) { + op[i] = RAND(seed) & 0xff; + } +} + +/* Write `size` bytes into `ptr` following the distribution `dist` */ +static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i < size; i++) { + op[i] = dist[RAND(seed) % DISTSIZE]; + } +} + +/* Generate a random distribution where the frequency of each symbol follows a + * geometric distribution defined by `weight` + * `dist` should have size at least `DISTSIZE` */ +static void RAND_genDist(U32* seed, BYTE* dist, double weight) +{ + size_t i = 0; + size_t statesLeft = DISTSIZE; + BYTE symb = (BYTE) (RAND(seed) % 256); + BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ + + while (i < DISTSIZE) { + size_t states = ((size_t)(weight * statesLeft)) + 1; + size_t j; + for (j = 0; j < states && i < DISTSIZE; j++, i++) { + dist[i] = symb; + } + + symb += step; + statesLeft -= states; + } +} + +/* Generates a random number in the range [min, max) */ +static inline U32 RAND_range(U32* seed, U32 min, U32 max) +{ + return (RAND(seed) % (max-min)) + min; +} + +#define ROUND(x) ((U32)(x + 0.5)) + +/* Generates a random number in an exponential distribution with mean `mean` */ +static double RAND_exp(U32* seed, double mean) +{ + double const u = RAND(seed) / (double) UINT_MAX; + return log(1-u) * (-mean); +} + +/*-******************************************************* +* Constants and Structs +*********************************************************/ +const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"}; + +#define MAX_DECOMPRESSED_SIZE_LOG 20 +#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG) + +#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */ + +#define MIN_SEQ_LEN (3) +#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN) + +BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; +BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; +BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; + +seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; +BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */ +BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; +BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; +BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; + +unsigned WKSP[1024]; + +typedef struct { + size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */ + unsigned windowSize; /* contentSize >= windowSize means single segment */ +} frameHeader_t; + +/* For repeat modes */ +typedef struct { + U32 rep[ZSTD_REP_NUM]; + + int hufInit; + /* the distribution used in the previous block for repeat mode */ + BYTE hufDist[DISTSIZE]; + U32 hufTable [256]; /* HUF_CElt is an incomplete type */ + + int fseInit; + FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + + /* Symbols that were present in the previous distribution, for use with + * set_repeat */ + BYTE litlengthSymbolSet[36]; + BYTE offsetSymbolSet[29]; + BYTE matchlengthSymbolSet[53]; +} cblockStats_t; + +typedef struct { + void* data; + void* dataStart; + void* dataEnd; + + void* src; + void* srcStart; + void* srcEnd; + + frameHeader_t header; + + cblockStats_t stats; + cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ +} frame_t; + +typedef struct { + int useDict; + U32 dictID; + size_t dictContentSize; + BYTE* dictContent; +} dictInfo; + +typedef enum { + gt_frame = 0, /* generate frames */ + gt_block, /* generate compressed blocks without block/frame headers */ +} genType_e; + +/*-******************************************************* +* Global variables (set from command line) +*********************************************************/ +U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */ +U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */ + +/*-******************************************************* +* Generator Functions +*********************************************************/ + +struct { + int contentSize; /* force the content size to be present */ +} opts; /* advanced options on generation */ + +/* Generate and write a random frame header */ +static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info) +{ + BYTE* const op = frame->data; + size_t pos = 0; + frameHeader_t fh; + + BYTE windowByte = 0; + + int singleSegment = 0; + int contentSizeFlag = 0; + int fcsCode = 0; + + memset(&fh, 0, sizeof(fh)); + + /* generate window size */ + { + /* Follow window algorithm from specification */ + int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); + int const mantissa = RAND(seed) % 8; + windowByte = (BYTE) ((exponent << 3) | mantissa); + fh.windowSize = (1U << (exponent + 10)); + fh.windowSize += fh.windowSize / 8 * mantissa; + } + + { + /* Generate random content size */ + size_t highBit; + if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) { + /* do content of at least 128 bytes */ + highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog); + } else if (RAND(seed) & 3) { + /* do small content */ + highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog)); + } else { + /* 0 size frame */ + highBit = 0; + } + fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; + + /* provide size sometimes */ + contentSizeFlag = opts.contentSize | (RAND(seed) & 1); + + if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { + /* do single segment sometimes */ + fh.windowSize = (U32) fh.contentSize; + singleSegment = 1; + } + } + + if (contentSizeFlag) { + /* Determine how large fcs field has to be */ + int minFcsCode = (fh.contentSize >= 256) + + (fh.contentSize >= 65536 + 256) + + (fh.contentSize > 0xFFFFFFFFU); + if (!singleSegment && !minFcsCode) { + minFcsCode = 1; + } + fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); + if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; + } + + /* write out the header */ + MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); + pos += 4; + + { + /* + * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6) + * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5) + * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2) + * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0) + * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header + */ + int const dictBits = info.useDict ? 3 : 0; + BYTE const frameHeaderDescriptor = + (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); + op[pos++] = frameHeaderDescriptor; + } + + if (!singleSegment) { + op[pos++] = windowByte; + } + if (info.useDict) { + MEM_writeLE32(op + pos, (U32) info.dictID); + pos += 4; + } + if (contentSizeFlag) { + switch (fcsCode) { + default: /* Impossible */ + case 0: op[pos++] = (BYTE) fh.contentSize; break; + case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break; + case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break; + case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break; + } + } + + DISPLAYLEVEL(3, " frame content size:\t%u\n", (U32)fh.contentSize); + DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize); + DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag); + DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment); + + frame->data = op + pos; + frame->header = fh; +} + +/* Write a literal block in either raw or RLE form, return the literals size */ +static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* op = (BYTE*)frame->data; + int const type = RAND(seed) % 2; + int const sizeFormatDesc = RAND(seed) % 8; + size_t litSize; + size_t maxLitSize = MIN(contentSize, g_maxBlockSize); + + if (sizeFormatDesc == 0) { + /* Size_FormatDesc = ?0 */ + maxLitSize = MIN(maxLitSize, 31); + } else if (sizeFormatDesc <= 4) { + /* Size_FormatDesc = 01 */ + maxLitSize = MIN(maxLitSize, 4095); + } else { + /* Size_Format = 11 */ + maxLitSize = MIN(maxLitSize, 1048575); + } + + litSize = RAND(seed) % (maxLitSize + 1); + if (frame->src == frame->srcStart && litSize == 0) { + litSize = 1; /* no empty literals if there's nothing preceding this block */ + } + if (litSize + 3 > contentSize) { + litSize = contentSize; /* no matches shorter than 3 are allowed */ + } + /* use smallest size format that fits */ + if (litSize < 32) { + op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; + op += 1; + } else if (litSize < 4096) { + op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; + op[1] = (litSize >> 4) & 0xff; + op += 2; + } else { + op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; + op[1] = (litSize >> 4) & 0xff; + op[2] = (litSize >> 12) & 0xff; + op += 3; + } + + if (type == 0) { + /* Raw literals */ + DISPLAYLEVEL(4, " raw literals\n"); + + RAND_buffer(seed, LITERAL_BUFFER, litSize); + memcpy(op, LITERAL_BUFFER, litSize); + op += litSize; + } else { + /* RLE literals */ + BYTE const symb = (BYTE) (RAND(seed) % 256); + + DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (U32)symb); + + memset(LITERAL_BUFFER, symb, litSize); + op[0] = symb; + op++; + } + + frame->data = op; + + return litSize; +} + +/* Generate a Huffman header for the given source */ +static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, + const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + + unsigned huffLog = 11; + U32 maxSymbolValue = 255; + + U32 count[HUF_SYMBOLVALUE_MAX+1]; + + /* Scan input and build symbol stats */ + { size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } + + /* Build Huffman Tree */ + /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ + huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); + DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); + { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); + CHECKERR(maxBits); + huffLog = (U32)maxBits; + } + + /* Write table description header */ + { size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog); + if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ + op += hSize; + } + + return op - ostart; +} + +/* Write a Huffman coded literals block and return the literals size */ +static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* origop = (BYTE*)frame->data; + BYTE* opend = (BYTE*)frame->dataEnd; + BYTE* op; + BYTE* const ostart = origop; + int const sizeFormat = RAND(seed) % 4; + size_t litSize; + size_t hufHeaderSize = 0; + size_t compressedSize = 0; + size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize); + + symbolEncodingType_e hType; + + if (contentSize < 64) { + /* make sure we get reasonably-sized literals for compression */ + return ERROR(GENERIC); + } + + DISPLAYLEVEL(4, " compressed literals\n"); + + switch (sizeFormat) { + case 0: /* fall through, size is the same as case 1 */ + case 1: + maxLitSize = MIN(maxLitSize, 1023); + origop += 3; + break; + case 2: + maxLitSize = MIN(maxLitSize, 16383); + origop += 4; + break; + case 3: + maxLitSize = MIN(maxLitSize, 262143); + origop += 5; + break; + default:; /* impossible */ + } + + do { + op = origop; + do { + litSize = RAND(seed) % (maxLitSize + 1); + } while (litSize < 32); /* avoid small literal sizes */ + if (litSize + 3 > contentSize) { + litSize = contentSize; /* no matches shorter than 3 are allowed */ + } + + /* most of the time generate a new distribution */ + if ((RAND(seed) & 3) || !frame->stats.hufInit) { + do { + if (RAND(seed) & 3) { + /* add 10 to ensure some compressability */ + double const weight = ((RAND(seed) % 90) + 10) / 100.0; + + DISPLAYLEVEL(5, " distribution weight: %d%%\n", + (int)(weight * 100)); + + RAND_genDist(seed, frame->stats.hufDist, weight); + } else { + /* sometimes do restricted range literals to force + * non-huffman headers */ + DISPLAYLEVEL(5, " small range literals\n"); + RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, + 15); + } + RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, + litSize); + + /* generate the header from the distribution instead of the + * actual data to avoid bugs with symbols that were in the + * distribution but never showed up in the output */ + hufHeaderSize = writeHufHeader( + seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op, + frame->stats.hufDist, DISTSIZE); + CHECKERR(hufHeaderSize); + /* repeat until a valid header is written */ + } while (hufHeaderSize == 0); + op += hufHeaderSize; + hType = set_compressed; + + frame->stats.hufInit = 1; + } else { + /* repeat the distribution/table from last time */ + DISPLAYLEVEL(5, " huffman repeat stats\n"); + RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, + litSize); + hufHeaderSize = 0; + hType = set_repeat; + } + + do { + compressedSize = + sizeFormat == 0 + ? HUF_compress1X_usingCTable( + op, opend - op, LITERAL_BUFFER, litSize, + (HUF_CElt*)frame->stats.hufTable) + : HUF_compress4X_usingCTable( + op, opend - op, LITERAL_BUFFER, litSize, + (HUF_CElt*)frame->stats.hufTable); + CHECKERR(compressedSize); + /* this only occurs when it could not compress or similar */ + } while (compressedSize <= 0); + + op += compressedSize; + + compressedSize += hufHeaderSize; + DISPLAYLEVEL(5, " regenerated size: %u\n", (U32)litSize); + DISPLAYLEVEL(5, " compressed size: %u\n", (U32)compressedSize); + if (compressedSize >= litSize) { + DISPLAYLEVEL(5, " trying again\n"); + /* if we have to try again, reset the stats so we don't accidentally + * try to repeat a distribution we just made */ + frame->stats = frame->oldStats; + } else { + break; + } + } while (1); + + /* write header */ + switch (sizeFormat) { + case 0: /* fall through, size is the same as case 1 */ + case 1: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 14); + MEM_writeLE24(ostart, header); + break; + } + case 2: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 18); + MEM_writeLE32(ostart, header); + break; + } + case 3: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 22); + MEM_writeLE32(ostart, header); + ostart[4] = (BYTE)(compressedSize >> 10); + break; + } + default:; /* impossible */ + } + + frame->data = op; + return litSize; +} + +static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) +{ + /* only do compressed for larger segments to avoid compressibility issues */ + if (RAND(seed) & 7 && contentSize >= 64) { + return writeLiteralsBlockCompressed(seed, frame, contentSize); + } else { + return writeLiteralsBlockSimple(seed, frame, contentSize); + } +} + +static inline void initSeqStore(seqStore_t *seqStore) { + seqStore->sequencesStart = SEQUENCE_BUFFER; + seqStore->litStart = SEQUENCE_LITERAL_BUFFER; + seqStore->llCode = SEQUENCE_LLCODE; + seqStore->mlCode = SEQUENCE_MLCODE; + seqStore->ofCode = SEQUENCE_OFCODE; + + ZSTD_resetSeqStore(seqStore); +} + +/* Randomly generate sequence commands */ +static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, + size_t contentSize, size_t literalsSize, dictInfo info) +{ + /* The total length of all the matches */ + size_t const remainingMatch = contentSize - literalsSize; + size_t excessMatch = 0; + U32 numSequences = 0; + + U32 i; + + + const BYTE* literals = LITERAL_BUFFER; + BYTE* srcPtr = frame->src; + + if (literalsSize != contentSize) { + /* each match must be at least MIN_SEQ_LEN, so this is the maximum + * number of sequences we can have */ + U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; + numSequences = (RAND(seed) % maxSequences) + 1; + + /* the extra match lengths we have to allocate to each sequence */ + excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; + } + + DISPLAYLEVEL(5, " total match lengths: %u\n", (U32)remainingMatch); + for (i = 0; i < numSequences; i++) { + /* Generate match and literal lengths by exponential distribution to + * ensure nice numbers */ + U32 matchLen = + MIN_SEQ_LEN + + ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i))); + U32 literalLen = + (RAND(seed) & 7) + ? ROUND(RAND_exp(seed, + literalsSize / + (double)(numSequences - i))) + : 0; + /* actual offset, code to send, and point to copy up to when shifting + * codes in the repeat offsets history */ + U32 offset, offsetCode, repIndex; + + /* bounds checks */ + matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN); + literalLen = MIN(literalLen, (U32) literalsSize); + if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; + if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch; + + memcpy(srcPtr, literals, literalLen); + srcPtr += literalLen; + do { + if (RAND(seed) & 7) { + /* do a normal offset */ + U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart); + offset = (RAND(seed) % + MIN(frame->header.windowSize, + (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + + 1; + if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) { + /* need to occasionally generate offsets that go past the start */ + /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */ + U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1; + offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart; + if (offset > frame->header.windowSize) { + if (lenPastStart < MIN_SEQ_LEN) { + /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */ + /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */ + /* make sure lenPastStart does not go past dictionary start though */ + lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize); + offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart; + } + { + U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart); + matchLen = MIN(matchLen, matchLenBound); + } + } + } + offsetCode = offset + ZSTD_REP_MOVE; + repIndex = 2; + } else { + /* do a repeat offset */ + offsetCode = RAND(seed) % 3; + if (literalLen > 0) { + offset = frame->stats.rep[offsetCode]; + repIndex = offsetCode; + } else { + /* special case */ + offset = offsetCode == 2 ? frame->stats.rep[0] - 1 + : frame->stats.rep[offsetCode + 1]; + repIndex = MIN(2, offsetCode + 1); + } + } + } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); + + { + size_t j; + BYTE* const dictEnd = info.dictContent + info.dictContentSize; + for (j = 0; j < matchLen; j++) { + if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) { + /* copy from dictionary instead of literals */ + size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart); + *srcPtr = *(dictEnd - dictOffset); + } + else { + *srcPtr = *(srcPtr-offset); + } + srcPtr++; + } + } + + { int r; + for (r = repIndex; r > 0; r--) { + frame->stats.rep[r] = frame->stats.rep[r - 1]; + } + frame->stats.rep[0] = offset; + } + + DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen); + DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u", + (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i); + DISPLAYLEVEL(6, "\n"); + if (offsetCode < 3) { + DISPLAYLEVEL(7, " repeat offset: %d\n", repIndex); + } + /* use libzstd sequence handling */ + ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode, + matchLen - MINMATCH); + + literalsSize -= literalLen; + excessMatch -= (matchLen - MIN_SEQ_LEN); + literals += literalLen; + } + + memcpy(srcPtr, literals, literalsSize); + srcPtr += literalsSize; + DISPLAYLEVEL(6, " excess literals: %5u", (U32)literalsSize); + DISPLAYLEVEL(7, " srcPos: %8u", (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)); + DISPLAYLEVEL(6, "\n"); + + return numSequences; +} + +static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) +{ + size_t i; + + memset(set, 0, (size_t)maxSymbolValue+1); + + for (i = 0; i < len; i++) { + set[symbols[i]] = 1; + } +} + +static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { + return 0; + } + } + return 1; +} + +static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, + size_t nbSeq) +{ + /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; + FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; + FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const oend = (BYTE*)frame->dataEnd; + BYTE* op = (BYTE*)frame->data; + BYTE* seqHead; + BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; + + /* literals compressing block removed so that can be done separately */ + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + if (nbSeq==0) { + frame->data = op; + + return 0; + } + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + + /* CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); + if (mostFrequent == nbSeq) { + /* do RLE if we have the chance */ + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = set_rle; + } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(llCodeTable, nbSeq, + frame->stats.litlengthSymbolSet, 35)) { + /* maybe do repeat mode if we're allowed to */ + LLtype = set_repeat; + } else if (!(RAND(seed) & 3)) { + /* maybe use the default distribution */ + FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_basic; + } else { + /* fall back on a full table */ + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_compressed; + } } + + /* CTable for Offsets */ + /* see Literal Lengths for descriptions of mode choices */ + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); + if (mostFrequent == nbSeq) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = set_rle; + } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(ofCodeTable, nbSeq, + frame->stats.offsetSymbolSet, 28)) { + Offtype = set_repeat; + } else if (!(RAND(seed) & 3)) { + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_compressed; + } } + + /* CTable for MatchLengths */ + /* see Literal Lengths for descriptions of mode choices */ + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); + if (mostFrequent == nbSeq) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = set_rle; + } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(mlCodeTable, nbSeq, + frame->stats.matchlengthSymbolSet, 52)) { + MLtype = set_repeat; + } else if (!(RAND(seed) & 3)) { + /* sometimes do default distribution */ + FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_basic; + } else { + /* fall back on table */ + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_compressed; + } } + frame->stats.fseInit = 1; + initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); + initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); + initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); + + DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype); + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ + BYTE const llCode = llCodeTable[n]; + BYTE const ofCode = ofCodeTable[n]; + BYTE const mlCode = mlCodeTable[n]; + U32 const llBits = LL_bits[llCode]; + U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ + U32 const mlBits = ML_bits[mlCode]; + /* (7)*/ /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ + if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + frame->data = op; + + return 0; +} + +static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, + size_t literalsSize, dictInfo info) +{ + seqStore_t seqStore; + size_t numSequences; + + + initSeqStore(&seqStore); + + /* randomly generate sequences */ + numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info); + /* write them out to the frame data */ + CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); + + return numSequences; +} + +static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info) +{ + BYTE* const blockStart = (BYTE*)frame->data; + size_t literalsSize; + size_t nbSeq; + + DISPLAYLEVEL(4, " compressed block:\n"); + + literalsSize = writeLiteralsBlock(seed, frame, contentSize); + + DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize); + + nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info); + + DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq); + + return (BYTE*)frame->data - blockStart; +} + +static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, + int lastBlock, dictInfo info) +{ + int const blockTypeDesc = RAND(seed) % 8; + size_t blockSize; + int blockType; + + BYTE *const header = (BYTE*)frame->data; + BYTE *op = header + 3; + + DISPLAYLEVEL(4, " block:\n"); + DISPLAYLEVEL(4, " block content size: %u\n", (U32)contentSize); + DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no"); + + if (blockTypeDesc == 0) { + /* Raw data frame */ + + RAND_buffer(seed, frame->src, contentSize); + memcpy(op, frame->src, contentSize); + + op += contentSize; + blockType = 0; + blockSize = contentSize; + } else if (blockTypeDesc == 1) { + /* RLE */ + BYTE const symbol = RAND(seed) & 0xff; + + op[0] = symbol; + memset(frame->src, symbol, contentSize); + + op++; + blockType = 1; + blockSize = contentSize; + } else { + /* compressed, most common */ + size_t compressedSize; + blockType = 2; + + frame->oldStats = frame->stats; + + frame->data = op; + compressedSize = writeCompressedBlock(seed, frame, contentSize, info); + if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */ + blockType = 0; + memcpy(op, frame->src, contentSize); + + op += contentSize; + blockSize = contentSize; /* fall back on raw block if data doesn't + compress */ + + frame->stats = frame->oldStats; /* don't update the stats */ + } else { + op += compressedSize; + blockSize = compressedSize; + } + } + frame->src = (BYTE*)frame->src + contentSize; + + DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]); + DISPLAYLEVEL(4, " block size field: %u\n", (U32)blockSize); + + header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff); + MEM_writeLE16(header + 1, (U16) (blockSize >> 5)); + + frame->data = op; +} + +static void writeBlocks(U32* seed, frame_t* frame, dictInfo info) +{ + size_t contentLeft = frame->header.contentSize; + size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); + while (1) { + /* 1 in 4 chance of ending frame */ + int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3); + size_t blockContentSize; + if (lastBlock) { + blockContentSize = contentLeft; + } else { + if (contentLeft > 0 && (RAND(seed) & 7)) { + /* some variable size block */ + blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); + } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { + /* some full size block */ + blockContentSize = maxBlockSize; + } else { + /* some empty block */ + blockContentSize = 0; + } + } + + writeBlock(seed, frame, blockContentSize, lastBlock, info); + + contentLeft -= blockContentSize; + if (lastBlock) break; + } +} + +static void writeChecksum(frame_t* frame) +{ + /* write checksum so implementations can verify their output */ + U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); + DISPLAYLEVEL(3, " checksum: %08x\n", (U32)digest); + MEM_writeLE32(frame->data, (U32)digest); + frame->data = (BYTE*)frame->data + 4; +} + +static void outputBuffer(const void* buf, size_t size, const char* const path) +{ + /* write data out to file */ + const BYTE* ip = (const BYTE*)buf; + FILE* out; + if (path) { + out = fopen(path, "wb"); + } else { + out = stdout; + } + if (!out) { + fprintf(stderr, "Failed to open file at %s: ", path); + perror(NULL); + exit(1); + } + + { size_t fsize = size; + size_t written = 0; + while (written < fsize) { + written += fwrite(ip + written, 1, fsize - written, out); + if (ferror(out)) { + fprintf(stderr, "Failed to write to file at %s: ", path); + perror(NULL); + exit(1); + } + } + } + + if (path) { + fclose(out); + } +} + +static void initFrame(frame_t* fr) +{ + memset(fr, 0, sizeof(*fr)); + fr->data = fr->dataStart = FRAME_BUFFER; + fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); + fr->src = fr->srcStart = CONTENT_BUFFER; + fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); + + /* init repeat codes */ + fr->stats.rep[0] = 1; + fr->stats.rep[1] = 4; + fr->stats.rep[2] = 8; +} + +/** + * Generated a single zstd compressed block with no block/frame header. + * Returns the final seed. + */ +static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info) +{ + size_t blockContentSize; + int blockWritten = 0; + BYTE* op; + DISPLAYLEVEL(4, "block seed: %u\n", seed); + initFrame(frame); + op = (BYTE*)frame->data; + + while (!blockWritten) { + size_t cSize; + /* generate window size */ + { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10); + int const mantissa = RAND(&seed) % 8; + frame->header.windowSize = (1U << (exponent + 10)); + frame->header.windowSize += (frame->header.windowSize / 8) * mantissa; + } + + /* generate content size */ + { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); + if (RAND(&seed) & 15) { + /* some full size blocks */ + blockContentSize = maxBlockSize; + } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) { + /* some small blocks <= 128 bytes*/ + blockContentSize = RAND(&seed) % (1U << 7); + } else { + /* some variable size blocks */ + blockContentSize = RAND(&seed) % maxBlockSize; + } + } + + /* try generating a compressed block */ + frame->oldStats = frame->stats; + frame->data = op; + cSize = writeCompressedBlock(&seed, frame, blockContentSize, info); + if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */ + /* data doesn't compress -- try again */ + frame->stats = frame->oldStats; /* don't update the stats */ + DISPLAYLEVEL(5, " can't compress block : try again \n"); + } else { + blockWritten = 1; + DISPLAYLEVEL(4, " block size: %u \n", (U32)cSize); + frame->src = (BYTE*)frame->src + blockContentSize; + } + } + return seed; +} + +/* Return the final seed */ +static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info) +{ + /* generate a complete frame */ + DISPLAYLEVEL(3, "frame seed: %u\n", seed); + initFrame(fr); + + writeFrameHeader(&seed, fr, info); + writeBlocks(&seed, fr, info); + writeChecksum(fr); + + return seed; +} + +/*_******************************************************* +* Dictionary Helper Functions +*********************************************************/ +/* returns 0 if successful, otherwise returns 1 upon error */ +static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict) +{ + /* allocate space for samples */ + int ret = 0; + unsigned const numSamples = 4; + size_t sampleSizes[4]; + BYTE* const samples = malloc(5000*sizeof(BYTE)); + if (samples == NULL) { + DISPLAY("Error: could not allocate space for samples\n"); + return 1; + } + + /* generate samples */ + { unsigned literalValue = 1; + unsigned samplesPos = 0; + size_t currSize = 1; + while (literalValue <= 4) { + sampleSizes[literalValue - 1] = currSize; + { size_t k; + for (k = 0; k < currSize; k++) { + *(samples + (samplesPos++)) = (BYTE)literalValue; + } } + literalValue++; + currSize *= 16; + } } + + { size_t dictWriteSize = 0; + ZDICT_params_t zdictParams; + size_t const headerSize = MAX(dictSize/4, 256); + size_t const dictContentSize = dictSize - headerSize; + BYTE* const dictContent = fullDict + headerSize; + if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) { + DISPLAY("Error: dictionary size is too small\n"); + ret = 1; + goto exitGenRandomDict; + } + + /* init dictionary params */ + memset(&zdictParams, 0, sizeof(zdictParams)); + zdictParams.dictID = dictID; + zdictParams.notificationLevel = 1; + + /* fill in dictionary content */ + RAND_buffer(&seed, (void*)dictContent, dictContentSize); + + /* finalize dictionary with random samples */ + dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize, + dictContent, dictContentSize, + samples, sampleSizes, numSamples, + zdictParams); + + if (ZDICT_isError(dictWriteSize)) { + DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize)); + ret = 1; + } + } + +exitGenRandomDict: + free(samples); + return ret; +} + +static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){ + /* allocate space statically */ + dictInfo dictOp; + memset(&dictOp, 0, sizeof(dictOp)); + dictOp.useDict = useDict; + dictOp.dictContentSize = dictContentSize; + dictOp.dictContent = dictContent; + dictOp.dictID = dictID; + return dictOp; +} + +/*-******************************************************* +* Test Mode +*********************************************************/ + +BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE]; + +static size_t testDecodeSimple(frame_t* fr) +{ + /* test decoding the generated data with the simple API */ + size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); + + if (ZSTD_isError(ret)) return ret; + + if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, + (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { + return ERROR(corruption_detected); + } + + return ret; +} + +static size_t testDecodeStreaming(frame_t* fr) +{ + /* test decoding the generated data with the streaming API */ + ZSTD_DStream* zd = ZSTD_createDStream(); + ZSTD_inBuffer in; + ZSTD_outBuffer out; + size_t ret; + + if (!zd) return ERROR(memory_allocation); + + in.src = fr->dataStart; + in.pos = 0; + in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; + + out.dst = DECOMPRESSED_BUFFER; + out.pos = 0; + out.size = ZSTD_DStreamOutSize(); + + ZSTD_initDStream(zd); + while (1) { + ret = ZSTD_decompressStream(zd, &out, &in); + if (ZSTD_isError(ret)) goto cleanup; /* error */ + if (ret == 0) break; /* frame is done */ + + /* force decoding to be done in chunks */ + out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); + } + + ret = out.pos; + + if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { + return ERROR(corruption_detected); + } + +cleanup: + ZSTD_freeDStream(zd); + return ret; +} + +static size_t testDecodeWithDict(U32 seed, genType_e genType) +{ + /* create variables */ + size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN; + U32 const dictID = RAND(&seed); + size_t errorDetected = 0; + BYTE* const fullDict = malloc(dictSize); + if (fullDict == NULL) { + return ERROR(GENERIC); + } + + /* generate random dictionary */ + if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */ + errorDetected = ERROR(GENERIC); + goto dictTestCleanup; + } + + + { frame_t fr; + dictInfo info; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + size_t ret; + + /* get dict info */ + { size_t const headerSize = MAX(dictSize/4, 256); + size_t const dictContentSize = dictSize-headerSize; + BYTE* const dictContent = fullDict+headerSize; + info = initDictInfo(1, dictContentSize, dictContent, dictID); + } + + /* manually decompress and check difference */ + if (genType == gt_frame) { + /* Test frame */ + generateFrame(seed, &fr, info); + ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, + fullDict, dictSize); + } else { + /* Test block */ + generateCompressedBlock(seed, &fr, info); + ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize); + if (ZSTD_isError(ret)) { + errorDetected = ret; + ZSTD_freeDCtx(dctx); + goto dictTestCleanup; + } + ret = ZSTD_decompressBlock(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart); + } + ZSTD_freeDCtx(dctx); + + if (ZSTD_isError(ret)) { + errorDetected = ret; + goto dictTestCleanup; + } + + if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) { + errorDetected = ERROR(corruption_detected); + goto dictTestCleanup; + } + } + +dictTestCleanup: + free(fullDict); + return errorDetected; +} + +static size_t testDecodeRawBlock(frame_t* fr) +{ + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t ret = ZSTD_decompressBegin(dctx); + if (ZSTD_isError(ret)) return ret; + + ret = ZSTD_decompressBlock( + dctx, + DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); + ZSTD_freeDCtx(dctx); + if (ZSTD_isError(ret)) return ret; + + if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, + (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { + return ERROR(corruption_detected); + } + + return ret; +} + +static int runBlockTest(U32* seed) +{ + frame_t fr; + U32 const seedCopy = *seed; + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + *seed = generateCompressedBlock(*seed, &fr, info); + } + + { size_t const r = testDecodeRawBlock(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in block mode on test seed %u: %s\n", seedCopy, + ZSTD_getErrorName(r)); + return 1; + } + } + + { size_t const r = testDecodeWithDict(*seed, gt_block); + if (ZSTD_isError(r)) { + DISPLAY("Error in block mode with dictionary on test seed %u: %s\n", + seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + return 0; +} + +static int runFrameTest(U32* seed) +{ + frame_t fr; + U32 const seedCopy = *seed; + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + *seed = generateFrame(*seed, &fr, info); + } + + { size_t const r = testDecodeSimple(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in simple mode on test seed %u: %s\n", + seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + { size_t const r = testDecodeStreaming(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in streaming mode on test seed %u: %s\n", + seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */ + if (ZSTD_isError(r)) { + DISPLAY("Error in dictionary mode on test seed %u: %s\n", + seedCopy, ZSTD_getErrorName(r)); + return 1; + } + } + return 0; +} + +static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS, + genType_e genType) +{ + unsigned fnum; + + clock_t const startClock = clock(); + clock_t const maxClockSpan = testDurationS * CLOCKS_PER_SEC; + + if (numFiles == 0 && !testDurationS) numFiles = 1; + + DISPLAY("seed: %u\n", seed); + + for (fnum = 0; fnum < numFiles || clockSpan(startClock) < maxClockSpan; fnum++) { + if (fnum < numFiles) + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + else + DISPLAYUPDATE("\r%u ", fnum); + + { int const ret = (genType == gt_frame) ? + runFrameTest(&seed) : + runBlockTest(&seed); + if (ret) return ret; + } + } + + DISPLAY("\r%u tests completed: ", fnum); + DISPLAY("OK\n"); + + return 0; +} + +/*-******************************************************* +* File I/O +*********************************************************/ + +static int generateFile(U32 seed, const char* const path, + const char* const origPath, genType_e genType) +{ + frame_t fr; + + DISPLAY("seed: %u\n", seed); + + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + if (genType == gt_frame) { + generateFrame(seed, &fr, info); + } else { + generateCompressedBlock(seed, &fr, info); + } + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); + if (origPath) { + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); + } + return 0; +} + +static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, + const char* const origPath, genType_e genType) +{ + char outPath[MAX_PATH]; + unsigned fnum; + + DISPLAY("seed: %u\n", seed); + + for (fnum = 0; fnum < numFiles; fnum++) { + frame_t fr; + + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + + { dictInfo const info = initDictInfo(0, 0, NULL, 0); + if (genType == gt_frame) { + seed = generateFrame(seed, &fr, info); + } else { + seed = generateCompressedBlock(seed, &fr, info); + } + } + + if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); + + if (origPath) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); + } + } + + DISPLAY("\r%u/%u \n", fnum, numFiles); + + return 0; +} + +static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, + const char* const origPath, const size_t dictSize, + genType_e genType) +{ + char outPath[MAX_PATH]; + BYTE* fullDict; + U32 const dictID = RAND(&seed); + int errorDetected = 0; + + if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + + /* allocate space for the dictionary */ + fullDict = malloc(dictSize); + if (fullDict == NULL) { + DISPLAY("Error: could not allocate space for full dictionary.\n"); + return 1; + } + + /* randomly generate the dictionary */ + { int const ret = genRandomDict(dictID, seed, dictSize, fullDict); + if (ret != 0) { + errorDetected = ret; + goto dictCleanup; + } + } + + /* write out dictionary */ + if (numFiles != 0) { + if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { + DISPLAY("Error: dictionary path too long\n"); + errorDetected = 1; + goto dictCleanup; + } + outputBuffer(fullDict, dictSize, outPath); + } + else { + outputBuffer(fullDict, dictSize, "dictionary"); + } + + /* generate random compressed/decompressed files */ + { unsigned fnum; + for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) { + frame_t fr; + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + { + size_t const headerSize = MAX(dictSize/4, 256); + size_t const dictContentSize = dictSize-headerSize; + BYTE* const dictContent = fullDict+headerSize; + dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID); + if (genType == gt_frame) { + seed = generateFrame(seed, &fr, info); + } else { + seed = generateCompressedBlock(seed, &fr, info); + } + } + + if (numFiles != 0) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + errorDetected = 1; + goto dictCleanup; + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); + + if (origPath) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + errorDetected = 1; + goto dictCleanup; + } + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); + } + } + else { + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); + if (origPath) { + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); + } + } + } + } + +dictCleanup: + free(fullDict); + return errorDetected; +} + + +/*_******************************************************* +* Command line +*********************************************************/ +static U32 makeSeed(void) +{ + U32 t = (U32) time(NULL); + return XXH32(&t, sizeof(t), 0) % 65536; +} + +static unsigned readInt(const char** argument) +{ + unsigned val = 0; + while ((**argument>='0') && (**argument<='9')) { + val *= 10; + val += **argument - '0'; + (*argument)++; + } + return val; +} + +static void usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -p<path> : select output path (default:stdout)\n"); + DISPLAY( " in multiple files mode this should be a directory\n"); + DISPLAY( " -o<path> : select path to output original file (default:no output)\n"); + DISPLAY( " in multiple files mode this should be a directory\n"); + DISPLAY( " -s# : select seed (default:random based on time)\n"); + DISPLAY( " -n# : number of files to generate (default:1)\n"); + DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n"); + DISPLAY( " -T# : length of time to run tests for\n"); + DISPLAY( " -v : increase verbosity level (default:0, max:7)\n"); + DISPLAY( " -h/H : display help/long help and exit\n"); +} + +static void advancedUsage(const char* programName) +{ + usage(programName); + DISPLAY( "\n"); + DISPLAY( "Advanced arguments :\n"); + DISPLAY( " --content-size : always include the content size in the frame header\n"); + DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n"); + DISPLAY( " --gen-blocks : generate raw compressed blocks without block/frame headers\n"); + DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n"); + DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n"); + DISPLAY( " (this is ignored with gen-blocks)\n"); +} + +/*! readU32FromChar() : + @return : unsigned integer value read from input in `char` format + allows and interprets K, KB, KiB, M, MB and MiB suffix. + Will also modify `*stringPtr`, advancing it to position where it stopped reading. + Note : function result can overflow if digit string > MAX_UINT */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + if ((**stringPtr=='K') || (**stringPtr=='M')) { + result <<= 10; + if (**stringPtr=='M') result <<= 10; + (*stringPtr)++ ; + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + +int main(int argc, char** argv) +{ + U32 seed = 0; + int seedset = 0; + unsigned numFiles = 0; + unsigned testDuration = 0; + int testMode = 0; + const char* path = NULL; + const char* origPath = NULL; + int useDict = 0; + unsigned dictSize = (10 << 10); /* 10 kB default */ + genType_e genType = gt_frame; + + int argNb; + + /* Check command line */ + for (argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + usage(argv[0]); + return 0; + case 'H': + advancedUsage(argv[0]); + return 0; + case 'v': + argument++; + g_displayLevel++; + break; + case 's': + argument++; + seedset=1; + seed = readInt(&argument); + break; + case 'n': + argument++; + numFiles = readInt(&argument); + break; + case 'T': + argument++; + testDuration = readInt(&argument); + if (*argument == 'm') { + testDuration *= 60; + argument++; + if (*argument == 'n') argument++; + } + break; + case 'o': + argument++; + origPath = argument; + argument += strlen(argument); + break; + case 'p': + argument++; + path = argument; + argument += strlen(argument); + break; + case 't': + argument++; + testMode = 1; + break; + case '-': + argument++; + if (strcmp(argument, "content-size") == 0) { + opts.contentSize = 1; + } else if (longCommandWArg(&argument, "use-dict=")) { + dictSize = readU32FromChar(&argument); + useDict = 1; + } else if (strcmp(argument, "gen-blocks") == 0) { + genType = gt_block; + } else if (longCommandWArg(&argument, "max-block-size-log=")) { + U32 value = readU32FromChar(&argument); + if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) { + g_maxBlockSize = 1U << value; + } + } else if (longCommandWArg(&argument, "max-content-size-log=")) { + U32 value = readU32FromChar(&argument); + g_maxDecompressedSizeLog = + MIN(MAX_DECOMPRESSED_SIZE_LOG, value); + } else { + advancedUsage(argv[0]); + return 1; + } + argument += strlen(argument); + break; + default: + usage(argv[0]); + return 1; + } } } } /* for (argNb=1; argNb<argc; argNb++) */ + + if (!seedset) { + seed = makeSeed(); + } + + if (testMode) { + return runTestMode(seed, numFiles, testDuration, genType); + } else { + if (testDuration) { + DISPLAY("Error: -T requires test mode (-t)\n\n"); + usage(argv[0]); + return 1; + } + } + + if (!path) { + DISPLAY("Error: path is required in file generation mode\n"); + usage(argv[0]); + return 1; + } + + if (numFiles == 0 && useDict == 0) { + return generateFile(seed, path, origPath, genType); + } else if (useDict == 0){ + return generateCorpus(seed, numFiles, path, origPath, genType); + } else { + /* should generate files with a dictionary */ + return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType); + } + +} diff --git a/src/zstd/tests/files/huffman-compressed-larger b/src/zstd/tests/files/huffman-compressed-larger Binary files differnew file mode 100644 index 00000000..f594f1ae --- /dev/null +++ b/src/zstd/tests/files/huffman-compressed-larger diff --git a/src/zstd/tests/fullbench.c b/src/zstd/tests/fullbench.c new file mode 100644 index 00000000..db00ce21 --- /dev/null +++ b/src/zstd/tests/fullbench.c @@ -0,0 +1,638 @@ +/* + * Copyright (c) 2015-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*_************************************ +* Includes +**************************************/ +#include "util.h" /* Compiler options, UTIL_GetFileSize */ +#include <stdlib.h> /* malloc */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ + +#include "mem.h" /* U32 */ +#ifndef ZSTD_DLL_IMPORT + #include "zstd_internal.h" /* ZSTD_blockHeaderSize, blockType_e, KB, MB */ +#else + #define KB *(1 <<10) + #define MB *(1 <<20) + #define GB *(1U<<30) + typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; +#endif +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressBegin, ZSTD_compressContinue, etc. */ +#include "zstd.h" /* ZSTD_versionString */ +#include "util.h" /* time functions */ +#include "datagen.h" + + +/*_************************************ +* Constants +**************************************/ +#define PROGRAM_DESCRIPTION "Zstandard speed analyzer" +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_versionString(), (int)(sizeof(void*)*8), AUTHOR, __DATE__ + +#define NBLOOPS 6 +#define TIMELOOP_S 2 + +#define KNUTH 2654435761U +#define MAX_MEM (1984 MB) + +#define COMPRESSIBILITY_DEFAULT 0.50 +static const size_t g_sampleSize = 10000000; + + +/*_************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) + + +/*_************************************ +* Benchmark Parameters +**************************************/ +static U32 g_nbIterations = NBLOOPS; +static double g_compressibility = COMPRESSIBILITY_DEFAULT; + +static void BMK_SetNbIterations(U32 nbLoops) +{ + g_nbIterations = nbLoops; + DISPLAY("- %i iterations -\n", g_nbIterations); +} + + +/*_******************************************************* +* Private functions +*********************************************************/ +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + + requiredMem += step; + do { + testmem = malloc ((size_t)requiredMem); + requiredMem -= step; + } while (!testmem); + + free (testmem); + return (size_t) requiredMem; +} + + +/*_******************************************************* +* Benchmark wrappers +*********************************************************/ +size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +{ + (void)buff2; + return ZSTD_compress(dst, dstSize, src, srcSize, 1); +} + +static size_t g_cSize = 0; +size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +{ + (void)src; (void)srcSize; + return ZSTD_decompress(dst, dstSize, buff2, g_cSize); +} + +static ZSTD_DCtx* g_zdc = NULL; + +#ifndef ZSTD_DLL_IMPORT +extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); +size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +{ + (void)src; (void)srcSize; (void)dst; (void)dstSize; + return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize); +} + +extern size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeq, const void* src, size_t srcSize); +size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +{ + int nbSeq; + (void)src; (void)srcSize; (void)dst; (void)dstSize; + return ZSTD_decodeSeqHeaders(g_zdc, &nbSeq, buff2, g_cSize); +} +#endif + +static ZSTD_CStream* g_cstream= NULL; +size_t local_ZSTD_compressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)buff2; + ZSTD_initCStream(g_cstream, 1); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream(g_cstream, &buffOut, &buffIn); + ZSTD_endStream(g_cstream, &buffOut); + return buffOut.pos; +} + +static size_t local_ZSTD_compress_generic_end(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)buff2; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_p_compressionLevel, 1); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compress_generic(g_cstream, &buffOut, &buffIn, ZSTD_e_end); + return buffOut.pos; +} + +static size_t local_ZSTD_compress_generic_continue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)buff2; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_p_compressionLevel, 1); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compress_generic(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); + ZSTD_compress_generic(g_cstream, &buffOut, &buffIn, ZSTD_e_end); + return buffOut.pos; +} + +static size_t local_ZSTD_compress_generic_T2_end(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)buff2; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_p_compressionLevel, 1); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_p_nbThreads, 2); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + while (ZSTD_compress_generic(g_cstream, &buffOut, &buffIn, ZSTD_e_end)) {} + return buffOut.pos; +} + +static size_t local_ZSTD_compress_generic_T2_continue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)buff2; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_p_compressionLevel, 1); + ZSTD_CCtx_setParameter(g_cstream, ZSTD_p_nbThreads, 2); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compress_generic(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); + while(ZSTD_compress_generic(g_cstream, &buffOut, &buffIn, ZSTD_e_end)) {} + return buffOut.pos; +} + +static ZSTD_DStream* g_dstream= NULL; +static size_t local_ZSTD_decompressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)src; (void)srcSize; + ZSTD_initDStream(g_dstream); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = buff2; + buffIn.size = g_cSize; + buffIn.pos = 0; + ZSTD_decompressStream(g_dstream, &buffOut, &buffIn); + return buffOut.pos; +} + +static ZSTD_CCtx* g_zcc = NULL; + +#ifndef ZSTD_DLL_IMPORT +size_t local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + (void)buff2; + ZSTD_compressBegin(g_zcc, 1); + return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize); +} + +#define FIRST_BLOCK_SIZE 8 +size_t local_ZSTD_compressContinue_extDict(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + BYTE firstBlockBuf[FIRST_BLOCK_SIZE]; + + (void)buff2; + memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE); + ZSTD_compressBegin(g_zcc, 1); + + { size_t const compressResult = ZSTD_compressContinue(g_zcc, dst, dstCapacity, firstBlockBuf, FIRST_BLOCK_SIZE); + if (ZSTD_isError(compressResult)) { DISPLAY("local_ZSTD_compressContinue_extDict error : %s\n", ZSTD_getErrorName(compressResult)); return compressResult; } + dst = (BYTE*)dst + compressResult; + dstCapacity -= compressResult; + } + return ZSTD_compressEnd(g_zcc, dst, dstCapacity, (const BYTE*)src + FIRST_BLOCK_SIZE, srcSize - FIRST_BLOCK_SIZE); +} + +size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +{ + size_t regeneratedSize = 0; + const BYTE* ip = (const BYTE*)buff2; + const BYTE* const iend = ip + g_cSize; + BYTE* op = (BYTE*)dst; + size_t remainingCapacity = dstCapacity; + + (void)src; (void)srcSize; + ZSTD_decompressBegin(g_zdc); + while (ip < iend) { + size_t const iSize = ZSTD_nextSrcSizeToDecompress(g_zdc); + size_t const decodedSize = ZSTD_decompressContinue(g_zdc, op, remainingCapacity, ip, iSize); + ip += iSize; + regeneratedSize += decodedSize; + op += decodedSize; + remainingCapacity -= decodedSize; + } + + return regeneratedSize; +} +#endif + + +/*_******************************************************* +* Bench functions +*********************************************************/ +static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) +{ + BYTE* dstBuff; + size_t const dstBuffSize = ZSTD_compressBound(srcSize); + void* buff2; + const char* benchName; + size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize); + double bestTime = 100000000.; + + /* Selection */ + switch(benchNb) + { + case 1: + benchFunction = local_ZSTD_compress; benchName = "compress(1)"; + break; + case 2: + benchFunction = local_ZSTD_decompress; benchName = "decompress"; + break; +#ifndef ZSTD_DLL_IMPORT + case 11: + benchFunction = local_ZSTD_compressContinue; benchName = "compressContinue(1)"; + break; + case 12: + benchFunction = local_ZSTD_compressContinue_extDict; benchName = "compressContinue_extDict"; + break; + case 13: + benchFunction = local_ZSTD_decompressContinue; benchName = "decompressContinue"; + break; + case 31: + benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "decodeLiteralsBlock"; + break; + case 32: + benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "decodeSeqHeaders"; + break; +#endif + case 41: + benchFunction = local_ZSTD_compressStream; benchName = "compressStream(1)"; + break; + case 42: + benchFunction = local_ZSTD_decompressStream; benchName = "decompressStream"; + break; + case 51: + benchFunction = local_ZSTD_compress_generic_continue; benchName = "compress_generic, continue"; + break; + case 52: + benchFunction = local_ZSTD_compress_generic_end; benchName = "compress_generic, end"; + break; + case 61: + benchFunction = local_ZSTD_compress_generic_T2_continue; benchName = "compress_generic, -T2, continue"; + break; + case 62: + benchFunction = local_ZSTD_compress_generic_T2_end; benchName = "compress_generic, -T2, end"; + break; + default : + return 0; + } + + /* Allocation */ + dstBuff = (BYTE*)malloc(dstBuffSize); + buff2 = malloc(dstBuffSize); + if ((!dstBuff) || (!buff2)) { + DISPLAY("\nError: not enough memory!\n"); + free(dstBuff); free(buff2); + return 12; + } + if (g_zcc==NULL) g_zcc = ZSTD_createCCtx(); + if (g_zdc==NULL) g_zdc = ZSTD_createDCtx(); + if (g_cstream==NULL) g_cstream = ZSTD_createCStream(); + if (g_dstream==NULL) g_dstream = ZSTD_createDStream(); + + /* Preparation */ + switch(benchNb) + { + case 2: + g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1); + break; +#ifndef ZSTD_DLL_IMPORT + case 13 : + g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1); + break; + case 31: /* ZSTD_decodeLiteralsBlock */ + { blockProperties_t bp; + ZSTD_frameHeader zfp; + size_t frameHeaderSize, skippedSize; + g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); + frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_frameHeaderSize_min); + if (frameHeaderSize==0) frameHeaderSize = ZSTD_frameHeaderSize_min; + ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp); /* Get 1st block type */ + if (bp.blockType != bt_compressed) { + DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n"); + goto _cleanOut; + } + skippedSize = frameHeaderSize + ZSTD_blockHeaderSize; + memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize); + srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */ + ZSTD_decompressBegin(g_zdc); + break; + } + case 32: /* ZSTD_decodeSeqHeaders */ + { blockProperties_t bp; + ZSTD_frameHeader zfp; + const BYTE* ip = dstBuff; + const BYTE* iend; + size_t frameHeaderSize, cBlockSize; + ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); /* it would be better to use direct block compression here */ + g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); + frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_frameHeaderSize_min); + if (frameHeaderSize==0) frameHeaderSize = ZSTD_frameHeaderSize_min; + ip += frameHeaderSize; /* Skip frame Header */ + cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */ + if (bp.blockType != bt_compressed) { + DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n"); + goto _cleanOut; + } + iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */ + ip += ZSTD_blockHeaderSize; /* skip block header */ + ZSTD_decompressBegin(g_zdc); + ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip); /* skip literal segment */ + g_cSize = iend-ip; + memcpy(buff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */ + srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */ + break; + } +#else + case 31: + goto _cleanOut; +#endif + case 42 : + g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1); + break; + + /* test functions */ + /* by convention, test functions can be added > 100 */ + + default : ; + } + + { size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; } /* warming up memory */ + + { U32 loopNb; +# define TIME_SEC_MICROSEC (1*1000000ULL) /* 1 second */ + U64 const clockLoop = TIMELOOP_S * TIME_SEC_MICROSEC; + DISPLAY("%2i- %-30.30s : \r", benchNb, benchName); + for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) { + UTIL_time_t clockStart; + size_t benchResult=0; + U32 nbRounds; + + UTIL_sleepMilli(1); /* give processor time to other processes */ + UTIL_waitForNextTick(); + clockStart = UTIL_getTime(); + for (nbRounds=0; UTIL_clockSpanMicro(clockStart) < clockLoop; nbRounds++) { + benchResult = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize); + if (ZSTD_isError(benchResult)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(benchResult)); exit(1); } + } + { U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart); + double const averageTime = (double)clockSpanMicro / TIME_SEC_MICROSEC / nbRounds; + if (averageTime < bestTime) bestTime = averageTime; + DISPLAY("%2i- %-30.30s : %7.1f MB/s (%9u)\r", loopNb, benchName, (double)srcSize / (1 MB) / bestTime, (U32)benchResult); + } } } + DISPLAY("%2u\n", benchNb); + +_cleanOut: + free(dstBuff); + free(buff2); + ZSTD_freeCCtx(g_zcc); g_zcc=NULL; + ZSTD_freeDCtx(g_zdc); g_zdc=NULL; + ZSTD_freeCStream(g_cstream); g_cstream=NULL; + ZSTD_freeDStream(g_dstream); g_dstream=NULL; + return 0; +} + + +static int benchSample(U32 benchNb) +{ + size_t const benchedSize = g_sampleSize; + const char* name = "Sample 10MiB"; + + /* Allocation */ + void* origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; } + + /* Fill buffer */ + RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0); + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY(" %s : \n", name); + if (benchNb) + benchMem(origBuff, benchedSize, benchNb); + else + for (benchNb=0; benchNb<100; benchNb++) benchMem(origBuff, benchedSize, benchNb); + + free(origBuff); + return 0; +} + + +static int benchFiles(const char** fileNamesTable, const int nbFiles, U32 benchNb) +{ + /* Loop for each file */ + int fileIdx; + for (fileIdx=0; fileIdx<nbFiles; fileIdx++) { + const char* inFileName = fileNamesTable[fileIdx]; + FILE* inFile = fopen( inFileName, "rb" ); + U64 inFileSize; + size_t benchedSize; + void* origBuff; + + /* Check file existence */ + if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; } + + /* Memory allocation & restrictions */ + inFileSize = UTIL_getFileSize(inFileName); + benchedSize = BMK_findMaxMem(inFileSize*3) / 3; + if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) + DISPLAY("Not enough memory for '%s' full size; testing %u MB only...\n", inFileName, (U32)(benchedSize>>20)); + + /* Alloc */ + origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); fclose(inFile); return 12; } + + /* Fill input buffer */ + DISPLAY("Loading %s... \r", inFileName); + { + size_t readSize = fread(origBuff, 1, benchedSize, inFile); + fclose(inFile); + if (readSize != benchedSize) { + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); + free(origBuff); + return 13; + } } + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY(" %s : \n", inFileName); + if (benchNb) + benchMem(origBuff, benchedSize, benchNb); + else + for (benchNb=0; benchNb<100; benchNb++) benchMem(origBuff, benchedSize, benchNb); + + free(origBuff); + } + + return 0; +} + + +static int usage(const char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file1 file2 ... fileX\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); + return 0; +} + +static int usage_advanced(const char* exename) +{ + usage(exename); + DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -b# : test only function # \n"); + DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); + DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 1; +} + +int main(int argc, const char** argv) +{ + int i, filenamesStart=0, result; + const char* exename = argv[0]; + const char* input_filename = NULL; + U32 benchNb = 0, main_pause = 0; + + DISPLAY(WELCOME_MESSAGE); + if (argc<1) return badusage(exename); + + for(i=1; i<argc; i++) { + const char* argument = argv[i]; + if(!argument) continue; /* Protection if argument empty */ + + /* Commands (note : aggregated commands are allowed) */ + if (argument[0]=='-') { + + while (argument[1]!=0) { + argument++; + + switch(argument[0]) + { + /* Display help on usage */ + case 'h': + case 'H': return usage_advanced(exename); + + /* Pause at the end (hidden option) */ + case 'p': main_pause = 1; break; + + /* Select specific algorithm to bench */ + case 'b': + benchNb = 0; + while ((argument[1]>= '0') && (argument[1]<= '9')) { + benchNb *= 10; + benchNb += argument[1] - '0'; + argument++; + } + break; + + /* Modify Nb Iterations */ + case 'i': + if ((argument[1] >='0') && (argument[1] <='9')) { + int iters = argument[1] - '0'; + BMK_SetNbIterations(iters); + argument++; + } + break; + + /* Select compressibility of synthetic sample */ + case 'P': + { U32 proba32 = 0; + while ((argument[1]>= '0') && (argument[1]<= '9')) { + proba32 *= 10; + proba32 += argument[1] - '0'; + argument++; + } + g_compressibility = (double)proba32 / 100.; + } + break; + + /* Unknown command */ + default : return badusage(exename); + } + } + continue; + } + + /* first provided filename is input */ + if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } + } + + if (filenamesStart==0) /* no input file */ + result = benchSample(benchNb); + else + result = benchFiles(argv+filenamesStart, argc-filenamesStart, benchNb); + + if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; } + + return result; +} diff --git a/src/zstd/tests/fuzz/.gitignore b/src/zstd/tests/fuzz/.gitignore new file mode 100644 index 00000000..4ff28de9 --- /dev/null +++ b/src/zstd/tests/fuzz/.gitignore @@ -0,0 +1,5 @@ +# test artefacts +corpora +block_decompress +block_round_trip +simple_round_trip diff --git a/src/zstd/tests/fuzz/Makefile b/src/zstd/tests/fuzz/Makefile new file mode 100644 index 00000000..d9b00fd2 --- /dev/null +++ b/src/zstd/tests/fuzz/Makefile @@ -0,0 +1,127 @@ +# ################################################################ +# Copyright (c) 2016-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +# Optionally user defined flags +CFLAGS ?= -O3 +CXXFLAGS ?= -O3 +CPPFLAGS ?= +LDFLAGS ?= +ARFLAGS ?= +LIB_FUZZING_ENGINE ?= libregression.a +PYTHON ?= python +ifeq ($(shell uname), Darwin) + DOWNLOAD?=curl -L -o +else + DOWNLOAD?=wget -O +endif +CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/ + +ZSTDDIR = ../../lib +PRGDIR = ../../programs + +FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ + -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \ + $(CPPFLAGS) +FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls \ + -g -fno-omit-frame-pointer +FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS) +FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS) +FUZZ_LDFLAGS := $(LDFLAGS) +FUZZ_ARFLAGS := $(ARFLAGS) +FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) + +FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h +FUZZ_SRC := zstd_helpers.c + +ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c +ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c +ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c +FUZZ_SRC := \ + $(FUZZ_SRC) \ + $(ZSTDDECOMP_SRC) \ + $(ZSTDCOMMON_SRC) \ + $(ZSTDCOMP_SRC) + +FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC))) + + +.PHONY: default all clean cleanall + +default: all + +FUZZ_TARGETS := \ + simple_round_trip \ + stream_round_trip \ + block_round_trip \ + simple_decompress \ + stream_decompress \ + block_decompress + +all: $(FUZZ_TARGETS) + +%.o: %.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@ + +simple_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) simple_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) simple_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +stream_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) stream_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +block_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +simple_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) simple_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) simple_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +stream_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) stream_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +block_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h regression_driver.o + $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o + +# Install libfuzzer (not usable for MSAN testing) +# Provided for convienence. To use this library run make libFuzzer and +# set LDFLAGS=-L. +.PHONY: libFuzzer +libFuzzer: + @$(RM) -rf Fuzzer + @git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer + @cd Fuzzer && ./build.sh + +corpora/%_seed_corpus.zip: + @mkdir -p corpora + $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip + +corpora/%: corpora/%_seed_corpus.zip + unzip -q $^ -d $@ + +.PHONY: corpora +corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS)) + +regressiontest: corpora + CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all + $(PYTHON) ./fuzz.py regression all + +clean: + @$(MAKE) -C $(ZSTDDIR) clean + @$(RM) *.a *.o + @$(RM) simple_round_trip stream_round_trip simple_decompress \ + stream_decompress block_decompress block_round_trip + +cleanall: + @$(RM) -r Fuzzer + @$(RM) -r corpora diff --git a/src/zstd/tests/fuzz/README.md b/src/zstd/tests/fuzz/README.md new file mode 100644 index 00000000..f184be64 --- /dev/null +++ b/src/zstd/tests/fuzz/README.md @@ -0,0 +1,96 @@ +# Fuzzing + +Each fuzzing target can be built with multiple engines. +Zstd provides a fuzz corpus for each target that can be downloaded with +the command: + +``` +make corpora +``` + +It will download each corpus into `./corpora/TARGET`. + +## fuzz.py + +`fuzz.py` is a helper script for building and running fuzzers. +Run `./fuzz.py -h` for the commands and run `./fuzz.py COMMAND -h` for +command specific help. + +### Generating Data + +`fuzz.py` provides a utility to generate seed data for each fuzzer. + +``` +make -C ../tests decodecorpus +./fuzz.py gen TARGET +``` + +By default it outputs 100 samples, each at most 8KB into `corpora/TARGET-seed`, +but that can be configured with the `--number`, `--max-size-log` and `--seed` +flags. + +### Build +It respects the usual build environment variables `CC`, `CFLAGS`, etc. +The environment variables can be overridden with the corresponding flags +`--cc`, `--cflags`, etc. +The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or +`--lib-fuzzing-engine`, the default is `libregression.a`. +It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and +coverage instrumentation `--enable-coverage`. +It sets sane defaults which can be overriden with flags `--debug`, +`--enable-ubsan-pointer-overlow`, etc. +Run `./fuzz.py build -h` for help. + +### Running Fuzzers + +`./fuzz.py` can run `libfuzzer`, `afl`, and `regression` tests. +See the help of the relevant command for options. +Flags not parsed by `fuzz.py` are passed to the fuzzing engine. +The command used to run the fuzzer is printed for debugging. + +## LibFuzzer + +``` +# Build libfuzzer if necessary +make libFuzzer +# Build the fuzz targets +./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++ +# OR equivalently +CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan +# Run the fuzzer +./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4 +``` + +where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. + +### MSAN + +Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library +and libFuzzer with MSAN. +`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, +`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass +the extra parameters only for MSAN. + +## AFL + +The default `LIB_FUZZING_ENGINE` is `libregression.a`, which produces a binary +that AFL can use. + +``` +# Build the fuzz targets +CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan +# Run the fuzzer without a memory limit because of ASAN +./fuzz.py afl TARGET -m none +``` + +## Regression Testing + +The regression rest supports the `all` target to run all the fuzzers in one +command. + +``` +CC=clang CXX=clang++ ./fuzz.py build all --enable-asan --enable-ubsan +./fuzz.py regression all +CC=clang CXX=clang++ ./fuzz.py build all --enable-msan +./fuzz.py regression all +``` diff --git a/src/zstd/tests/fuzz/block_decompress.c b/src/zstd/tests/fuzz/block_decompress.c new file mode 100644 index 00000000..3cccc32f --- /dev/null +++ b/src/zstd/tests/fuzz/block_decompress.c @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX; + + FUZZ_seed(&src, &size); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(rBuf); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + ZSTD_decompressBegin(dctx); + ZSTD_decompressBlock(dctx, rBuf, neededBufSize, src, size); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/block_round_trip.c b/src/zstd/tests/fuzz/block_round_trip.c new file mode 100644 index 00000000..64ca5fc4 --- /dev/null +++ b/src/zstd/tests/fuzz/block_round_trip.c @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd.h" + +static const int kMaxClevel = 19; + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static void* cBuf = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; +static uint32_t seed; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize) +{ + int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + ZSTD_parameters const params = ZSTD_getParams(cLevel, srcSize, 0); + size_t ret = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, srcSize); + FUZZ_ZASSERT(ret); + + ret = ZSTD_compressBlock(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(ret); + if (ret == 0) { + FUZZ_ASSERT(resultCapacity >= srcSize); + memcpy(result, src, srcSize); + return srcSize; + } + ZSTD_decompressBegin(dctx); + return ZSTD_decompressBlock(dctx, result, resultCapacity, compressed, ret); +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + seed = FUZZ_seed(&src, &size); + neededBufSize = size; + if (size > ZSTD_BLOCKSIZE_MAX) + return 0; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize || !cBuf || !rBuf) { + free(cBuf); + free(rBuf); + cBuf = malloc(neededBufSize); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(cBuf && rBuf); + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + } +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/default.options b/src/zstd/tests/fuzz/default.options new file mode 100644 index 00000000..8ea85883 --- /dev/null +++ b/src/zstd/tests/fuzz/default.options @@ -0,0 +1,2 @@ +[libfuzzer] +max_len = 8192 diff --git a/src/zstd/tests/fuzz/fuzz.h b/src/zstd/tests/fuzz/fuzz.h new file mode 100644 index 00000000..a6484547 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * Fuzz target interface. + * Fuzz targets have some common parameters passed as macros during compilation. + * Check the documentation for each individual fuzzer for more parameters. + * + * @param STATEFUL_FUZZING: + * Define this to reuse state between fuzzer runs. This can be useful to + * test code paths which are only executed when contexts are reused. + * WARNING: Makes reproducing crashes much harder. + * Default: Not defined. + * @param FUZZ_RNG_SEED_SIZE: + * The number of bytes of the source to look at when constructing a seed + * for the deterministic RNG. These bytes are discarded before passing + * the data to zstd functions. Every fuzzer initializes the RNG exactly + * once before doing anything else, even if it is unused. + * Default: 4. + * @param ZSTD_DEBUG: + * This is a parameter for the zstd library. Defining `ZSTD_DEBUG=1` + * enables assert() statements in the zstd library. Higher levels enable + * logging, so aren't recommended. Defining `ZSTD_DEBUG=1` is + * recommended. + * @param MEM_FORCE_MEMORY_ACCESS: + * This flag controls how the zstd library accesses unaligned memory. + * It can be undefined, or 0 through 2. If it is undefined, it selects + * the method to use based on the compiler. If testing with UBSAN set + * MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method. + * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + * This is the canonical flag to enable deterministic builds for fuzzing. + * Changes to zstd for fuzzing are gated behind this define. + * It is recommended to define this when building zstd for fuzzing. + */ + +#ifndef FUZZ_H +#define FUZZ_H + +#ifndef FUZZ_RNG_SEED_SIZE +# define FUZZ_RNG_SEED_SIZE 4 +#endif + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/zstd/tests/fuzz/fuzz.py b/src/zstd/tests/fuzz/fuzz.py new file mode 100755 index 00000000..b591e4f6 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz.py @@ -0,0 +1,818 @@ +#!/usr/bin/env python + +# ################################################################ +# Copyright (c) 2016-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ########################################################################## + +import argparse +import contextlib +import os +import re +import shutil +import subprocess +import sys +import tempfile + + +def abs_join(a, *p): + return os.path.abspath(os.path.join(a, *p)) + + +# Constants +FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) +CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') +TARGETS = [ + 'simple_round_trip', + 'stream_round_trip', + 'block_round_trip', + 'simple_decompress', + 'stream_decompress', + 'block_decompress', +] +ALL_TARGETS = TARGETS + ['all'] +FUZZ_RNG_SEED_SIZE = 4 + +# Standard environment variables +CC = os.environ.get('CC', 'cc') +CXX = os.environ.get('CXX', 'c++') +CPPFLAGS = os.environ.get('CPPFLAGS', '') +CFLAGS = os.environ.get('CFLAGS', '-O3') +CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS) +LDFLAGS = os.environ.get('LDFLAGS', '') +MFLAGS = os.environ.get('MFLAGS', '-j') + +# Fuzzing environment variables +LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a') +AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz') +DECODECORPUS = os.environ.get('DECODECORPUS', + abs_join(FUZZ_DIR, '..', 'decodecorpus')) + +# Sanitizer environment variables +MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '') +MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '') +MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '') +MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '') + + +def create(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + os.mkdir(d) + return d + + +def check(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + return None + return d + + +@contextlib.contextmanager +def tmpdir(): + dirpath = tempfile.mkdtemp() + try: + yield dirpath + finally: + shutil.rmtree(dirpath, ignore_errors=True) + + +def parse_targets(in_targets): + targets = set() + for target in in_targets: + if not target: + continue + if target == 'all': + targets = targets.union(TARGETS) + elif target in TARGETS: + targets.add(target) + else: + raise RuntimeError('{} is not a valid target'.format(target)) + return list(targets) + + +def targets_parser(args, description): + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + args.TARGET = parse_targets(args.TARGET) + + return args + + +def parse_env_flags(args, flags): + """ + Look for flags set by environment variables. + """ + san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)) + nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)) + + def set_sanitizer(sanitizer, default, san, nosan): + if sanitizer in san and sanitizer in nosan: + raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'. + format(s=sanitizer)) + if sanitizer in san: + return True + if sanitizer in nosan: + return False + return default + + san = set(san_flags.split(',')) + nosan = set(nosan_flags.split(',')) + + args.asan = set_sanitizer('address', args.asan, san, nosan) + args.msan = set_sanitizer('memory', args.msan, san, nosan) + args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan) + + args.sanitize = args.asan or args.msan or args.ubsan + + return args + + +def compiler_version(cc, cxx): + """ + Determines the compiler and version. + Only works for clang and gcc. + """ + cc_version_bytes = subprocess.check_output([cc, "--version"]) + cxx_version_bytes = subprocess.check_output([cxx, "--version"]) + if cc_version_bytes.startswith(b'clang'): + assert(cxx_version_bytes.startswith(b'clang')) + compiler = 'clang' + if cc_version_bytes.startswith(b'gcc'): + assert(cxx_version_bytes.startswith(b'g++')) + compiler = 'gcc' + version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' + version_match = re.search(version_regex, cc_version_bytes) + version = tuple(int(version_match.group(i)) for i in range(1, 4)) + return compiler, version + + +def overflow_ubsan_flags(cc, cxx): + compiler, version = compiler_version(cc, cxx) + if compiler == 'gcc': + return ['-fno-sanitize=signed-integer-overflow'] + if compiler == 'clang' and version >= (5, 0, 0): + return ['-fno-sanitize=pointer-overflow'] + return [] + + +def build_parser(args): + description = """ + Cleans the repository and builds a fuzz target (or all). + Many flags default to environment variables (default says $X='y'). + Options that aren't enabling features default to the correct values for + zstd. + Enable sanitizers with --enable-*san. + For regression testing just build. + For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage. + For AFL set CC and CXX to AFL's compilers and set + LIB_FUZZING_ENGINE='libregression.a'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--lib-fuzzing-engine', + dest='lib_fuzzing_engine', + type=str, + default=LIB_FUZZING_ENGINE, + help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a ' + "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE))) + parser.add_argument( + '--enable-coverage', + dest='coverage', + action='store_true', + help='Enable coverage instrumentation (-fsanitize-coverage)') + parser.add_argument( + '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan', + dest='ubsan', + action='store_true', + help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan-pointer-overflow', + dest='ubsan_pointer_overflow', + action='store_true', + help='Enable UBSAN pointer overflow check (known failure)') + parser.add_argument( + '--enable-msan', dest='msan', action='store_true', help='Enable MSAN') + parser.add_argument( + '--enable-msan-track-origins', dest='msan_track_origins', + action='store_true', help='Enable MSAN origin tracking') + parser.add_argument( + '--msan-extra-cppflags', + dest='msan_extra_cppflags', + type=str, + default=MSAN_EXTRA_CPPFLAGS, + help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')". + format(MSAN_EXTRA_CPPFLAGS)) + parser.add_argument( + '--msan-extra-cflags', + dest='msan_extra_cflags', + type=str, + default=MSAN_EXTRA_CFLAGS, + help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format( + MSAN_EXTRA_CFLAGS)) + parser.add_argument( + '--msan-extra-cxxflags', + dest='msan_extra_cxxflags', + type=str, + default=MSAN_EXTRA_CXXFLAGS, + help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')". + format(MSAN_EXTRA_CXXFLAGS)) + parser.add_argument( + '--msan-extra-ldflags', + dest='msan_extra_ldflags', + type=str, + default=MSAN_EXTRA_LDFLAGS, + help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')". + format(MSAN_EXTRA_LDFLAGS)) + parser.add_argument( + '--enable-sanitize-recover', + dest='sanitize_recover', + action='store_true', + help='Non-fatal sanitizer errors where possible') + parser.add_argument( + '--debug', + dest='debug', + type=int, + default=1, + help='Set ZSTD_DEBUG (default: 1)') + parser.add_argument( + '--force-memory-access', + dest='memory_access', + type=int, + default=0, + help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)') + parser.add_argument( + '--fuzz-rng-seed-size', + dest='fuzz_rng_seed_size', + type=int, + default=4, + help='Set FUZZ_RNG_SEED_SIZE (default: 4)') + parser.add_argument( + '--disable-fuzzing-mode', + dest='fuzzing_mode', + action='store_false', + help='Do not define FUZZING_BUILD_MORE_UNSAFE_FOR_PRODUCTION') + parser.add_argument( + '--enable-stateful-fuzzing', + dest='stateful_fuzzing', + action='store_true', + help='Reuse contexts between runs (makes reproduction impossible)') + parser.add_argument( + '--cc', + dest='cc', + type=str, + default=CC, + help="CC (default: $CC='{}')".format(CC)) + parser.add_argument( + '--cxx', + dest='cxx', + type=str, + default=CXX, + help="CXX (default: $CXX='{}')".format(CXX)) + parser.add_argument( + '--cppflags', + dest='cppflags', + type=str, + default=CPPFLAGS, + help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS)) + parser.add_argument( + '--cflags', + dest='cflags', + type=str, + default=CFLAGS, + help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS)) + parser.add_argument( + '--cxxflags', + dest='cxxflags', + type=str, + default=CXXFLAGS, + help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS)) + parser.add_argument( + '--ldflags', + dest='ldflags', + type=str, + default=LDFLAGS, + help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS)) + parser.add_argument( + '--mflags', + dest='mflags', + type=str, + default=MFLAGS, + help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS)) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)) + ) + args = parser.parse_args(args) + args = parse_env_flags(args, ' '.join( + [args.cppflags, args.cflags, args.cxxflags, args.ldflags])) + + # Check option sanitiy + if args.msan and (args.asan or args.ubsan): + raise RuntimeError('MSAN may not be used with any other sanitizers') + if args.msan_track_origins and not args.msan: + raise RuntimeError('--enable-msan-track-origins requires MSAN') + if args.ubsan_pointer_overflow and not args.ubsan: + raise RuntimeError('--enable-ubsan-pointer-overlow requires UBSAN') + if args.sanitize_recover and not args.sanitize: + raise RuntimeError('--enable-sanitize-recover but no sanitizers used') + + return args + + +def build(args): + try: + args = build_parser(args) + except Exception as e: + print(e) + return 1 + # The compilation flags we are setting + targets = args.TARGET + cc = args.cc + cxx = args.cxx + cppflags = [args.cppflags] + cflags = [args.cflags] + ldflags = [args.ldflags] + cxxflags = [args.cxxflags] + mflags = [args.mflags] if args.mflags else [] + # Flags to be added to both cflags and cxxflags + common_flags = [] + + cppflags += [ + '-DZSTD_DEBUG={}'.format(args.debug), + '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access), + '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size), + ] + + mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)] + + # Set flags for options + if args.coverage: + common_flags += [ + '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp' + ] + + if args.sanitize_recover: + recover_flags = ['-fsanitize-recover=all'] + else: + recover_flags = ['-fno-sanitize-recover=all'] + if args.sanitize: + common_flags += recover_flags + + if args.msan: + msan_flags = ['-fsanitize=memory'] + if args.msan_track_origins: + msan_flags += ['-fsanitize-memory-track-origins'] + common_flags += msan_flags + # Append extra MSAN flags (it might require special setup) + cppflags += [args.msan_extra_cppflags] + cflags += [args.msan_extra_cflags] + cxxflags += [args.msan_extra_cxxflags] + ldflags += [args.msan_extra_ldflags] + + if args.asan: + common_flags += ['-fsanitize=address'] + + if args.ubsan: + ubsan_flags = ['-fsanitize=undefined'] + if not args.ubsan_pointer_overflow: + ubsan_flags += overflow_ubsan_flags(cc, cxx) + common_flags += ubsan_flags + + if args.stateful_fuzzing: + cppflags += ['-DSTATEFUL_FUZZING'] + + if args.fuzzing_mode: + cppflags += ['-DFUZZING_BUILD_MORE_UNSAFE_FOR_PRODUCTION'] + + if args.lib_fuzzing_engine == 'libregression.a': + targets = ['libregression.a'] + targets + + # Append the common flags + cflags += common_flags + cxxflags += common_flags + + # Prepare the flags for Make + cc_str = "CC={}".format(cc) + cxx_str = "CXX={}".format(cxx) + cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags)) + cflags_str = "CFLAGS={}".format(' '.join(cflags)) + cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags)) + ldflags_str = "LDFLAGS={}".format(' '.join(ldflags)) + + # Print the flags + print('MFLAGS={}'.format(' '.join(mflags))) + print(cc_str) + print(cxx_str) + print(cppflags_str) + print(cflags_str) + print(cxxflags_str) + print(ldflags_str) + + # Clean and build + clean_cmd = ['make', 'clean'] + mflags + print(' '.join(clean_cmd)) + subprocess.check_call(clean_cmd) + build_cmd = [ + 'make', + cc_str, + cxx_str, + cppflags_str, + cflags_str, + cxxflags_str, + ldflags_str, + ] + mflags + targets + print(' '.join(build_cmd)) + subprocess.check_call(build_cmd) + return 0 + + +def libfuzzer_parser(args): + description = """ + Runs a libfuzzer binary. + Passes all extra arguments to libfuzzer. + The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to + libFuzzer.a. + Generates output in the CORPORA directory, puts crashes in the ARTIFACT + directory, and takes extra input from the SEED directory. + To merge AFL's output pass the SEED as AFL's output directory and pass + '-merge=1'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--artifact', + type=str, + help='Override the default artifact dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-crash'))) + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + return args + + +def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None): + if corpora is None: + corpora = abs_join(CORPORA_DIR, target) + if artifact is None: + artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + if seed is None: + seed = abs_join(CORPORA_DIR, '{}-seed'.format(target)) + if extra_args is None: + extra_args = [] + + target = abs_join(FUZZ_DIR, target) + + corpora = [create(corpora)] + artifact = create(artifact) + seed = check(seed) + + corpora += [artifact] + if seed is not None: + corpora += [seed] + + cmd = [target, '-artifact_prefix={}/'.format(artifact)] + cmd += corpora + extra_args + print(' '.join(cmd)) + subprocess.check_call(cmd) + + +def libfuzzer_cmd(args): + try: + args = libfuzzer_parser(args) + except Exception as e: + print(e) + return 1 + libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra) + return 0 + + +def afl_parser(args): + description = """ + Runs an afl-fuzz job. + Passes all extra arguments to afl-fuzz. + The fuzzer should have been built with CC/CXX set to the AFL compilers, + and with LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA and writes output to OUTPUT. + Uses AFL_FUZZ as the binary (set from flag or environment variable). + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--output', + type=str, + help='Override the default AFL output dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-afl'))) + parser.add_argument( + '--afl-fuzz', + type=str, + default=AFL_FUZZ, + help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.corpora: + args.corpora = abs_join(CORPORA_DIR, args.TARGET) + if not args.output: + args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET)) + + return args + + +def afl(args): + try: + args = afl_parser(args) + except Exception as e: + print(e) + return 1 + target = abs_join(FUZZ_DIR, args.TARGET) + + corpora = create(args.corpora) + output = create(args.output) + + cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra + cmd += [target, '@@'] + print(' '.join(cmd)) + subprocess.call(cmd) + return 0 + + +def regression(args): + try: + description = """ + Runs one or more regression tests. + The fuzzer should have been built with with + LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + for target in args.TARGET: + corpora = create(abs_join(CORPORA_DIR, target)) + target = abs_join(FUZZ_DIR, target) + cmd = [target, corpora] + print(' '.join(cmd)) + subprocess.check_call(cmd) + return 0 + + +def gen_parser(args): + description = """ + Generate a seed corpus appropiate for TARGET with data generated with + decodecorpus. + The fuzz inputs are prepended with a seed before the zstd data, so the + output of decodecorpus shouldn't be used directly. + Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and + puts the output in SEED. + DECODECORPUS is the decodecorpus binary, and must already be built. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--number', + '-n', + type=int, + default=100, + help='Number of samples to generate') + parser.add_argument( + '--max-size-log', + type=int, + default=13, + help='Maximum sample size to generate') + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + '--decodecorpus', + type=str, + default=DECODECORPUS, + help="decodecorpus binary (default: $DECODECORPUS='{}')".format( + DECODECORPUS)) + parser.add_argument( + '--fuzz-rng-seed-size', + type=int, + default=4, + help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)" + ) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.seed: + args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET)) + + if not os.path.isfile(args.decodecorpus): + raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'". + format(args.decodecorpus, abs_join(FUZZ_DIR, '..'))) + + return args + + +def gen(args): + try: + args = gen_parser(args) + except Exception as e: + print(e) + return 1 + + seed = create(args.seed) + with tmpdir() as compressed: + with tmpdir() as decompressed: + cmd = [ + args.decodecorpus, + '-n{}'.format(args.number), + '-p{}/'.format(compressed), + '-o{}'.format(decompressed), + ] + + if 'block_' in args.TARGET: + cmd += [ + '--gen-blocks', + '--max-block-size-log={}'.format(args.max_size_log) + ] + else: + cmd += ['--max-content-size-log={}'.format(args.max_size_log)] + + print(' '.join(cmd)) + subprocess.check_call(cmd) + + if '_round_trip' in args.TARGET: + print('using decompressed data in {}'.format(decompressed)) + samples = decompressed + elif '_decompress' in args.TARGET: + print('using compressed data in {}'.format(compressed)) + samples = compressed + + # Copy the samples over and prepend the RNG seeds + for name in os.listdir(samples): + samplename = abs_join(samples, name) + outname = abs_join(seed, name) + rng_seed = os.urandom(args.fuzz_rng_seed_size) + with open(samplename, 'rb') as sample: + with open(outname, 'wb') as out: + out.write(rng_seed) + CHUNK_SIZE = 131072 + chunk = sample.read(CHUNK_SIZE) + while len(chunk) > 0: + out.write(chunk) + chunk = sample.read(CHUNK_SIZE) + return 0 + + +def minimize(args): + try: + description = """ + Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in + TARGET_seed_corpus. All extra args are passed to libfuzzer. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Merge the corpus + anything else into the seed_corpus + corpus = abs_join(CORPORA_DIR, target) + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + extra_args = [corpus, "-merge=1"] + args.extra + libfuzzer(target, corpora=seed_corpus, extra_args=extra_args) + seeds = set(os.listdir(seed_corpus)) + # Copy all crashes directly into the seed_corpus if not already present + crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + for crash in os.listdir(crashes): + if crash not in seeds: + shutil.copy(abs_join(crashes, crash), seed_corpus) + seeds.add(crash) + + +def zip_cmd(args): + try: + description = """ + Zips up the seed corpus. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Zip the seed_corpus + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + seeds = [abs_join(seed_corpus, f) for f in os.listdir(seed_corpus)] + zip_file = "{}.zip".format(seed_corpus) + cmd = ["zip", "-q", "-j", "-9", zip_file] + print(' '.join(cmd + [abs_join(seed_corpus, '*')])) + subprocess.check_call(cmd + seeds) + + +def list_cmd(args): + print("\n".join(TARGETS)) + + +def short_help(args): + name = args[0] + print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name)) + + +def help(args): + short_help(args) + print("\tfuzzing helpers (select a command and pass -h for help)\n") + print("Options:") + print("\t-h, --help\tPrint this message") + print("") + print("Commands:") + print("\tbuild\t\tBuild a fuzzer") + print("\tlibfuzzer\tRun a libFuzzer fuzzer") + print("\tafl\t\tRun an AFL fuzzer") + print("\tregression\tRun a regression test") + print("\tgen\t\tGenerate a seed corpus for a fuzzer") + print("\tminimize\tMinimize the test corpora") + print("\tzip\t\tZip the minimized corpora up") + print("\tlist\t\tList the available targets") + + +def main(): + args = sys.argv + if len(args) < 2: + help(args) + return 1 + if args[1] == '-h' or args[1] == '--help' or args[1] == '-H': + help(args) + return 1 + command = args.pop(1) + args[0] = "{} {}".format(args[0], command) + if command == "build": + return build(args) + if command == "libfuzzer": + return libfuzzer_cmd(args) + if command == "regression": + return regression(args) + if command == "afl": + return afl(args) + if command == "gen": + return gen(args) + if command == "minimize": + return minimize(args) + if command == "zip": + return zip_cmd(args) + if command == "list": + return list_cmd(args) + short_help(args) + print("Error: No such command {} (pass -h for help)".format(command)) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/zstd/tests/fuzz/fuzz_helpers.h b/src/zstd/tests/fuzz/fuzz_helpers.h new file mode 100644 index 00000000..468c39fb --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_helpers.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * Helper functions for fuzzing. + */ + +#ifndef FUZZ_HELPERS_H +#define FUZZ_HELPERS_H + +#include "fuzz.h" +#include "xxhash.h" +#include "zstd.h" +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define FUZZ_QUOTE_IMPL(str) #str +#define FUZZ_QUOTE(str) FUZZ_QUOTE_IMPL(str) + +/** + * Asserts for fuzzing that are always enabled. + */ +#define FUZZ_ASSERT_MSG(cond, msg) \ + ((cond) ? (void)0 \ + : (fprintf(stderr, "%s: %u: Assertion: `%s' failed. %s\n", __FILE__, \ + __LINE__, FUZZ_QUOTE(cond), (msg)), \ + abort())) +#define FUZZ_ASSERT(cond) FUZZ_ASSERT_MSG((cond), ""); +#define FUZZ_ZASSERT(code) \ + FUZZ_ASSERT_MSG(!ZSTD_isError(code), ZSTD_getErrorName(code)) + +#if defined(__GNUC__) +#define FUZZ_STATIC static __inline __attribute__((unused)) +#elif defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#define FUZZ_STATIC static inline +#elif defined(_MSC_VER) +#define FUZZ_STATIC static __inline +#else +#define FUZZ_STATIC static +#endif + +/** + * Determininistically constructs a seed based on the fuzz input. + * Consumes up to the first FUZZ_RNG_SEED_SIZE bytes of the input. + */ +FUZZ_STATIC uint32_t FUZZ_seed(uint8_t const **src, size_t* size) { + uint8_t const *data = *src; + size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, *size); + *size -= toHash; + *src += toHash; + return XXH32(data, toHash, 0); +} + +#define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r)))) + +FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) { + static const uint32_t prime1 = 2654435761U; + static const uint32_t prime2 = 2246822519U; + uint32_t rand32 = *state; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZZ_rotl32(rand32, 13); + *state = rand32; + return rand32 >> 5; +} + +/* Returns a random numer in the range [min, max]. */ +FUZZ_STATIC uint32_t FUZZ_rand32(uint32_t *state, uint32_t min, uint32_t max) { + uint32_t random = FUZZ_rand(state); + return min + (random % (max - min + 1)); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/zstd/tests/fuzz/regression_driver.c b/src/zstd/tests/fuzz/regression_driver.c new file mode 100644 index 00000000..2b714d29 --- /dev/null +++ b/src/zstd/tests/fuzz/regression_driver.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "fuzz.h" +#include "fuzz_helpers.h" +#include "util.h" +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char const **argv) { + size_t const kMaxFileSize = (size_t)1 << 20; + int const kFollowLinks = 1; + char *fileNamesBuf = NULL; + char const **files = argv + 1; + unsigned numFiles = argc - 1; + uint8_t *buffer = NULL; + size_t bufferSize = 0; + unsigned i; + int ret; + +#ifdef UTIL_HAS_CREATEFILELIST + files = UTIL_createFileList(files, numFiles, &fileNamesBuf, &numFiles, + kFollowLinks); + if (!files) + numFiles = 0; +#endif + if (numFiles == 0) + fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); + for (i = 0; i < numFiles; ++i) { + char const *fileName = files[i]; + size_t const fileSize = UTIL_getFileSize(fileName); + size_t readSize; + FILE *file; + + /* Check that it is a regular file, and that the fileSize is valid */ + FUZZ_ASSERT_MSG(UTIL_isRegularFile(fileName), fileName); + FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName); + /* Ensure we have a large enough buffer allocated */ + if (fileSize > bufferSize) { + free(buffer); + buffer = (uint8_t *)malloc(fileSize); + FUZZ_ASSERT_MSG(buffer, fileName); + bufferSize = fileSize; + } + /* Open the file */ + file = fopen(fileName, "rb"); + FUZZ_ASSERT_MSG(file, fileName); + /* Read the file */ + readSize = fread(buffer, 1, fileSize, file); + FUZZ_ASSERT_MSG(readSize == fileSize, fileName); + /* Close the file */ + fclose(file); + /* Run the fuzz target */ + LLVMFuzzerTestOneInput(buffer, fileSize); + } + + ret = 0; + free(buffer); +#ifdef UTIL_HAS_CREATEFILELIST + UTIL_freeFileList(files, fileNamesBuf); +#endif + return ret; +} diff --git a/src/zstd/tests/fuzz/simple_decompress.c b/src/zstd/tests/fuzz/simple_decompress.c new file mode 100644 index 00000000..bba272c6 --- /dev/null +++ b/src/zstd/tests/fuzz/simple_decompress.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + FUZZ_seed(&src, &size); + neededBufSize = MAX(20 * size, (size_t)256 << 10); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(rBuf); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/simple_round_trip.c b/src/zstd/tests/fuzz/simple_round_trip.c new file mode 100644 index 00000000..0921106d --- /dev/null +++ b/src/zstd/tests/fuzz/simple_round_trip.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +static const int kMaxClevel = 19; + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static void* cBuf = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; +static uint32_t seed; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize) +{ + size_t cSize; + if (FUZZ_rand(&seed) & 1) { + ZSTD_inBuffer in = {src, srcSize, 0}; + ZSTD_outBuffer out = {compressed, compressedCapacity, 0}; + size_t err; + + ZSTD_CCtx_reset(cctx); + FUZZ_setRandomParameters(cctx, srcSize, &seed); + err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(err); + FUZZ_ASSERT(err == 0); + cSize = out.pos; + } else { + int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + cSize = ZSTD_compressCCtx( + cctx, compressed, compressedCapacity, src, srcSize, cLevel); + } + FUZZ_ZASSERT(cSize); + return ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + seed = FUZZ_seed(&src, &size); + neededBufSize = ZSTD_compressBound(size); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = malloc(neededBufSize); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(cBuf && rBuf); + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + } +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/stream_decompress.c b/src/zstd/tests/fuzz/stream_decompress.c new file mode 100644 index 00000000..7ad57122 --- /dev/null +++ b/src/zstd/tests/fuzz/stream_decompress.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" + +static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX; + +static ZSTD_DStream *dstream = NULL; +static void* buf = NULL; +uint32_t seed; + +static ZSTD_outBuffer makeOutBuffer(void) +{ + ZSTD_outBuffer buffer = { buf, 0, 0 }; + + buffer.size = (FUZZ_rand(&seed) % kBufSize) + 1; + FUZZ_ASSERT(buffer.size <= kBufSize); + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_rand(&seed) % *size) + 1; + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + return buffer; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + seed = FUZZ_seed(&src, &size); + + /* Allocate all buffers and contexts if not already allocated */ + if (!buf) { + buf = malloc(kBufSize); + FUZZ_ASSERT(buf); + } + + if (!dstream) { + dstream = ZSTD_createDStream(); + FUZZ_ASSERT(dstream); + FUZZ_ASSERT(!ZSTD_isError(ZSTD_initDStream(dstream))); + } else { + FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream))); + } + + while (size > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &size); + while (in.pos != in.size) { + ZSTD_outBuffer out = makeOutBuffer(); + size_t const rc = ZSTD_decompressStream(dstream, &out, &in); + if (ZSTD_isError(rc)) goto error; + if (rc == 0) FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream))); + } + } + +error: +#ifndef STATEFUL_FUZZING + ZSTD_freeDStream(dstream); dstream = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/stream_round_trip.c b/src/zstd/tests/fuzz/stream_round_trip.c new file mode 100644 index 00000000..72d70495 --- /dev/null +++ b/src/zstd/tests/fuzz/stream_round_trip.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static uint8_t* cBuf = NULL; +static uint8_t* rBuf = NULL; +static size_t bufSize = 0; +static uint32_t seed; + +static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity) +{ + ZSTD_outBuffer buffer = { dst, 0, 0 }; + + FUZZ_ASSERT(capacity > 0); + buffer.size = (FUZZ_rand(&seed) % capacity) + 1; + FUZZ_ASSERT(buffer.size <= capacity); + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_rand(&seed) % *size) + 1; + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + return buffer; +} + +static size_t compress(uint8_t *dst, size_t capacity, + const uint8_t *src, size_t srcSize) +{ + size_t dstSize = 0; + ZSTD_CCtx_reset(cctx); + FUZZ_setRandomParameters(cctx, srcSize, &seed); + + while (srcSize > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &srcSize); + /* Mode controls the action. If mode == -1 we pick a new mode */ + int mode = -1; + while (in.pos < in.size) { + ZSTD_outBuffer out = makeOutBuffer(dst, capacity); + /* Previous action finished, pick a new mode. */ + if (mode == -1) mode = FUZZ_rand(&seed) % 10; + switch (mode) { + case 0: /* fall-though */ + case 1: /* fall-though */ + case 2: { + size_t const ret = + ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_flush); + FUZZ_ZASSERT(ret); + if (ret == 0) + mode = -1; + break; + } + case 3: { + size_t ret = + ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + /* Reset the compressor when the frame is finished */ + if (ret == 0) { + ZSTD_CCtx_reset(cctx); + if ((FUZZ_rand(&seed) & 7) == 0) { + size_t const remaining = in.size - in.pos; + FUZZ_setRandomParameters(cctx, remaining, &seed); + } + mode = -1; + } + break; + } + default: { + size_t const ret = + ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + mode = -1; + } + } + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + } + } + for (;;) { + ZSTD_inBuffer in = {NULL, 0, 0}; + ZSTD_outBuffer out = makeOutBuffer(dst, capacity); + size_t const ret = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + if (ret == 0) + break; + } + return dstSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + seed = FUZZ_seed(&src, &size); + neededBufSize = ZSTD_compressBound(size) * 2; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = (uint8_t*)malloc(neededBufSize); + rBuf = (uint8_t*)malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(cBuf && rBuf); + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const cSize = compress(cBuf, neededBufSize, src, size); + size_t const rSize = + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); + FUZZ_ZASSERT(rSize); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + } + +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/zstd_helpers.c b/src/zstd/tests/fuzz/zstd_helpers.c new file mode 100644 index 00000000..60289847 --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_helpers.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include "zstd_helpers.h" +#include "fuzz_helpers.h" +#include "zstd.h" + +static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned value) +{ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); +} + +static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, + unsigned max, uint32_t *state) { + unsigned const value = FUZZ_rand32(state, min, max); + set(cctx, param, value); +} + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state) +{ + /* Select compression parameters */ + ZSTD_compressionParameters cParams; + cParams.windowLog = FUZZ_rand32(state, ZSTD_WINDOWLOG_MIN, 15); + cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15); + cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16); + cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9); + cParams.searchLength = FUZZ_rand32(state, ZSTD_SEARCHLENGTH_MIN, + ZSTD_SEARCHLENGTH_MAX); + cParams.targetLength = FUZZ_rand32(state, ZSTD_TARGETLENGTH_MIN, + ZSTD_TARGETLENGTH_MAX); + cParams.strategy = FUZZ_rand32(state, ZSTD_fast, ZSTD_btultra); + return ZSTD_adjustCParams(cParams, srcSize, 0); +} + +ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state) +{ + /* Select frame parameters */ + ZSTD_frameParameters fParams; + fParams.contentSizeFlag = FUZZ_rand32(state, 0, 1); + fParams.checksumFlag = FUZZ_rand32(state, 0, 1); + fParams.noDictIDFlag = FUZZ_rand32(state, 0, 1); + return fParams; +} + +ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state) +{ + ZSTD_parameters params; + params.cParams = FUZZ_randomCParams(srcSize, state); + params.fParams = FUZZ_randomFParams(state); + return params; +} + +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) +{ + ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, state); + set(cctx, ZSTD_p_windowLog, cParams.windowLog); + set(cctx, ZSTD_p_hashLog, cParams.hashLog); + set(cctx, ZSTD_p_chainLog, cParams.chainLog); + set(cctx, ZSTD_p_searchLog, cParams.searchLog); + set(cctx, ZSTD_p_minMatch, cParams.searchLength); + set(cctx, ZSTD_p_targetLength, cParams.targetLength); + set(cctx, ZSTD_p_compressionStrategy, cParams.strategy); + /* Select frame parameters */ + setRand(cctx, ZSTD_p_contentSizeFlag, 0, 1, state); + setRand(cctx, ZSTD_p_checksumFlag, 0, 1, state); + setRand(cctx, ZSTD_p_dictIDFlag, 0, 1, state); + /* Select long distance matchig parameters */ + setRand(cctx, ZSTD_p_enableLongDistanceMatching, 0, 1, state); + setRand(cctx, ZSTD_p_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state); + setRand(cctx, ZSTD_p_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, + ZSTD_LDM_MINMATCH_MAX, state); + setRand(cctx, ZSTD_p_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, + state); + setRand(cctx, ZSTD_p_ldmHashEveryLog, 0, + ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, state); +} diff --git a/src/zstd/tests/fuzz/zstd_helpers.h b/src/zstd/tests/fuzz/zstd_helpers.h new file mode 100644 index 00000000..3856bebe --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_helpers.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * Helper functions for fuzzing. + */ + +#ifndef ZSTD_HELPERS_H +#define ZSTD_HELPERS_H + +#include "zstd.h" +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state); + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state); +ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state); +ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state); + + +#ifdef __cplusplus +} +#endif + +#endif /* ZSTD_HELPERS_H */ diff --git a/src/zstd/tests/fuzzer.c b/src/zstd/tests/fuzzer.c new file mode 100644 index 00000000..76df77af --- /dev/null +++ b/src/zstd/tests/fuzzer.c @@ -0,0 +1,1643 @@ +/* + * Copyright (c) 2015-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <string.h> /* strcmp */ +#include <time.h> /* clock_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */ +#include "zstd.h" /* ZSTD_VERSION_STRING */ +#include "zstd_errors.h" /* ZSTD_getErrorCode */ +#include "zstdmt_compress.h" +#define ZDICT_STATIC_LINKING_ONLY +#include "zdict.h" /* ZDICT_trainFromBuffer */ +#include "datagen.h" /* RDG_genBuffer */ +#include "mem.h" +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64 */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const U32 FUZ_compressibility_default = 50; +static const U32 nbTestsDefault = 30000; + + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((FUZ_clockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stdout); } } +static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6; +static clock_t g_displayClock = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX(a,b) ((a)>(b)?(a):(b)) + +static clock_t FUZ_clockSpan(clock_t cStart) +{ + return clock() - cStart; /* works even when overflow; max span ~ 30mn */ +} + +#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static unsigned FUZ_rand(unsigned* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +static unsigned FUZ_highbit32(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + while (v32) v32 >>= 1, nbBits++; + return nbBits; +} + + +/*============================================= +* Memory Tests +=============================================*/ +#if defined(__APPLE__) && defined(__MACH__) + +#include <malloc/malloc.h> /* malloc_size */ + +typedef struct { + unsigned long long totalMalloc; + size_t currentMalloc; + size_t peakMalloc; + unsigned nbMalloc; + unsigned nbFree; +} mallocCounter_t; + +static const mallocCounter_t INIT_MALLOC_COUNTER = { 0, 0, 0, 0, 0 }; + +static void* FUZ_mallocDebug(void* counter, size_t size) +{ + mallocCounter_t* const mcPtr = (mallocCounter_t*)counter; + void* const ptr = malloc(size); + if (ptr==NULL) return NULL; + DISPLAYLEVEL(4, "allocating %u KB => effectively %u KB \n", + (U32)(size >> 10), (U32)(malloc_size(ptr) >> 10)); /* OS-X specific */ + mcPtr->totalMalloc += size; + mcPtr->currentMalloc += size; + if (mcPtr->currentMalloc > mcPtr->peakMalloc) + mcPtr->peakMalloc = mcPtr->currentMalloc; + mcPtr->nbMalloc += 1; + return ptr; +} + +static void FUZ_freeDebug(void* counter, void* address) +{ + mallocCounter_t* const mcPtr = (mallocCounter_t*)counter; + DISPLAYLEVEL(4, "freeing %u KB \n", (U32)(malloc_size(address) >> 10)); + mcPtr->nbFree += 1; + mcPtr->currentMalloc -= malloc_size(address); /* OS-X specific */ + free(address); +} + +static void FUZ_displayMallocStats(mallocCounter_t count) +{ + DISPLAYLEVEL(3, "peak:%6u KB, nbMallocs:%2u, total:%6u KB \n", + (U32)(count.peakMalloc >> 10), + count.nbMalloc, + (U32)(count.totalMalloc >> 10)); +} + +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + exit(1); \ +} } + +static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) +{ + size_t const inSize = 64 MB + 16 MB + 4 MB + 1 MB + 256 KB + 64 KB; /* 85.3 MB */ + size_t const outSize = ZSTD_compressBound(inSize); + void* const inBuffer = malloc(inSize); + void* const outBuffer = malloc(outSize); + + /* test only played in verbose mode, as they are long */ + if (g_displayLevel<3) return 0; + + /* Create compressible noise */ + if (!inBuffer || !outBuffer) { + DISPLAY("Not enough memory, aborting\n"); + exit(1); + } + RDG_genBuffer(inBuffer, inSize, compressibility, 0. /*auto*/, seed); + + /* simple compression tests */ + if (part <= 1) + { int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem); + CHECK_Z( ZSTD_compressCCtx(cctx, outBuffer, outSize, inBuffer, inSize, compressionLevel) ); + ZSTD_freeCCtx(cctx); + DISPLAYLEVEL(3, "compressCCtx level %i : ", compressionLevel); + FUZ_displayMallocStats(malcount); + } } + + /* streaming compression tests */ + if (part <= 2) + { int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cstream = ZSTD_createCStream_advanced(cMem); + ZSTD_outBuffer out = { outBuffer, outSize, 0 }; + ZSTD_inBuffer in = { inBuffer, inSize, 0 }; + CHECK_Z( ZSTD_initCStream(cstream, compressionLevel) ); + CHECK_Z( ZSTD_compressStream(cstream, &out, &in) ); + CHECK_Z( ZSTD_endStream(cstream, &out) ); + ZSTD_freeCStream(cstream); + DISPLAYLEVEL(3, "compressStream level %i : ", compressionLevel); + FUZ_displayMallocStats(malcount); + } } + + /* advanced MT API test */ + if (part <= 3) + { U32 nbThreads; + for (nbThreads=1; nbThreads<=4; nbThreads++) { + int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem); + ZSTD_outBuffer out = { outBuffer, outSize, 0 }; + ZSTD_inBuffer in = { inBuffer, inSize, 0 }; + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, (U32)compressionLevel) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbThreads, nbThreads) ); + while ( ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end) ) {} + ZSTD_freeCCtx(cctx); + DISPLAYLEVEL(3, "compress_generic,-T%u,end level %i : ", + nbThreads, compressionLevel); + FUZ_displayMallocStats(malcount); + } } } + + /* advanced MT streaming API test */ + if (part <= 4) + { U32 nbThreads; + for (nbThreads=1; nbThreads<=4; nbThreads++) { + int compressionLevel; + for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { + mallocCounter_t malcount = INIT_MALLOC_COUNTER; + ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount }; + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem); + ZSTD_outBuffer out = { outBuffer, outSize, 0 }; + ZSTD_inBuffer in = { inBuffer, inSize, 0 }; + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, (U32)compressionLevel) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbThreads, nbThreads) ); + CHECK_Z( ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_continue) ); + while ( ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end) ) {} + ZSTD_freeCCtx(cctx); + DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ", + nbThreads, compressionLevel); + FUZ_displayMallocStats(malcount); + } } } + + return 0; +} + +#else + +static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) +{ + (void)seed; (void)compressibility; (void)part; + return 0; +} + +#endif + +/*============================================= +* Unit tests +=============================================*/ + +#define CHECK_V(var, fn) size_t const var = fn; if (ZSTD_isError(var)) goto _output_error +#define CHECK(fn) { CHECK_V(err, fn); } +#define CHECKPLUS(var, fn, more) { CHECK_V(var, fn); more; } + +static int basicUnitTests(U32 seed, double compressibility) +{ + size_t const CNBuffSize = 5 MB; + void* const CNBuffer = malloc(CNBuffSize); + size_t const compressedBufferSize = ZSTD_compressBound(CNBuffSize); + void* const compressedBuffer = malloc(compressedBufferSize); + void* const decodedBuffer = malloc(CNBuffSize); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + int testResult = 0; + U32 testNb=0; + size_t cSize; + + /* Create compressible noise */ + if (!CNBuffer || !compressedBuffer || !decodedBuffer) { + DISPLAY("Not enough memory, aborting\n"); + testResult = 1; + goto _end; + } + RDG_genBuffer(CNBuffer, CNBuffSize, compressibility, 0., seed); + + /* Basic tests */ + DISPLAYLEVEL(4, "test%3i : ZSTD_getErrorName : ", testNb++); + { const char* errorString = ZSTD_getErrorName(0); + DISPLAYLEVEL(4, "OK : %s \n", errorString); + } + + DISPLAYLEVEL(4, "test%3i : ZSTD_getErrorName with wrong value : ", testNb++); + { const char* errorString = ZSTD_getErrorName(499); + DISPLAYLEVEL(4, "OK : %s \n", errorString); + } + + + DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, (U32)CNBuffSize); + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + if (cctx==NULL) goto _output_error; + CHECKPLUS(r, ZSTD_compressCCtx(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, 1), + cSize=r ); + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : size of cctx for level 1 : ", testNb++); + { size_t const cctxSize = ZSTD_sizeof_CCtx(cctx); + DISPLAYLEVEL(4, "%u bytes \n", (U32)cctxSize); + } + ZSTD_freeCCtx(cctx); + } + + + DISPLAYLEVEL(4, "test%3i : ZSTD_getFrameContentSize test : ", testNb++); + { unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : ZSTD_findDecompressedSize test : ", testNb++); + { unsigned long long const rSize = ZSTD_findDecompressedSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t u; + for (u=0; u<CNBuffSize; u++) { + if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;; + } } + DISPLAYLEVEL(4, "OK \n"); + + + DISPLAYLEVEL(4, "test%3i : decompress with null dict : ", testNb++); + { size_t const r = ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, NULL, 0); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress with null DDict : ", testNb++); + { size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, NULL); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress with 1 missing byte : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize-1); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode((size_t)r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress with 1 too much byte : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize+1); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + + /* Static CCtx tests */ +#define STATIC_CCTX_LEVEL 3 + DISPLAYLEVEL(4, "test%3i : create static CCtx for level %u :", testNb++, STATIC_CCTX_LEVEL); + { size_t const staticCCtxSize = ZSTD_estimateCStreamSize(STATIC_CCTX_LEVEL); + void* const staticCCtxBuffer = malloc(staticCCtxSize); + size_t const staticDCtxSize = ZSTD_estimateDCtxSize(); + void* const staticDCtxBuffer = malloc(staticDCtxSize); + if (staticCCtxBuffer==NULL || staticDCtxBuffer==NULL) { + free(staticCCtxBuffer); + free(staticDCtxBuffer); + DISPLAY("Not enough memory, aborting\n"); + testResult = 1; + goto _end; + } + { ZSTD_CCtx* staticCCtx = ZSTD_initStaticCCtx(staticCCtxBuffer, staticCCtxSize); + ZSTD_DCtx* staticDCtx = ZSTD_initStaticDCtx(staticDCtxBuffer, staticDCtxSize); + if ((staticCCtx==NULL) || (staticDCtx==NULL)) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : init CCtx for level %u : ", testNb++, STATIC_CCTX_LEVEL); + { size_t const r = ZSTD_compressBegin(staticCCtx, STATIC_CCTX_LEVEL); + if (ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : simple compression test with static CCtx : ", testNb++); + CHECKPLUS(r, ZSTD_compressCCtx(staticCCtx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL), + cSize=r ); + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", + (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : simple decompression test with static DCtx : ", testNb++); + { size_t const r = ZSTD_decompressDCtx(staticDCtx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t u; + for (u=0; u<CNBuffSize; u++) { + if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) + goto _output_error;; + } } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : init CCtx for too large level (must fail) : ", testNb++); + { size_t const r = ZSTD_compressBegin(staticCCtx, ZSTD_maxCLevel()); + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1); + { size_t const r = ZSTD_compressBegin(staticCCtx, 1); + if (ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : init CStream for small level %u : ", testNb++, 1); + { size_t const r = ZSTD_initCStream(staticCCtx, 1); + if (ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : init CStream with dictionary (should fail) : ", testNb++); + { size_t const r = ZSTD_initCStream_usingDict(staticCCtx, CNBuffer, 64 KB, 1); + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : init DStream (should fail) : ", testNb++); + { size_t const r = ZSTD_initDStream(staticDCtx); + if (ZSTD_isError(r)) goto _output_error; } + { ZSTD_outBuffer output = { decodedBuffer, CNBuffSize, 0 }; + ZSTD_inBuffer input = { compressedBuffer, ZSTD_FRAMEHEADERSIZE_MAX+1, 0 }; + size_t const r = ZSTD_decompressStream(staticDCtx, &output, &input); + if (!ZSTD_isError(r)) goto _output_error; + } + DISPLAYLEVEL(4, "OK \n"); + } + free(staticCCtxBuffer); + free(staticDCtxBuffer); + } + + + + /* ZSTDMT simple MT compression test */ + DISPLAYLEVEL(4, "test%3i : create ZSTDMT CCtx : ", testNb++); + { ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2); + if (mtctx==NULL) { + DISPLAY("mtctx : mot enough memory, aborting \n"); + testResult = 1; + goto _end; + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : compress %u bytes with 2 threads : ", testNb++, (U32)CNBuffSize); + CHECKPLUS(r, ZSTDMT_compressCCtx(mtctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + 1), + cSize=r ); + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : decompressed size test : ", testNb++); + { unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize); + if (rSize != CNBuffSize) { + DISPLAY("ZSTD_getFrameContentSize incorrect : %u != %u \n", (U32)rSize, (U32)CNBuffSize); + goto _output_error; + } } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t u; + for (u=0; u<CNBuffSize; u++) { + if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;; + } } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : compress -T2 with checksum : ", testNb++); + { ZSTD_parameters params = ZSTD_getParams(1, CNBuffSize, 0); + params.fParams.checksumFlag = 1; + params.fParams.contentSizeFlag = 1; + CHECKPLUS(r, ZSTDMT_compress_advanced(mtctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + NULL, params, 3 /*overlapRLog*/), + cSize=r ); + } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + ZSTDMT_freeCCtx(mtctx); + } + + + /* Simple API multiframe test */ + DISPLAYLEVEL(4, "test%3i : compress multiple frames : ", testNb++); + { size_t off = 0; + int i; + int const segs = 4; + /* only use the first half so we don't push against size limit of compressedBuffer */ + size_t const segSize = (CNBuffSize / 2) / segs; + for (i = 0; i < segs; i++) { + CHECK_V(r, ZSTD_compress( + (BYTE *)compressedBuffer + off, CNBuffSize - off, + (BYTE *)CNBuffer + segSize * i, + segSize, 5)); + off += r; + if (i == segs/2) { + /* insert skippable frame */ + const U32 skipLen = 128 KB; + MEM_writeLE32((BYTE*)compressedBuffer + off, ZSTD_MAGIC_SKIPPABLE_START); + MEM_writeLE32((BYTE*)compressedBuffer + off + 4, skipLen); + off += skipLen + ZSTD_skippableHeaderSize; + } + } + cSize = off; + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : get decompressed size of multiple frames : ", testNb++); + { unsigned long long const r = ZSTD_findDecompressedSize(compressedBuffer, cSize); + if (r != CNBuffSize / 2) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress multiple frames : ", testNb++); + { CHECK_V(r, ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize)); + if (r != CNBuffSize / 2) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + if (memcmp(decodedBuffer, CNBuffer, CNBuffSize / 2) != 0) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + /* Dictionary and CCtx Duplication tests */ + { ZSTD_CCtx* const ctxOrig = ZSTD_createCCtx(); + ZSTD_CCtx* const ctxDuplicated = ZSTD_createCCtx(); + static const size_t dictSize = 551; + + DISPLAYLEVEL(4, "test%3i : copy context too soon : ", testNb++); + { size_t const copyResult = ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0); + if (!ZSTD_isError(copyResult)) goto _output_error; } /* error must be detected */ + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : load dictionary into context : ", testNb++); + CHECK( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) ); + CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */ + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : compress with flat dictionary : ", testNb++); + cSize = 0; + CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, compressedBuffer, compressedBufferSize, + (const char*)CNBuffer + dictSize, CNBuffSize - dictSize), + cSize += r); + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : frame built with flat dictionary should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + CNBuffer, dictSize), + if (r != CNBuffSize - dictSize) goto _output_error); + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : compress with duplicated context : ", testNb++); + { size_t const cSizeOrig = cSize; + cSize = 0; + CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, compressedBufferSize, + (const char*)CNBuffer + dictSize, CNBuffSize - dictSize), + cSize += r); + if (cSize != cSizeOrig) goto _output_error; /* should be identical ==> same size */ + } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : frame built with duplicated context should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + CNBuffer, dictSize), + if (r != CNBuffSize - dictSize) goto _output_error); + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : decompress with DDict : ", testNb++); + { ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, dictSize); + size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict); + if (r != CNBuffSize - dictSize) goto _output_error; + DISPLAYLEVEL(4, "OK (size of DDict : %u) \n", (U32)ZSTD_sizeof_DDict(ddict)); + ZSTD_freeDDict(ddict); + } + + DISPLAYLEVEL(4, "test%3i : decompress with static DDict : ", testNb++); + { size_t const ddictBufferSize = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy); + void* ddictBuffer = malloc(ddictBufferSize); + if (ddictBuffer == NULL) goto _output_error; + { ZSTD_DDict* const ddict = ZSTD_initStaticDDict(ddictBuffer, ddictBufferSize, CNBuffer, dictSize, ZSTD_dlm_byCopy); + size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict); + if (r != CNBuffSize - dictSize) goto _output_error; + } + free(ddictBuffer); + DISPLAYLEVEL(4, "OK (size of static DDict : %u) \n", (U32)ddictBufferSize); + } + + DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++); + { size_t const testSize = CNBuffSize / 3; + { ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize); + p.fParams.contentSizeFlag = 1; + CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) ); + } + CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) ); + + CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize), + (const char*)CNBuffer + dictSize, testSize), + cSize = r); + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, compressedBuffer, cSize)) goto _output_error; + if ((zfh.frameContentSize != testSize) && (zfh.frameContentSize != 0)) goto _output_error; + } } + DISPLAYLEVEL(4, "OK \n"); + + ZSTD_freeCCtx(ctxOrig); + ZSTD_freeCCtx(ctxDuplicated); + } + + /* Dictionary and dictBuilder tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t dictSize = 16 KB; + void* dictBuffer = malloc(dictSize); + size_t const totalSampleSize = 1 MB; + size_t const sampleUnitSize = 8 KB; + U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); + size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + U32 dictID; + + if (dictBuffer==NULL || samplesSizes==NULL) { + free(dictBuffer); + free(samplesSizes); + goto _output_error; + } + + DISPLAYLEVEL(4, "test%3i : dictBuilder : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize, + CNBuffer, samplesSizes, nbSamples); + if (ZDICT_isError(dictSize)) goto _output_error; + DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize); + + DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++); + dictID = ZDICT_getDictID(dictBuffer, dictSize); + if (dictID==0) goto _output_error; + DISPLAYLEVEL(4, "OK : %u \n", dictID); + + DISPLAYLEVEL(4, "test%3i : compress with dictionary : ", testNb++); + cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + dictBuffer, dictSize, 4); + if (ZSTD_isError(cSize)) goto _output_error; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : retrieve dictID from dictionary : ", testNb++); + { U32 const did = ZSTD_getDictID_fromDict(dictBuffer, dictSize); + if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */ + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : retrieve dictID from frame : ", testNb++); + { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */ + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : frame built with dictionary should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : estimate CDict size : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + size_t const estimatedSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byRef); + DISPLAYLEVEL(4, "OK : %u \n", (U32)estimatedSize); + } + + DISPLAYLEVEL(4, "test%3i : compress with CDict ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, + ZSTD_dlm_byRef, ZSTD_dm_auto, + cParams, ZSTD_defaultCMem); + DISPLAYLEVEL(4, "(size : %u) : ", (U32)ZSTD_sizeof_CDict(cdict)); + cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, cdict); + ZSTD_freeCDict(cdict); + if (ZSTD_isError(cSize)) goto _output_error; + } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : retrieve dictID from frame : ", testNb++); + { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */ + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : frame built with dictionary should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : compress with static CDict : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + size_t const cdictSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); + void* const cdictBuffer = malloc(cdictSize); + if (cdictBuffer==NULL) goto _output_error; + { ZSTD_CDict* const cdict = ZSTD_initStaticCDict(cdictBuffer, cdictSize, + dictBuffer, dictSize, + ZSTD_dlm_byCopy, ZSTD_dm_auto, + cParams); + if (cdict == NULL) { + DISPLAY("ZSTD_initStaticCDict failed "); + goto _output_error; + } + cSize = ZSTD_compress_usingCDict(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, cdict); + if (ZSTD_isError(cSize)) { + DISPLAY("ZSTD_compress_usingCDict failed "); + goto _output_error; + } } + free(cdictBuffer); + } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : ZSTD_compress_usingCDict_advanced, no contentSize, no dictID : ", testNb++); + { ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ }; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dm_auto, cParams, ZSTD_defaultCMem); + cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, cdict, fParams); + ZSTD_freeCDict(cdict); + if (ZSTD_isError(cSize)) goto _output_error; + } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : try retrieving contentSize from frame : ", testNb++); + { U64 const contentSize = ZSTD_getFrameContentSize(compressedBuffer, cSize); + if (contentSize != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error; + } + DISPLAYLEVEL(4, "OK (unknown)\n"); + + DISPLAYLEVEL(4, "test%3i : frame built without dictID should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : ZSTD_compress_advanced, no dictID : ", testNb++); + { ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize); + p.fParams.noDictIDFlag = 1; + cSize = ZSTD_compress_advanced(cctx, compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize, + dictBuffer, dictSize, p); + if (ZSTD_isError(cSize)) goto _output_error; + } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100); + + DISPLAYLEVEL(4, "test%3i : frame built without dictID should be decompressible : ", testNb++); + CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, + decodedBuffer, CNBuffSize, + compressedBuffer, cSize, + dictBuffer, dictSize), + if (r != CNBuffSize) goto _output_error); + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : dictionary containing only header should return error : ", testNb++); + { + const size_t ret = ZSTD_decompress_usingDict( + dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, + "\x37\xa4\x30\xec\x11\x22\x33\x44", 8); + if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted) goto _output_error; + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a good dictionary : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem); + if (cdict==NULL) goto _output_error; + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a rawContent (must fail) : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced((const char*)dictBuffer+1, dictSize-1, ZSTD_dlm_byRef, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem); + if (cdict!=NULL) goto _output_error; + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dm_auto should fail : ", testNb++); + { + size_t ret; + MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); + ret = ZSTD_CCtx_loadDictionary_advanced( + cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dm_auto); + if (!ZSTD_isError(ret)) goto _output_error; + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dm_rawContent should pass : ", testNb++); + { + size_t ret; + MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); + ret = ZSTD_CCtx_loadDictionary_advanced( + cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dm_rawContent); + if (ZSTD_isError(ret)) goto _output_error; + } + DISPLAYLEVEL(4, "OK \n"); + + ZSTD_freeCCtx(cctx); + free(dictBuffer); + free(samplesSizes); + } + + /* COVER dictionary builder tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t dictSize = 16 KB; + size_t optDictSize = dictSize; + void* dictBuffer = malloc(dictSize); + size_t const totalSampleSize = 1 MB; + size_t const sampleUnitSize = 8 KB; + U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); + size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + ZDICT_cover_params_t params; + U32 dictID; + + if (dictBuffer==NULL || samplesSizes==NULL) { + free(dictBuffer); + free(samplesSizes); + goto _output_error; + } + + DISPLAYLEVEL(4, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++); + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + memset(¶ms, 0, sizeof(params)); + params.d = 1 + (FUZ_rand(&seed) % 16); + params.k = params.d + (FUZ_rand(&seed) % 256); + dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize, + CNBuffer, samplesSizes, nbSamples, + params); + if (ZDICT_isError(dictSize)) goto _output_error; + DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize); + + DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++); + dictID = ZDICT_getDictID(dictBuffer, dictSize); + if (dictID==0) goto _output_error; + DISPLAYLEVEL(4, "OK : %u \n", dictID); + + DISPLAYLEVEL(4, "test%3i : ZDICT_optimizeTrainFromBuffer_cover : ", testNb++); + memset(¶ms, 0, sizeof(params)); + params.steps = 4; + optDictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, optDictSize, + CNBuffer, samplesSizes, + nbSamples / 4, ¶ms); + if (ZDICT_isError(optDictSize)) goto _output_error; + DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize); + + DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++); + dictID = ZDICT_getDictID(dictBuffer, optDictSize); + if (dictID==0) goto _output_error; + DISPLAYLEVEL(4, "OK : %u \n", dictID); + + ZSTD_freeCCtx(cctx); + free(dictBuffer); + free(samplesSizes); + } + + /* Decompression defense tests */ + DISPLAYLEVEL(4, "test%3i : Check input length for magic number : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 3); /* too small input */ + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Check magic Number : ", testNb++); + ((char*)(CNBuffer))[0] = 1; + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 4); + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + /* content size verification test */ + DISPLAYLEVEL(4, "test%3i : Content size verification : ", testNb++); + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const srcSize = 5000; + size_t const wrongSrcSize = (srcSize + 1000); + ZSTD_parameters params = ZSTD_getParams(1, wrongSrcSize, 0); + params.fParams.contentSizeFlag = 1; + CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) ); + { size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize); + if (!ZSTD_isError(result)) goto _output_error; + if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error; + DISPLAYLEVEL(4, "OK : %s \n", ZSTD_getErrorName(result)); + } + ZSTD_freeCCtx(cctx); + } + + /* custom formats tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */ + + /* basic block compression */ + DISPLAYLEVEL(4, "test%3i : magic-less format test : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_format, ZSTD_f_zstd1_magicless) ); + { ZSTD_inBuffer in = { CNBuffer, inputSize, 0 }; + ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 }; + size_t const result = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + cSize = out.pos; + } + DISPLAYLEVEL(4, "OK (compress : %u -> %u bytes)\n", (U32)inputSize, (U32)cSize); + + DISPLAYLEVEL(4, "test%3i : decompress normally (should fail) : ", testNb++); + { size_t const decodeResult = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (ZSTD_getErrorCode(decodeResult) != ZSTD_error_prefix_unknown) goto _output_error; + DISPLAYLEVEL(4, "OK : %s \n", ZSTD_getErrorName(decodeResult)); + } + + DISPLAYLEVEL(4, "test%3i : decompress with magic-less instruction : ", testNb++); + ZSTD_DCtx_reset(dctx); + CHECK( ZSTD_DCtx_setFormat(dctx, ZSTD_f_zstd1_magicless) ); + { ZSTD_inBuffer in = { compressedBuffer, cSize, 0 }; + ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 }; + size_t const result = ZSTD_decompress_generic(dctx, &out, &in); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + if (out.pos != inputSize) goto _output_error; + DISPLAYLEVEL(4, "OK : regenerated %u bytes \n", (U32)out.pos); + } + + ZSTD_freeCCtx(cctx); + } + + /* block API tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + static const size_t dictSize = 65 KB; + static const size_t blockSize = 100 KB; /* won't cause pb with small dict size */ + size_t cSize2; + + /* basic block compression */ + DISPLAYLEVEL(4, "test%3i : Block compression test : ", testNb++); + CHECK( ZSTD_compressBegin(cctx, 5) ); + CHECK( ZSTD_getBlockSize(cctx) >= blockSize); + cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize); + if (ZSTD_isError(cSize)) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Block decompression test : ", testNb++); + CHECK( ZSTD_decompressBegin(dctx) ); + { CHECK_V(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + if (r != blockSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + /* dictionary block compression */ + DISPLAYLEVEL(4, "test%3i : Dictionary Block compression test : ", testNb++); + CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) ); + cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize); + if (ZSTD_isError(cSize)) goto _output_error; + cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize); + if (ZSTD_isError(cSize2)) goto _output_error; + memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* fake non-compressed block */ + cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize), + (char*)CNBuffer+dictSize+2*blockSize, blockSize); + if (ZSTD_isError(cSize2)) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Dictionary Block decompression test : ", testNb++); + CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) ); + { CHECK_V( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + if (r != blockSize) goto _output_error; } + ZSTD_insertBlock(dctx, (char*)decodedBuffer+blockSize, blockSize); /* insert non-compressed block into dctx history */ + { CHECK_V( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+2*blockSize, CNBuffSize, (char*)compressedBuffer+cSize+blockSize, cSize2) ); + if (r != blockSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + ZSTD_freeCCtx(cctx); + } + ZSTD_freeDCtx(dctx); + + /* long rle test */ + { size_t sampleSize = 0; + DISPLAYLEVEL(4, "test%3i : Long RLE test : ", testNb++); + RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., seed+1); + memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1); + sampleSize += 256 KB - 1; + RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., seed+2); + sampleSize += 96 KB; + cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1); + if (ZSTD_isError(cSize)) goto _output_error; + { CHECK_V(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize)); + if (regenSize!=sampleSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + } + + /* All zeroes test (test bug #137) */ + #define ZEROESLENGTH 100 + DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); + memset(CNBuffer, 0, ZEROESLENGTH); + { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) ); + cSize = r; } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); + { CHECK_V(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) ); + if (r != ZEROESLENGTH) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + /* nbSeq limit test */ + #define _3BYTESTESTLENGTH 131000 + #define NB3BYTESSEQLOG 9 + #define NB3BYTESSEQ (1 << NB3BYTESSEQLOG) + #define NB3BYTESSEQMASK (NB3BYTESSEQ-1) + /* creates a buffer full of 3-bytes sequences */ + { BYTE _3BytesSeqs[NB3BYTESSEQ][3]; + U32 rSeed = 1; + + /* create batch of 3-bytes sequences */ + { int i; + for (i=0; i < NB3BYTESSEQ; i++) { + _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255); + _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255); + _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255); + } } + + /* randomly fills CNBuffer with prepared 3-bytes sequences */ + { int i; + for (i=0; i < _3BYTESTESTLENGTH; i += 3) { /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */ + U32 const id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK; + ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0]; + ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1]; + ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2]; + } } } + DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++); + { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), + CNBuffer, _3BYTESTESTLENGTH, 19) ); + cSize = r; } + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++); + { CHECK_V(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) ); + if (r != _3BYTESTESTLENGTH) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : incompressible data and ill suited dictionary : ", testNb++); + RDG_genBuffer(CNBuffer, CNBuffSize, 0.0, 0.1, seed); + { /* Train a dictionary on low characters */ + size_t dictSize = 16 KB; + void* const dictBuffer = malloc(dictSize); + size_t const totalSampleSize = 1 MB; + size_t const sampleUnitSize = 8 KB; + U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); + size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + if (!dictBuffer || !samplesSizes) goto _output_error; + { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; } + dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize, CNBuffer, samplesSizes, nbSamples); + if (ZDICT_isError(dictSize)) goto _output_error; + /* Reverse the characters to make the dictionary ill suited */ + { U32 u; + for (u = 0; u < CNBuffSize; ++u) { + ((BYTE*)CNBuffer)[u] = 255 - ((BYTE*)CNBuffer)[u]; + } + } + { /* Compress the data */ + size_t const inputSize = 500; + size_t const outputSize = ZSTD_compressBound(inputSize); + void* const outputBuffer = malloc(outputSize); + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (!outputBuffer || !cctx) goto _output_error; + CHECK(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1)); + free(outputBuffer); + ZSTD_freeCCtx(cctx); + } + + free(dictBuffer); + free(samplesSizes); + } + DISPLAYLEVEL(4, "OK \n"); + + + /* findFrameCompressedSize on skippable frames */ + DISPLAYLEVEL(4, "test%3i : frame compressed size of skippable frame : ", testNb++); + { const char* frame = "\x50\x2a\x4d\x18\x05\x0\x0\0abcde"; + size_t const frameSrcSize = 13; + if (ZSTD_findFrameCompressedSize(frame, frameSrcSize) != frameSrcSize) goto _output_error; } + DISPLAYLEVEL(4, "OK \n"); + + /* error string tests */ + DISPLAYLEVEL(4, "test%3i : testing ZSTD error code strings : ", testNb++); + if (strcmp("No error detected", ZSTD_getErrorName((ZSTD_ErrorCode)(0-ZSTD_error_no_error))) != 0) goto _output_error; + if (strcmp("No error detected", ZSTD_getErrorString(ZSTD_error_no_error)) != 0) goto _output_error; + if (strcmp("Unspecified error code", ZSTD_getErrorString((ZSTD_ErrorCode)(0-ZSTD_error_GENERIC))) != 0) goto _output_error; + if (strcmp("Error (generic)", ZSTD_getErrorName((size_t)0-ZSTD_error_GENERIC)) != 0) goto _output_error; + if (strcmp("Error (generic)", ZSTD_getErrorString(ZSTD_error_GENERIC)) != 0) goto _output_error; + if (strcmp("No error detected", ZSTD_getErrorName(ZSTD_error_GENERIC)) != 0) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + +_end: + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + return testResult; + +_output_error: + testResult = 1; + DISPLAY("Error detected in Unit tests ! \n"); + goto _end; +} + + +static size_t findDiff(const void* buf1, const void* buf2, size_t max) +{ + const BYTE* b1 = (const BYTE*)buf1; + const BYTE* b2 = (const BYTE*)buf2; + size_t u; + for (u=0; u<max; u++) { + if (b1[u] != b2[u]) break; + } + return u; +} + + +static ZSTD_parameters FUZ_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams) +{ + ZSTD_parameters params; + params.cParams = cParams; + params.fParams = fParams; + return params; +} + +static size_t FUZ_rLogLength(U32* seed, U32 logLength) +{ + size_t const lengthMask = ((size_t)1 << logLength) - 1; + return (lengthMask+1) + (FUZ_rand(seed) & lengthMask); +} + +static size_t FUZ_randomLength(U32* seed, U32 maxLog) +{ + U32 const logLength = FUZ_rand(seed) % maxLog; + return FUZ_rLogLength(seed, logLength); +} + +#undef CHECK +#define CHECK(cond, ...) { \ + if (cond) { \ + DISPLAY("Error => "); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ + goto _output_error; \ +} } + +#undef CHECK_Z +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ + goto _output_error; \ +} } + + +static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxDurationS, double compressibility, int bigTests) +{ + static const U32 maxSrcLog = 23; + static const U32 maxSampleLog = 22; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + size_t const dstBufferSize = (size_t)1<<maxSampleLog; + size_t const cBufferSize = ZSTD_compressBound(dstBufferSize); + BYTE* cNoiseBuffer[5]; + BYTE* srcBuffer; /* jumping pointer */ + BYTE* const cBuffer = (BYTE*) malloc (cBufferSize); + BYTE* const dstBuffer = (BYTE*) malloc (dstBufferSize); + BYTE* const mirrorBuffer = (BYTE*) malloc (dstBufferSize); + ZSTD_CCtx* const refCtx = ZSTD_createCCtx(); + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + U32 result = 0; + U32 testNb = 0; + U32 coreSeed = seed, lseed = 0; + clock_t const startClock = clock(); + clock_t const maxClockSpan = maxDurationS * CLOCKS_PER_SEC; + int const cLevelLimiter = bigTests ? 3 : 2; + + /* allocation */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] + || !dstBuffer || !mirrorBuffer || !cBuffer || !refCtx || !ctx || !dctx, + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + srcBuffer = cNoiseBuffer[2]; + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) FUZ_rand(&coreSeed); + + /* main test loop */ + for ( ; (testNb <= nbTests) || (FUZ_clockSpan(startClock) < maxClockSpan); testNb++ ) { + size_t sampleSize, maxTestSize, totalTestSize; + size_t cSize, totalCSize, totalGenSize; + U64 crcOrig; + BYTE* sampleBuffer; + const BYTE* dict; + size_t dictSize; + + /* notification */ + if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + else { DISPLAYUPDATE(2, "\r%6u ", testNb); } + + FUZ_rand(&coreSeed); + { U32 const prime1 = 2654435761U; lseed = coreSeed ^ prime1; } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* select src segment */ + sampleSize = FUZ_randomLength(&lseed, maxSampleLog); + + /* create sample buffer (to catch read error with valgrind & sanitizers) */ + sampleBuffer = (BYTE*)malloc(sampleSize); + CHECK(sampleBuffer==NULL, "not enough memory for sample buffer"); + { size_t const sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize); + memcpy(sampleBuffer, srcBuffer + sampleStart, sampleSize); } + crcOrig = XXH64(sampleBuffer, sampleSize, 0); + + /* compression tests */ + { unsigned const cLevel = + ( FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / cLevelLimiter)) ) + + 1; + cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); + CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize)); + + /* compression failure test : too small dest buffer */ + if (cSize > 3) { + const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ + const size_t tooSmallSize = cSize - missing; + const U32 endMark = 0x4DC2B1A9; + memcpy(dstBuffer+tooSmallSize, &endMark, 4); + { size_t const errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); + CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); } + { U32 endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, 4); + CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } + } } + + /* frame header decompression test */ + { ZSTD_frameHeader zfh; + CHECK_Z( ZSTD_getFrameHeader(&zfh, cBuffer, cSize) ); + CHECK(zfh.frameContentSize != sampleSize, "Frame content size incorrect"); + } + + /* Decompressed size test */ + { unsigned long long const rSize = ZSTD_findDecompressedSize(cBuffer, cSize); + CHECK(rSize != sampleSize, "decompressed size incorrect"); + } + + /* successful decompression test */ + { size_t const margin = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1; + size_t const dSize = ZSTD_decompress(dstBuffer, sampleSize + margin, cBuffer, cSize); + CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize); + { U64 const crcDest = XXH64(dstBuffer, sampleSize, 0); + CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize); + } } + + free(sampleBuffer); /* no longer useful after this point */ + + /* truncated src decompression test */ + { size_t const missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ + size_t const tooSmallSize = cSize - missing; + void* cBufferTooSmall = malloc(tooSmallSize); /* valgrind will catch read overflows */ + CHECK(cBufferTooSmall == NULL, "not enough memory !"); + memcpy(cBufferTooSmall, cBuffer, tooSmallSize); + { size_t const errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize); + CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)"); } + free(cBufferTooSmall); + } + + /* too small dst decompression test */ + if (sampleSize > 3) { + size_t const missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ + size_t const tooSmallSize = sampleSize - missing; + static const BYTE token = 0xA9; + dstBuffer[tooSmallSize] = token; + { size_t const errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize); + CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (U32)errorCode, (U32)tooSmallSize); } + CHECK(dstBuffer[tooSmallSize] != token, "ZSTD_decompress : dst buffer overflow"); + } + + /* noisy src decompression test */ + if (cSize > 6) { + /* insert noise into src */ + { U32 const maxNbBits = FUZ_highbit32((U32)(cSize-4)); + size_t pos = 4; /* preserve magic number (too easy to detect) */ + for (;;) { + /* keep some original src */ + { U32 const nbBits = FUZ_rand(&lseed) % maxNbBits; + size_t const mask = (1<<nbBits) - 1; + size_t const skipLength = FUZ_rand(&lseed) & mask; + pos += skipLength; + } + if (pos <= cSize) break; + /* add noise */ + { U32 const nbBitsCodes = FUZ_rand(&lseed) % maxNbBits; + U32 const nbBits = nbBitsCodes ? nbBitsCodes-1 : 0; + size_t const mask = (1<<nbBits) - 1; + size_t const rNoiseLength = (FUZ_rand(&lseed) & mask) + 1; + size_t const noiseLength = MIN(rNoiseLength, cSize-pos); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength); + memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength); + pos += noiseLength; + } } } + + /* decompress noisy source */ + { U32 const endMark = 0xA9B1C3D6; + memcpy(dstBuffer+sampleSize, &endMark, 4); + { size_t const decompressResult = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize); + /* result *may* be an unlikely success, but even then, it must strictly respect dst buffer boundaries */ + CHECK((!ZSTD_isError(decompressResult)) && (decompressResult>sampleSize), + "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)decompressResult, (U32)sampleSize); + } + { U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4); + CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow"); + } } } /* noisy src decompression test */ + + /*===== Streaming compression test, scattered segments and dictionary =====*/ + + { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + int const cLevel = (FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / cLevelLimiter))) + + 1; + maxTestSize = FUZ_rLogLength(&lseed, testLog); + if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1; + + dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */ + dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize)); + + if (FUZ_rand(&lseed) & 0xF) { + CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) ); + } else { + ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize); + ZSTD_frameParameters const fPar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */, + !(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/, + 0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */ + ZSTD_parameters const p = FUZ_makeParams(cPar, fPar); + CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) ); + } + CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) ); + } + + { U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2; + U32 n; + XXH64_state_t xxhState; + XXH64_reset(&xxhState, 0); + for (totalTestSize=0, cSize=0, n=0 ; n<nbChunks ; n++) { + size_t const segmentSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const segmentStart = FUZ_rand(&lseed) % (srcBufferSize - segmentSize); + + if (cBufferSize-cSize < ZSTD_compressBound(segmentSize)) break; /* avoid invalid dstBufferTooSmall */ + if (totalTestSize+segmentSize > maxTestSize) break; + + { size_t const compressResult = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+segmentStart, segmentSize); + CHECK (ZSTD_isError(compressResult), "multi-segments compression error : %s", ZSTD_getErrorName(compressResult)); + cSize += compressResult; + } + XXH64_update(&xxhState, srcBuffer+segmentStart, segmentSize); + memcpy(mirrorBuffer + totalTestSize, srcBuffer+segmentStart, segmentSize); + totalTestSize += segmentSize; + } + + { size_t const flushResult = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize, NULL, 0); + CHECK (ZSTD_isError(flushResult), "multi-segments epilogue error : %s", ZSTD_getErrorName(flushResult)); + cSize += flushResult; + } + crcOrig = XXH64_digest(&xxhState); + } + + /* streaming decompression test */ + /* ensure memory requirement is good enough (should always be true) */ + { ZSTD_frameHeader zfh; + CHECK( ZSTD_getFrameHeader(&zfh, cBuffer, ZSTD_frameHeaderSize_max), + "ZSTD_getFrameHeader(): error retrieving frame information"); + { size_t const roundBuffSize = ZSTD_decodingBufferSize_min(zfh.windowSize, zfh.frameContentSize); + CHECK_Z(roundBuffSize); + CHECK((roundBuffSize > totalTestSize) && (zfh.frameContentSize!=ZSTD_CONTENTSIZE_UNKNOWN), + "ZSTD_decodingBufferSize_min() requires more memory (%u) than necessary (%u)", + (U32)roundBuffSize, (U32)totalTestSize ); + } } + if (dictSize<8) dictSize=0, dict=NULL; /* disable dictionary */ + CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, dict, dictSize) ); + totalCSize = 0; + totalGenSize = 0; + while (totalCSize < cSize) { + size_t const inSize = ZSTD_nextSrcSizeToDecompress(dctx); + size_t const genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize); + CHECK (ZSTD_isError(genSize), "ZSTD_decompressContinue error : %s", ZSTD_getErrorName(genSize)); + totalGenSize += genSize; + totalCSize += inSize; + } + CHECK (ZSTD_nextSrcSizeToDecompress(dctx) != 0, "frame not fully decoded"); + CHECK (totalGenSize != totalTestSize, "streaming decompressed data : wrong size") + CHECK (totalCSize != cSize, "compressed data should be fully read") + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) { + size_t const errorPos = findDiff(mirrorBuffer, dstBuffer, totalTestSize); + CHECK (1, "streaming decompressed data corrupted : byte %u / %u (%02X!=%02X)", + (U32)errorPos, (U32)totalTestSize, dstBuffer[errorPos], mirrorBuffer[errorPos]); + } } + } /* for ( ; (testNb <= nbTests) */ + DISPLAY("\r%u fuzzer tests completed \n", testNb-1); + +_cleanup: + ZSTD_freeCCtx(refCtx); + ZSTD_freeCCtx(ctx); + ZSTD_freeDCtx(dctx); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(cBuffer); + free(dstBuffer); + free(mirrorBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + +/*_******************************************************* +* Command line +*********************************************************/ +static int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%u%%)\n", FUZ_compressibility_default); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + +/*! readU32FromChar() : + @return : unsigned integer value read from input in `char` format + allows and interprets K, KB, KiB, M, MB and MiB suffix. + Will also modify `*stringPtr`, advancing it to position where it stopped reading. + Note : function result can overflow if digit string > MAX_UINT */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + if ((**stringPtr=='K') || (**stringPtr=='M')) { + result <<= 10; + if (**stringPtr=='M') result <<= 10; + (*stringPtr)++ ; + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + +int main(int argc, const char** argv) +{ + U32 seed = 0; + int seedset = 0; + int argNb; + int nbTests = nbTestsDefault; + int testNb = 0; + U32 proba = FUZ_compressibility_default; + int result = 0; + U32 mainPause = 0; + U32 maxDuration = 0; + int bigTests = 1; + U32 memTestsOnly = 0; + const char* const programName = argv[0]; + + /* Check command line */ + for (argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + + if (longCommandWArg(&argument, "--memtest=")) { memTestsOnly = readU32FromChar(&argument); continue; } + + if (!strcmp(argument, "--memtest")) { memTestsOnly=1; continue; } + if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; } + + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + + case 'v': + argument++; + g_displayLevel = 4; + break; + + case 'q': + argument++; + g_displayLevel--; + break; + + case 'p': /* pause at the end */ + argument++; + mainPause = 1; + break; + + case 'i': + argument++; maxDuration = 0; + nbTests = readU32FromChar(&argument); + break; + + case 'T': + argument++; + nbTests = 0; + maxDuration = readU32FromChar(&argument); + if (*argument=='s') argument++; /* seconds */ + if (*argument=='m') maxDuration *= 60, argument++; /* minutes */ + if (*argument=='n') argument++; + break; + + case 's': + argument++; + seedset = 1; + seed = readU32FromChar(&argument); + break; + + case 't': + argument++; + testNb = readU32FromChar(&argument); + break; + + case 'P': /* compressibility % */ + argument++; + proba = readU32FromChar(&argument); + if (proba>100) proba = 100; + break; + + default: + return (FUZ_usage(programName), 1); + } } } } /* for (argNb=1; argNb<argc; argNb++) */ + + /* Get Seed */ + DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + + DISPLAY("Seed = %u\n", seed); + if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba); + + if (memTestsOnly) { + g_displayLevel = MAX(3, g_displayLevel); + return FUZ_mallocTests(seed, ((double)proba) / 100, memTestsOnly); + } + + if (nbTests < testNb) nbTests = testNb; + + if (testNb==0) + result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */ + if (!result) + result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100, bigTests); + if (mainPause) { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + return result; +} diff --git a/src/zstd/tests/gzip/Makefile b/src/zstd/tests/gzip/Makefile new file mode 100644 index 00000000..40a0ba97 --- /dev/null +++ b/src/zstd/tests/gzip/Makefile @@ -0,0 +1,44 @@ +# ################################################################ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +PRGDIR = ../../programs +VOID = /dev/null +export PATH := .:$(PATH) + +.PHONY: all +#all: test-gzip-env +all: test-helin-segv test-hufts test-keep test-list test-memcpy-abuse test-mixed +all: test-null-suffix-clobber test-stdin test-trailing-nul test-unpack-invalid +all: test-zdiff test-zgrep-context test-zgrep-f test-zgrep-signal test-znew-k test-z-suffix + @echo Testing completed + +.PHONY: zstd +zstd: + $(MAKE) -C $(PRGDIR) zstd + ln -sf $(PRGDIR)/zstd gzip + @echo PATH=$(PATH) + gzip --version + +.PHONY: clean +clean: + @$(MAKE) -C $(PRGDIR) $@ > $(VOID) + @$(RM) *.trs *.log + @echo Cleaning completed + + +#------------------------------------------------------------------------------ +# validated only for Linux, OSX, Hurd and some BSD targets +#------------------------------------------------------------------------------ +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD DragonFly NetBSD)) + +test-%: zstd + @./test-driver.sh --test-name $* --log-file $*.log --trs-file $*.trs --expect-failure "no" --color-tests "yes" --enable-hard-errors "yes" ./$*.sh + # || echo ignoring error + +endif diff --git a/src/zstd/tests/gzip/gzip-env.sh b/src/zstd/tests/gzip/gzip-env.sh new file mode 100755 index 00000000..120e52d7 --- /dev/null +++ b/src/zstd/tests/gzip/gzip-env.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# Test the obsolescent GZIP environment variable. + +# Copyright 2015-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +#echo PATH=$PATH +#gzip --version + +echo a >exp || framework_failure_ +gzip <exp >in || framework_failure_ + +fail=0 +GZIP=-qv gzip -d <in >out 2>err || fail=1 +compare exp out || fail=1 + +for badopt in -- -c --stdout -d --decompress -f --force -h --help -k --keep \ + -l --list -L --license -r --recursive -Sxxx --suffix=xxx '--suffix xxx' \ + -t --test -V --version +do + GZIP=$badopt gzip -d <in >out 2>err && fail=1 +done + +for goodopt in -n --no-name -N --name -q --quiet -v --verbose \ + -1 --fast -2 -3 -4 -5 -6 -7 -8 -9 --best +do + GZIP=$goodopt gzip -d <in >out 2>err || fail=1 + compare exp out || fail=1 +done + +Exit $fail diff --git a/src/zstd/tests/gzip/helin-segv.sh b/src/zstd/tests/gzip/helin-segv.sh new file mode 100644 index 00000000..f182c806 --- /dev/null +++ b/src/zstd/tests/gzip/helin-segv.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# Before gzip-1.4, gzip -d would segfault on some inputs. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +# This test case was provided by Aki Helin. +printf '\037\235\220\0\0\0\304' > helin.gz || framework_failure_ +printf '\0\0' > exp || framework_failure_ + +fail=0 + +gzip -dc helin.gz > out || fail=1 +compare exp out || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/help-version.sh b/src/zstd/tests/gzip/help-version.sh new file mode 100644 index 00000000..ee0c19f7 --- /dev/null +++ b/src/zstd/tests/gzip/help-version.sh @@ -0,0 +1,270 @@ +#! /bin/sh +# Make sure all these programs work properly +# when invoked with --help or --version. + +# Copyright (C) 2000-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Ensure that $SHELL is set to *some* value and exported. +# This is required for dircolors, which would fail e.g., when +# invoked via debuild (which removes SHELL from the environment). +test "x$SHELL" = x && SHELL=/bin/sh +export SHELL + +. "${srcdir=.}/init.sh"; path_prepend_ . + +expected_failure_status_chroot=125 +expected_failure_status_env=125 +expected_failure_status_nice=125 +expected_failure_status_nohup=125 +expected_failure_status_stdbuf=125 +expected_failure_status_su=125 +expected_failure_status_timeout=125 +expected_failure_status_printenv=2 +expected_failure_status_tty=3 +expected_failure_status_sort=2 +expected_failure_status_expr=3 +expected_failure_status_lbracket=2 +expected_failure_status_dir=2 +expected_failure_status_ls=2 +expected_failure_status_vdir=2 + +expected_failure_status_cmp=2 +expected_failure_status_zcmp=2 +expected_failure_status_sdiff=2 +expected_failure_status_diff3=2 +expected_failure_status_diff=2 +expected_failure_status_zdiff=2 +expected_failure_status_zgrep=2 +expected_failure_status_zegrep=2 +expected_failure_status_zfgrep=2 + +expected_failure_status_grep=2 +expected_failure_status_egrep=2 +expected_failure_status_fgrep=2 + +test "$built_programs" \ + || fail_ "built_programs not specified!?!" + +test "$VERSION" \ + || fail_ "set envvar VERSION; it is required for a PATH sanity-check" + +# Extract version from --version output of the first program +for i in $built_programs; do + v=$(env $i --version | sed -n '1s/.* //p;q') + break +done + +# Ensure that it matches $VERSION. +test "x$v" = "x$VERSION" \ + || fail_ "--version-\$VERSION mismatch" + +for lang in C fr da; do + for i in $built_programs; do + + # Skip `test'; it doesn't accept --help or --version. + test $i = test && continue; + + # false fails even when invoked with --help or --version. + if test $i = false; then + env LC_MESSAGES=$lang $i --help >/dev/null && fail=1 + env LC_MESSAGES=$lang $i --version >/dev/null && fail=1 + continue + fi + + args= + + # The just-built install executable is always named `ginstall'. + test $i = install && i=ginstall + + # Make sure they exit successfully, under normal conditions. + eval "env \$i $args --help > h-\$i " || fail=1 + eval "env \$i $args --version >/dev/null" || fail=1 + + # Make sure they mention the bug-reporting address in --help output. + grep "$PACKAGE_BUGREPORT" h-$i > /dev/null || fail=1 + rm -f h-$i + + # Make sure they fail upon `disk full' error. + if test -w /dev/full && test -c /dev/full; then + eval "env \$i $args --help >/dev/full 2>/dev/null" && fail=1 + eval "env \$i $args --version >/dev/full 2>/dev/null" && fail=1 + status=$? + test $i = [ && prog=lbracket || prog=$i + eval "expected=\$expected_failure_status_$prog" + test x$expected = x && expected=1 + if test $status = $expected; then + : # ok + else + fail=1 + echo "*** $i: bad exit status \`$status' (expected $expected)," 1>&2 + echo " with --help or --version output redirected to /dev/full" 1>&2 + fi + fi + done +done + +bigZ_in=bigZ-in.Z +zin=zin.gz +zin2=zin2.gz + +tmp=tmp-$$ +tmp_in=in-$$ +tmp_in2=in2-$$ +tmp_dir=dir-$$ +tmp_out=out-$$ +mkdir $tmp || fail=1 +cd $tmp || fail=1 + +comm_setup () { args="$tmp_in $tmp_in"; } +csplit_setup () { args="$tmp_in //"; } +cut_setup () { args='-f 1'; } +join_setup () { args="$tmp_in $tmp_in"; } +tr_setup () { args='a a'; } + +chmod_setup () { args="a+x $tmp_in"; } +# Punt on these. +chgrp_setup () { args=--version; } +chown_setup () { args=--version; } +mkfifo_setup () { args=--version; } +mknod_setup () { args=--version; } +# Punt on uptime, since it fails (e.g., failing to get boot time) +# on some systems, and we shouldn't let that stop `make check'. +uptime_setup () { args=--version; } + +# Create a file in the current directory, not in $TMPDIR. +mktemp_setup () { args=mktemp.XXXX; } + +cmp_setup () { args="$tmp_in $tmp_in2"; } + +# Tell dd not to print the line with transfer rate and total. +# The transfer rate would vary between runs. +dd_setup () { args=status=noxfer; } + +zdiff_setup () { args="$args $zin $zin2"; } +zcmp_setup () { zdiff_setup; } +zcat_setup () { args="$args $zin"; } +gunzip_setup () { zcat_setup; } +zmore_setup () { zcat_setup; } +zless_setup () { zcat_setup; } +znew_setup () { args="$args $bigZ_in"; } +zforce_setup () { zcat_setup; } +zgrep_setup () { args="$args z $zin"; } +zegrep_setup () { zgrep_setup; } +zfgrep_setup () { zgrep_setup; } +gzexe_setup () { args="$args $tmp_in"; } + +# We know that $tmp_in contains a "0" +grep_setup () { args="0 $tmp_in"; } +egrep_setup () { args="0 $tmp_in"; } +fgrep_setup () { args="0 $tmp_in"; } + +diff_setup () { args="$tmp_in $tmp_in2"; } +sdiff_setup () { args="$tmp_in $tmp_in2"; } +diff3_setup () { args="$tmp_in $tmp_in2 $tmp_in2"; } +cp_setup () { args="$tmp_in $tmp_in2"; } +ln_setup () { args="$tmp_in ln-target"; } +ginstall_setup () { args="$tmp_in $tmp_in2"; } +mv_setup () { args="$tmp_in $tmp_in2"; } +mkdir_setup () { args=$tmp_dir/subdir; } +rmdir_setup () { args=$tmp_dir; } +rm_setup () { args=$tmp_in; } +shred_setup () { args=$tmp_in; } +touch_setup () { args=$tmp_in2; } +truncate_setup () { args="--reference=$tmp_in $tmp_in2"; } + +basename_setup () { args=$tmp_in; } +dirname_setup () { args=$tmp_in; } +expr_setup () { args=foo; } + +# Punt, in case GNU `id' hasn't been installed yet. +groups_setup () { args=--version; } + +pathchk_setup () { args=$tmp_in; } +yes_setup () { args=--version; } +logname_setup () { args=--version; } +nohup_setup () { args=--version; } +printf_setup () { args=foo; } +seq_setup () { args=10; } +sleep_setup () { args=0; } +su_setup () { args=--version; } +stdbuf_setup () { args="-oL true"; } +timeout_setup () { args=--version; } + +# I'd rather not run sync, since it spins up disks that I've +# deliberately caused to spin down (but not unmounted). +sync_setup () { args=--version; } + +test_setup () { args=foo; } + +# This is necessary in the unusual event that there is +# no valid entry in /etc/mtab. +df_setup () { args=/; } + +# This is necessary in the unusual event that getpwuid (getuid ()) fails. +id_setup () { args=-u; } + +# Use env to avoid invoking built-in sleep of Solaris 11's /bin/sh. +kill_setup () { + env sleep 10m & + args=$! +} + +link_setup () { args="$tmp_in link-target"; } +unlink_setup () { args=$tmp_in; } + +readlink_setup () { + ln -s . slink + args=slink; +} + +stat_setup () { args=$tmp_in; } +unlink_setup () { args=$tmp_in; } +lbracket_setup () { args=": ]"; } + +# Ensure that each program "works" (exits successfully) when doing +# something more than --help or --version. +for i in $built_programs; do + # Skip these. + case $i in chroot|stty|tty|false|chcon|runcon) continue;; esac + + rm -rf $tmp_in $tmp_in2 $tmp_dir $tmp_out $bigZ_in $zin $zin2 + echo z |gzip > $zin + cp $zin $zin2 + cp $zin $bigZ_in + + # This is sort of kludgey: use numbers so this is valid input for factor, + # and two tokens so it's valid input for tsort. + echo 2147483647 0 > $tmp_in + # Make $tmp_in2 identical. Then, using $tmp_in and $tmp_in2 as arguments + # to the likes of cmp and diff makes them exit successfully. + cp $tmp_in $tmp_in2 + mkdir $tmp_dir + # echo ================== $i + test $i = [ && prog=lbracket || prog=$i + args= + if type ${prog}_setup > /dev/null 2>&1; then + ${prog}_setup + fi + if eval "env \$i $args < \$tmp_in > \$tmp_out"; then + : # ok + else + echo FAIL: $i + fail=1 + fi + rm -rf $tmp_in $tmp_in2 $tmp_out $tmp_dir +done + +Exit $fail diff --git a/src/zstd/tests/gzip/hufts-segv.gz b/src/zstd/tests/gzip/hufts-segv.gz Binary files differnew file mode 100644 index 00000000..32cb2a25 --- /dev/null +++ b/src/zstd/tests/gzip/hufts-segv.gz diff --git a/src/zstd/tests/gzip/hufts.sh b/src/zstd/tests/gzip/hufts.sh new file mode 100644 index 00000000..9b9576ce --- /dev/null +++ b/src/zstd/tests/gzip/hufts.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# Exercise a bug whereby an invalid input could make gzip -d misbehave. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf '\n...: invalid compressed data--format violated\n' > exp \ + || framework_failure_ + +fail=0 +gzip -dc "$abs_srcdir/hufts-segv.gz" > out 2> err +test $? = 1 || fail=1 + +compare /dev/null out || fail=1 + +sed 's/.*hufts-segv.gz: /...: /' err > k; mv k err || fail=1 +compare exp err || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/init.cfg b/src/zstd/tests/gzip/init.cfg new file mode 100644 index 00000000..901209ce --- /dev/null +++ b/src/zstd/tests/gzip/init.cfg @@ -0,0 +1,5 @@ +# This file is sourced by init.sh, *before* its initialization. + +# This goes hand in hand with the "exec 9>&2;" in Makefile.am's +# TESTS_ENVIRONMENT definition. +stderr_fileno_=9 diff --git a/src/zstd/tests/gzip/init.sh b/src/zstd/tests/gzip/init.sh new file mode 100644 index 00000000..97e4e4ba --- /dev/null +++ b/src/zstd/tests/gzip/init.sh @@ -0,0 +1,616 @@ +# source this file; set up for tests + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Using this file in a test +# ========================= +# +# The typical skeleton of a test looks like this: +# +# #!/bin/sh +# . "${srcdir=.}/init.sh"; path_prepend_ . +# Execute some commands. +# Note that these commands are executed in a subdirectory, therefore you +# need to prepend "../" to relative filenames in the build directory. +# Note that the "path_prepend_ ." is useful only if the body of your +# test invokes programs residing in the initial directory. +# For example, if the programs you want to test are in src/, and this test +# script is named tests/test-1, then you would use "path_prepend_ ../src", +# or perhaps export PATH='$(abs_top_builddir)/src$(PATH_SEPARATOR)'"$$PATH" +# to all tests via automake's TESTS_ENVIRONMENT. +# Set the exit code 0 for success, 77 for skipped, or 1 or other for failure. +# Use the skip_ and fail_ functions to print a diagnostic and then exit +# with the corresponding exit code. +# Exit $? + +# Executing a test that uses this file +# ==================================== +# +# Running a single test: +# $ make check TESTS=test-foo.sh +# +# Running a single test, with verbose output: +# $ make check TESTS=test-foo.sh VERBOSE=yes +# +# Running a single test, with single-stepping: +# 1. Go into a sub-shell: +# $ bash +# 2. Set relevant environment variables from TESTS_ENVIRONMENT in the +# Makefile: +# $ export srcdir=../../tests # this is an example +# 3. Execute the commands from the test, copy&pasting them one by one: +# $ . "$srcdir/init.sh"; path_prepend_ . +# ... +# 4. Finally +# $ exit + +ME_=`expr "./$0" : '.*/\(.*\)$'` + +# We use a trap below for cleanup. This requires us to go through +# hoops to get the right exit status transported through the handler. +# So use 'Exit STATUS' instead of 'exit STATUS' inside of the tests. +# Turn off errexit here so that we don't trip the bug with OSF1/Tru64 +# sh inside this function. +Exit () { set +e; (exit $1); exit $1; } + +# Print warnings (e.g., about skipped and failed tests) to this file number. +# Override by defining to say, 9, in init.cfg, and putting say, +# export ...ENVVAR_SETTINGS...; $(SHELL) 9>&2 +# in the definition of TESTS_ENVIRONMENT in your tests/Makefile.am file. +# This is useful when using automake's parallel tests mode, to print +# the reason for skip/failure to console, rather than to the .log files. +: ${stderr_fileno_=2} + +# Note that correct expansion of "$*" depends on IFS starting with ' '. +# Always write the full diagnostic to stderr. +# When stderr_fileno_ is not 2, also emit the first line of the +# diagnostic to that file descriptor. +warn_ () +{ + # If IFS does not start with ' ', set it and emit the warning in a subshell. + case $IFS in + ' '*) printf '%s\n' "$*" >&2 + test $stderr_fileno_ = 2 \ + || { printf '%s\n' "$*" | sed 1q >&$stderr_fileno_ ; } ;; + *) (IFS=' '; warn_ "$@");; + esac +} +fail_ () { warn_ "$ME_: failed test: $@"; Exit 1; } +skip_ () { warn_ "$ME_: skipped test: $@"; Exit 77; } +fatal_ () { warn_ "$ME_: hard error: $@"; Exit 99; } +framework_failure_ () { warn_ "$ME_: set-up failure: $@"; Exit 99; } + +# This is used to simplify checking of the return value +# which is useful when ensuring a command fails as desired. +# I.e., just doing `command ... &&fail=1` will not catch +# a segfault in command for example. With this helper you +# instead check an explicit exit code like +# returns_ 1 command ... || fail +returns_ () { + # Disable tracing so it doesn't interfere with stderr of the wrapped command + { set +x; } 2>/dev/null + + local exp_exit="$1" + shift + "$@" + test $? -eq $exp_exit && ret_=0 || ret_=1 + + if test "$VERBOSE" = yes && test "$gl_set_x_corrupts_stderr_" = false; then + set -x + fi + { return $ret_; } 2>/dev/null +} + +# Sanitize this shell to POSIX mode, if possible. +DUALCASE=1; export DUALCASE +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; + esac +fi + +# We require $(...) support unconditionally. +# We require a few additional shell features only when $EXEEXT is nonempty, +# in order to support automatic $EXEEXT emulation: +# - hyphen-containing alias names +# - we prefer to use ${var#...} substitution, rather than having +# to work around lack of support for that feature. +# The following code attempts to find a shell with support for these features. +# If the current shell passes the test, we're done. Otherwise, test other +# shells until we find one that passes. If one is found, re-exec it. +# If no acceptable shell is found, skip the current test. +# +# The "...set -x; P=1 true 2>err..." test is to disqualify any shell that +# emits "P=1" into err, as /bin/sh from SunOS 5.11 and OpenBSD 4.7 do. +# +# Use "9" to indicate success (rather than 0), in case some shell acts +# like Solaris 10's /bin/sh but exits successfully instead of with status 2. + +# Eval this code in a subshell to determine a shell's suitability. +# 10 - passes all tests; ok to use +# 9 - ok, but enabling "set -x" corrupts app stderr; prefer higher score +# ? - not ok +gl_shell_test_script_=' +test $(echo y) = y || exit 1 +f_local_() { local v=1; }; f_local_ || exit 1 +score_=10 +if test "$VERBOSE" = yes; then + test -n "$( (exec 3>&1; set -x; P=1 true 2>&3) 2> /dev/null)" && score_=9 +fi +test -z "$EXEEXT" && exit $score_ +shopt -s expand_aliases +alias a-b="echo zoo" +v=abx + test ${v%x} = ab \ + && test ${v#a} = bx \ + && test $(a-b) = zoo \ + && exit $score_ +' + +if test "x$1" = "x--no-reexec"; then + shift +else + # Assume a working shell. Export to subshells (setup_ needs this). + gl_set_x_corrupts_stderr_=false + export gl_set_x_corrupts_stderr_ + + # Record the first marginally acceptable shell. + marginal_= + + # Search for a shell that meets our requirements. + for re_shell_ in __current__ "${CONFIG_SHELL:-no_shell}" \ + /bin/sh bash dash zsh pdksh fail + do + test "$re_shell_" = no_shell && continue + + # If we've made it all the way to the sentinel, "fail" without + # finding even a marginal shell, skip this test. + if test "$re_shell_" = fail; then + test -z "$marginal_" && skip_ failed to find an adequate shell + re_shell_=$marginal_ + break + fi + + # When testing the current shell, simply "eval" the test code. + # Otherwise, run it via $re_shell_ -c ... + if test "$re_shell_" = __current__; then + # 'eval'ing this code makes Solaris 10's /bin/sh exit with + # $? set to 2. It does not evaluate any of the code after the + # "unexpected" first '('. Thus, we must run it in a subshell. + ( eval "$gl_shell_test_script_" ) > /dev/null 2>&1 + else + "$re_shell_" -c "$gl_shell_test_script_" 2>/dev/null + fi + + st_=$? + + # $re_shell_ works just fine. Use it. + if test $st_ = 10; then + gl_set_x_corrupts_stderr_=false + break + fi + + # If this is our first marginally acceptable shell, remember it. + if test "$st_:$marginal_" = 9: ; then + marginal_="$re_shell_" + gl_set_x_corrupts_stderr_=true + fi + done + + if test "$re_shell_" != __current__; then + # Found a usable shell. Preserve -v and -x. + case $- in + *v*x* | *x*v*) opts_=-vx ;; + *v*) opts_=-v ;; + *x*) opts_=-x ;; + *) opts_= ;; + esac + re_shell=$re_shell_ + export re_shell + exec "$re_shell_" $opts_ "$0" --no-reexec "$@" + echo "$ME_: exec failed" 1>&2 + exit 127 + fi +fi + +# If this is bash, turn off all aliases. +test -n "$BASH_VERSION" && unalias -a + +# Note that when supporting $EXEEXT (transparently mapping from PROG_NAME to +# PROG_NAME.exe), we want to support hyphen-containing names like test-acos. +# That is part of the shell-selection test above. Why use aliases rather +# than functions? Because support for hyphen-containing aliases is more +# widespread than that for hyphen-containing function names. +test -n "$EXEEXT" && shopt -s expand_aliases + +# Enable glibc's malloc-perturbing option. +# This is useful for exposing code that depends on the fact that +# malloc-related functions often return memory that is mostly zeroed. +# If you have the time and cycles, use valgrind to do an even better job. +: ${MALLOC_PERTURB_=87} +export MALLOC_PERTURB_ + +# This is a stub function that is run upon trap (upon regular exit and +# interrupt). Override it with a per-test function, e.g., to unmount +# a partition, or to undo any other global state changes. +cleanup_ () { :; } + +# Emit a header similar to that from diff -u; Print the simulated "diff" +# command so that the order of arguments is clear. Don't bother with @@ lines. +emit_diff_u_header_ () +{ + printf '%s\n' "diff -u $*" \ + "--- $1 1970-01-01" \ + "+++ $2 1970-01-01" +} + +# Arrange not to let diff or cmp operate on /dev/null, +# since on some systems (at least OSF/1 5.1), that doesn't work. +# When there are not two arguments, or no argument is /dev/null, return 2. +# When one argument is /dev/null and the other is not empty, +# cat the nonempty file to stderr and return 1. +# Otherwise, return 0. +compare_dev_null_ () +{ + test $# = 2 || return 2 + + if test "x$1" = x/dev/null; then + test -s "$2" || return 0 + emit_diff_u_header_ "$@"; sed 's/^/+/' "$2" + return 1 + fi + + if test "x$2" = x/dev/null; then + test -s "$1" || return 0 + emit_diff_u_header_ "$@"; sed 's/^/-/' "$1" + return 1 + fi + + return 2 +} + +if diff_out_=`exec 2>/dev/null; diff -u "$0" "$0" < /dev/null` \ + && diff -u Makefile "$0" 2>/dev/null | grep '^[+]#!' >/dev/null; then + # diff accepts the -u option and does not (like AIX 7 'diff') produce an + # extra space on column 1 of every content line. + if test -z "$diff_out_"; then + compare_ () { diff -u "$@"; } + else + compare_ () + { + if diff -u "$@" > diff.out; then + # No differences were found, but Solaris 'diff' produces output + # "No differences encountered". Hide this output. + rm -f diff.out + true + else + cat diff.out + rm -f diff.out + false + fi + } + fi +elif + for diff_opt_ in -U3 -c '' no; do + test "$diff_opt_" = no && break + diff_out_=`exec 2>/dev/null; diff $diff_opt_ "$0" "$0" </dev/null` && break + done + test "$diff_opt_" != no +then + if test -z "$diff_out_"; then + compare_ () { diff $diff_opt_ "$@"; } + else + compare_ () + { + if diff $diff_opt_ "$@" > diff.out; then + # No differences were found, but AIX and HP-UX 'diff' produce output + # "No differences encountered" or "There are no differences between the + # files.". Hide this output. + rm -f diff.out + true + else + cat diff.out + rm -f diff.out + false + fi + } + fi +elif cmp -s /dev/null /dev/null 2>/dev/null; then + compare_ () { cmp -s "$@"; } +else + compare_ () { cmp "$@"; } +fi + +# Usage: compare EXPECTED ACTUAL +# +# Given compare_dev_null_'s preprocessing, defer to compare_ if 2 or more. +# Otherwise, propagate $? to caller: any diffs have already been printed. +compare () +{ + # This looks like it can be factored to use a simple "case $?" + # after unchecked compare_dev_null_ invocation, but that would + # fail in a "set -e" environment. + if compare_dev_null_ "$@"; then + return 0 + else + case $? in + 1) return 1;; + *) compare_ "$@";; + esac + fi +} + +# An arbitrary prefix to help distinguish test directories. +testdir_prefix_ () { printf gt; } + +# Run the user-overridable cleanup_ function, remove the temporary +# directory and exit with the incoming value of $?. +remove_tmp_ () +{ + __st=$? + cleanup_ + # cd out of the directory we're about to remove + cd "$initial_cwd_" || cd / || cd /tmp + chmod -R u+rwx "$test_dir_" + # If removal fails and exit status was to be 0, then change it to 1. + rm -rf "$test_dir_" || { test $__st = 0 && __st=1; } + exit $__st +} + +# Given a directory name, DIR, if every entry in it that matches *.exe +# contains only the specified bytes (see the case stmt below), then print +# a space-separated list of those names and return 0. Otherwise, don't +# print anything and return 1. Naming constraints apply also to DIR. +find_exe_basenames_ () +{ + feb_dir_=$1 + feb_fail_=0 + feb_result_= + feb_sp_= + for feb_file_ in $feb_dir_/*.exe; do + # If there was no *.exe file, or there existed a file named "*.exe" that + # was deleted between the above glob expansion and the existence test + # below, just skip it. + test "x$feb_file_" = "x$feb_dir_/*.exe" && test ! -f "$feb_file_" \ + && continue + # Exempt [.exe, since we can't create a function by that name, yet + # we can't invoke [ by PATH search anyways due to shell builtins. + test "x$feb_file_" = "x$feb_dir_/[.exe" && continue + case $feb_file_ in + *[!-a-zA-Z/0-9_.+]*) feb_fail_=1; break;; + *) # Remove leading file name components as well as the .exe suffix. + feb_file_=${feb_file_##*/} + feb_file_=${feb_file_%.exe} + feb_result_="$feb_result_$feb_sp_$feb_file_";; + esac + feb_sp_=' ' + done + test $feb_fail_ = 0 && printf %s "$feb_result_" + return $feb_fail_ +} + +# Consider the files in directory, $1. +# For each file name of the form PROG.exe, create an alias named +# PROG that simply invokes PROG.exe, then return 0. If any selected +# file name or the directory name, $1, contains an unexpected character, +# define no alias and return 1. +create_exe_shims_ () +{ + case $EXEEXT in + '') return 0 ;; + .exe) ;; + *) echo "$0: unexpected \$EXEEXT value: $EXEEXT" 1>&2; return 1 ;; + esac + + base_names_=`find_exe_basenames_ $1` \ + || { echo "$0 (exe_shim): skipping directory: $1" 1>&2; return 0; } + + if test -n "$base_names_"; then + for base_ in $base_names_; do + alias "$base_"="$base_$EXEEXT" + done + fi + + return 0 +} + +# Use this function to prepend to PATH an absolute name for each +# specified, possibly-$initial_cwd_-relative, directory. +path_prepend_ () +{ + while test $# != 0; do + path_dir_=$1 + case $path_dir_ in + '') fail_ "invalid path dir: '$1'";; + /*) abs_path_dir_=$path_dir_;; + *) abs_path_dir_=$initial_cwd_/$path_dir_;; + esac + case $abs_path_dir_ in + *:*) fail_ "invalid path dir: '$abs_path_dir_'";; + esac + PATH="$abs_path_dir_:$PATH" + + # Create an alias, FOO, for each FOO.exe in this directory. + create_exe_shims_ "$abs_path_dir_" \ + || fail_ "something failed (above): $abs_path_dir_" + shift + done + export PATH +} + +setup_ () +{ + if test "$VERBOSE" = yes; then + # Test whether set -x may cause the selected shell to corrupt an + # application's stderr. Many do, including zsh-4.3.10 and the /bin/sh + # from SunOS 5.11, OpenBSD 4.7 and Irix 5.x and 6.5. + # If enabling verbose output this way would cause trouble, simply + # issue a warning and refrain. + if $gl_set_x_corrupts_stderr_; then + warn_ "using SHELL=$SHELL with 'set -x' corrupts stderr" + else + set -x + fi + fi + + initial_cwd_=$PWD + + pfx_=`testdir_prefix_` + test_dir_=`mktempd_ "$initial_cwd_" "$pfx_-$ME_.XXXX"` \ + || fail_ "failed to create temporary directory in $initial_cwd_" + cd "$test_dir_" || fail_ "failed to cd to temporary directory" + + # As autoconf-generated configure scripts do, ensure that IFS + # is defined initially, so that saving and restoring $IFS works. + gl_init_sh_nl_=' +' + IFS=" "" $gl_init_sh_nl_" + + # This trap statement, along with a trap on 0 below, ensure that the + # temporary directory, $test_dir_, is removed upon exit as well as + # upon receipt of any of the listed signals. + for sig_ in 1 2 3 13 15; do + eval "trap 'Exit $(expr $sig_ + 128)' $sig_" + done +} + +# Create a temporary directory, much like mktemp -d does. +# Written by Jim Meyering. +# +# Usage: mktempd_ /tmp phoey.XXXXXXXXXX +# +# First, try to use the mktemp program. +# Failing that, we'll roll our own mktemp-like function: +# - try to get random bytes from /dev/urandom +# - failing that, generate output from a combination of quickly-varying +# sources and gzip. Ignore non-varying gzip header, and extract +# "random" bits from there. +# - given those bits, map to file-name bytes using tr, and try to create +# the desired directory. +# - make only $MAX_TRIES_ attempts + +# Helper function. Print $N pseudo-random bytes from a-zA-Z0-9. +rand_bytes_ () +{ + n_=$1 + + # Maybe try openssl rand -base64 $n_prime_|tr '+/=\012' abcd first? + # But if they have openssl, they probably have mktemp, too. + + chars_=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 + dev_rand_=/dev/urandom + if test -r "$dev_rand_"; then + # Note: 256-length($chars_) == 194; 3 copies of $chars_ is 186 + 8 = 194. + dd ibs=$n_ count=1 if=$dev_rand_ 2>/dev/null \ + | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_ + return + fi + + n_plus_50_=`expr $n_ + 50` + cmds_='date; date +%N; free; who -a; w; ps auxww; ps ef; netstat -n' + data_=` (eval "$cmds_") 2>&1 | gzip ` + + # Ensure that $data_ has length at least 50+$n_ + while :; do + len_=`echo "$data_"|wc -c` + test $n_plus_50_ -le $len_ && break; + data_=` (echo "$data_"; eval "$cmds_") 2>&1 | gzip ` + done + + echo "$data_" \ + | dd bs=1 skip=50 count=$n_ 2>/dev/null \ + | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_ +} + +mktempd_ () +{ + case $# in + 2);; + *) fail_ "Usage: mktempd_ DIR TEMPLATE";; + esac + + destdir_=$1 + template_=$2 + + MAX_TRIES_=4 + + # Disallow any trailing slash on specified destdir: + # it would subvert the post-mktemp "case"-based destdir test. + case $destdir_ in + / | //) destdir_slash_=$destdir;; + */) fail_ "invalid destination dir: remove trailing slash(es)";; + *) destdir_slash_=$destdir_/;; + esac + + case $template_ in + *XXXX) ;; + *) fail_ \ + "invalid template: $template_ (must have a suffix of at least 4 X's)";; + esac + + # First, try to use mktemp. + d=`unset TMPDIR; { mktemp -d -t -p "$destdir_" "$template_"; } 2>/dev/null` && + + # The resulting name must be in the specified directory. + case $d in "$destdir_slash_"*) :;; *) false;; esac && + + # It must have created the directory. + test -d "$d" && + + # It must have 0700 permissions. Handle sticky "S" bits. + perms=`ls -dgo "$d" 2>/dev/null` && + case $perms in drwx--[-S]---*) :;; *) false;; esac && { + echo "$d" + return + } + + # If we reach this point, we'll have to create a directory manually. + + # Get a copy of the template without its suffix of X's. + base_template_=`echo "$template_"|sed 's/XX*$//'` + + # Calculate how many X's we've just removed. + template_length_=`echo "$template_" | wc -c` + nx_=`echo "$base_template_" | wc -c` + nx_=`expr $template_length_ - $nx_` + + err_= + i_=1 + while :; do + X_=`rand_bytes_ $nx_` + candidate_dir_="$destdir_slash_$base_template_$X_" + err_=`mkdir -m 0700 "$candidate_dir_" 2>&1` \ + && { echo "$candidate_dir_"; return; } + test $MAX_TRIES_ -le $i_ && break; + i_=`expr $i_ + 1` + done + fail_ "$err_" +} + +# If you want to override the testdir_prefix_ function, +# or to add more utility functions, use this file. +test -f "$srcdir/init.cfg" \ + && . "$srcdir/init.cfg" + +setup_ "$@" +# This trap is here, rather than in the setup_ function, because some +# shells run the exit trap at shell function exit, rather than script exit. +trap remove_tmp_ 0 diff --git a/src/zstd/tests/gzip/keep.sh b/src/zstd/tests/gzip/keep.sh new file mode 100644 index 00000000..ab9a2181 --- /dev/null +++ b/src/zstd/tests/gzip/keep.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# Exercise the --keep option. + +# Copyright (C) 2013-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo fooooooooo > in || framework_failure_ +cp in orig || framework_failure_ + +fail=0 + +# Compress and decompress both with and without --keep. +for k in --keep ''; do + # With --keep, the source must be retained, otherwise, it must be removed. + case $k in --keep) op='||' ;; *) op='&&' ;; esac + + gzip $k in || fail=1 + eval "test -f in $op fail=1" + test -f in.gz || fail=1 + rm -f in || fail=1 + + gzip -d $k in.gz || fail=1 + eval "test -f in.gz $op fail=1" + test -f in || fail=1 + compare in orig || fail=1 + rm -f in.gz || fail=1 +done + +cp orig in || framework_failure_ +log=$(gzip -kv in 2>&1) || fail=1 +case $log in + *'created in.gz'*) ;; + *) fail=1;; +esac + +Exit $fail diff --git a/src/zstd/tests/gzip/list.sh b/src/zstd/tests/gzip/list.sh new file mode 100644 index 00000000..75912e1e --- /dev/null +++ b/src/zstd/tests/gzip/list.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# Exercise the --list option. + +# Copyright 2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo zoology zucchini > in || framework_failure_ +cp in orig || framework_failure_ + +gzip -l in && fail=1 +gzip -9 in || fail=1 +gzip -l in.gz >out1 || fail=1 +gzip -l in.gz | cat >out2 || fail=1 +compare out1 out2 || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/memcpy-abuse.sh b/src/zstd/tests/gzip/memcpy-abuse.sh new file mode 100644 index 00000000..7d5c056d --- /dev/null +++ b/src/zstd/tests/gzip/memcpy-abuse.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# Before gzip-1.4, this the use of memcpy in inflate_codes could +# mistakenly operate on overlapping regions. Exercise that code. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +# The input must be larger than 32KiB and slightly +# less uniform than e.g., all zeros. +printf wxy%032767d 0 | tee in | gzip > in.gz || framework_failure_ + +fail=0 + +# Before the fix, this would call memcpy with overlapping regions. +gzip -dc in.gz > out || fail=1 + +compare in out || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/mixed.sh b/src/zstd/tests/gzip/mixed.sh new file mode 100644 index 00000000..383a54f5 --- /dev/null +++ b/src/zstd/tests/gzip/mixed.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# Ensure that gzip -cdf handles mixed compressed/not-compressed data +# Before gzip-1.5, it would produce invalid output. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf 'xxx\nyyy\n' > exp2 || framework_failure_ +printf 'aaa\nbbb\nccc\n' > exp3 || framework_failure_ + +fail=0 + +(echo xxx; echo yyy) > in || fail=1 +gzip -cdf < in > out || fail=1 +compare exp2 out || fail=1 + +# Uncompressed input, followed by compressed data. +# Currently fails, so skip it. +# (echo xxx; echo yyy|gzip) > in || fail=1 +# gzip -cdf < in > out || fail=1 +# compare exp2 out || fail=1 + +# Compressed input, followed by regular (not-compressed) data. +(echo xxx|gzip; echo yyy) > in || fail=1 +gzip -cdf < in > out || fail=1 +compare exp2 out || fail=1 + +(echo xxx|gzip; echo yyy|gzip) > in || fail=1 +gzip -cdf < in > out || fail=1 +compare exp2 out || fail=1 + +in_str=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-+=% +for i in 0 1 2 3 4 5 6 7 8 9 a; do in_str="$in_str$in_str" ;done + +# Start with some small sizes. $(seq 64) +sizes=$(i=0; while :; do echo $i; test $i = 64 && break; i=$(expr $i + 1); done) + +# gzip's internal buffer size is 32KiB + 64 bytes: +sizes="$sizes 32831 32832 32833" + +# 128KiB, +/- 1 +sizes="$sizes 131071 131072 131073" + +# Ensure that "gzip -cdf" acts like cat, for a range of small input files. +i=0 +for i in $sizes; do + echo $i + printf %$i.${i}s $in_str > in + gzip -cdf < in > out + compare in out || fail=1 +done + +Exit $fail diff --git a/src/zstd/tests/gzip/null-suffix-clobber.sh b/src/zstd/tests/gzip/null-suffix-clobber.sh new file mode 100644 index 00000000..0efd0e34 --- /dev/null +++ b/src/zstd/tests/gzip/null-suffix-clobber.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# Before gzip-1.5, gzip -d -S '' k.gz would delete F.gz and not create "F" + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf anything | gzip > F.gz || framework_failure_ +echo y > yes || framework_failure_ +echo "gzip: invalid suffix ''" > expected-err || framework_failure_ + +fail=0 + +gzip ---presume-input-tty -d -S '' F.gz < yes > out 2>err && fail=1 + +compare /dev/null out || fail=1 +compare expected-err err || fail=1 + +test -f F.gz || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/stdin.sh b/src/zstd/tests/gzip/stdin.sh new file mode 100644 index 00000000..eef4cd8b --- /dev/null +++ b/src/zstd/tests/gzip/stdin.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# Ensure that gzip interprets "-" as stdin. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf a | gzip > in || framework_failure_ +printf aaa > exp || framework_failure_ + +fail=0 +gzip -dc in - in < in > out 2>err || fail=1 + +compare exp out || fail=1 +compare /dev/null err || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/test-driver.sh b/src/zstd/tests/gzip/test-driver.sh new file mode 100644 index 00000000..649c084e --- /dev/null +++ b/src/zstd/tests/gzip/test-driver.sh @@ -0,0 +1,150 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2016-01-11.22; # UTC + +# Copyright (C) 2011-2015 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <<END +Usage: + test-driver --test-name=NAME --log-file=PATH --trs-file=PATH + [--expect-failure={yes|no}] [--color-tests={yes|no}] + [--enable-hard-errors={yes|no}] [--] + TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS] +The '--test-name', '--log-file' and '--trs-file' options are mandatory. +END +} + +test_name= # Used for reporting. +log_file= # Where to save the output of the test script. +trs_file= # Where to save the metadata of the test run. +expect_failure=no +color_tests=no +enable_hard_errors=yes +while test $# -gt 0; do + case $1 in + --help) print_usage; exit $?;; + --version) echo "test-driver $scriptversion"; exit $?;; + --test-name) test_name=$2; shift;; + --log-file) log_file=$2; shift;; + --trs-file) trs_file=$2; shift;; + --color-tests) color_tests=$2; shift;; + --expect-failure) expect_failure=$2; shift;; + --enable-hard-errors) enable_hard_errors=$2; shift;; + --) shift; break;; + -*) usage_error "invalid option: '$1'";; + *) break;; + esac + shift +done + +missing_opts= +test x"$test_name" = x && missing_opts="$missing_opts --test-name" +test x"$log_file" = x && missing_opts="$missing_opts --log-file" +test x"$trs_file" = x && missing_opts="$missing_opts --trs-file" +if test x"$missing_opts" != x; then + usage_error "the following mandatory options are missing:$missing_opts" +fi + +if test $# -eq 0; then + usage_error "missing argument" +fi + +if test $color_tests = yes; then + # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'. + red='[0;31m' # Red. + grn='[0;32m' # Green. + lgn='[1;32m' # Light green. + blu='[1;34m' # Blue. + mgn='[0;35m' # Magenta. + std='[m' # No color. +else + red= grn= lgn= blu= mgn= std= +fi + +do_exit='rm -f $log_file $trs_file; (exit $st); exit $st' +trap "st=129; $do_exit" 1 +trap "st=130; $do_exit" 2 +trap "st=141; $do_exit" 13 +trap "st=143; $do_exit" 15 + +# Test script is run here. +"$@" >$log_file 2>&1 +estatus=$? + +if test $enable_hard_errors = no && test $estatus -eq 99; then + tweaked_estatus=1 +else + tweaked_estatus=$estatus +fi + +case $tweaked_estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peaking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>$log_file + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. +echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: + +exit $tweaked_estatus diff --git a/src/zstd/tests/gzip/trailing-nul.sh b/src/zstd/tests/gzip/trailing-nul.sh new file mode 100644 index 00000000..7b15d5e5 --- /dev/null +++ b/src/zstd/tests/gzip/trailing-nul.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# gzip accepts trailing NUL bytes; don't fail if there is exactly one. +# Before gzip-1.4, this would fail. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +(echo 0 | gzip; printf '\0') > 0.gz || framework_failure_ +(echo 00 | gzip; printf '\0\0') > 00.gz || framework_failure_ +(echo 1 | gzip; printf '\1') > 1.gz || framework_failure_ + +fail=0 + +for i in 0 00 1; do + gzip -d $i.gz; ret=$? + test $ret -eq $i || fail=1 + test $ret = 1 && continue + echo $i > exp || fail=1 + compare exp $i || fail=1 +done + +Exit $fail diff --git a/src/zstd/tests/gzip/unpack-invalid.sh b/src/zstd/tests/gzip/unpack-invalid.sh new file mode 100644 index 00000000..fe8384d7 --- /dev/null +++ b/src/zstd/tests/gzip/unpack-invalid.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# gzip should report invalid 'unpack' input when uncompressing. +# With gzip-1.5, it would output invalid data instead. + +# Copyright (C) 2012-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +for input in \ + '\037\036\000\000\037\213\010\000\000\000\000\000\002\003\036\000\000\000\002\003\037\213\010\000\000\000\000\000\002\003\355\301\001\015\000\000\000\302\240\037\000\302\240\037\213\010\000\000\000\000\000\002\003\355\301' \ + '\037\213\010\000\000\000\000\000\002\003\355\301\001\015\000\000\000\302\240\076\366\017\370\036\016\030\000\000\000\000\000\000\000\000\000\034\010\105\140\104\025\020\047\000\000\037\036\016\030\000\000\000'; do + + printf "$input" >in || framework_failure_ + + if gzip -d <in >out 2>err; then + fail=1 + else + fail=0 + fi +done + +Exit $fail diff --git a/src/zstd/tests/gzip/z-suffix.sh b/src/zstd/tests/gzip/z-suffix.sh new file mode 100644 index 00000000..a870a540 --- /dev/null +++ b/src/zstd/tests/gzip/z-suffix.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# Check that -Sz works. + +# Copyright 2014-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf anything > F && cp F G || framework_failure_ +gzip -Sz F || fail=1 +test ! -f F || fail=1 +test -f Fz || fail=1 +gzip -dSz F || fail=1 +test ! -f Fz || fail=1 +compare F G || fail\1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zdiff.sh b/src/zstd/tests/gzip/zdiff.sh new file mode 100644 index 00000000..d62a8460 --- /dev/null +++ b/src/zstd/tests/gzip/zdiff.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# Exercise zdiff with two compressed inputs. +# Before gzip-1.4, this would fail. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo a > a || framework_failure_ +echo b > b || framework_failure_ +gzip a b || framework_failure_ + +cat <<EOF > exp +1c1 +< a +--- +> b +EOF + +fail=0 +zdiff a.gz b.gz > out 2>&1 +test $? = 1 || fail=1 + +compare exp out || fail=1 + +rm -f out +# expect success, for equal files +zdiff a.gz a.gz > out 2> err || fail=1 +# expect no output +test -s out && fail=1 +# expect no stderr +test -s err && fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zgrep-context.sh b/src/zstd/tests/gzip/zgrep-context.sh new file mode 100644 index 00000000..c8648b7e --- /dev/null +++ b/src/zstd/tests/gzip/zgrep-context.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# Ensure that zgrep -15 works. Before gzip-1.5, it would fail. + +# Copyright (C) 2012-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +# A limited replacement for seq: handle 1 or 2 args; increment must be 1 +seq() +{ + case $# in + 1) start=1 final=$1;; + 2) start=$1 final=$2;; + *) echo you lose 1>&2; exit 1;; + esac + awk 'BEGIN{for(i='$start';i<='$final';i++) print i}' < /dev/null +} + +seq 40 > in || framework_failure_ +gzip < in > in.gz || framework_failure_ +seq 2 32 > exp || framework_failure_ + +: ${GREP=grep} +$GREP -15 17 - < in > out && compare exp out || { + echo >&2 "$0: $GREP does not support context options; skipping this test" + exit 77 +} + +fail=0 +zgrep -15 17 - < in.gz > out || fail=1 +compare exp out || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zgrep-f.sh b/src/zstd/tests/gzip/zgrep-f.sh new file mode 100644 index 00000000..d0cf27f7 --- /dev/null +++ b/src/zstd/tests/gzip/zgrep-f.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# Ensure that zgrep -f - works like grep -f - +# Before gzip-1.4, it would fail. + +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +printf 'needle\nn2\n' > n || framework_failure_ +cp n haystack || framework_failure_ +gzip haystack || framework_failure_ + +fail=0 +zgrep -f - haystack.gz < n > out 2>&1 || fail=1 + +compare out n || fail=1 + +if ${BASH_VERSION+:} false; then + set +o posix + # This failed with gzip 1.6. + cat n n >nn || framework_failure_ + eval 'zgrep -h -f <(cat n) haystack.gz haystack.gz' >out || fail=1 + compare out nn || fail=1 +fi + +# This failed with gzip 1.4. +echo a-b | zgrep -e - > /dev/null || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/zgrep-signal.sh b/src/zstd/tests/gzip/zgrep-signal.sh new file mode 100644 index 00000000..a8c53881 --- /dev/null +++ b/src/zstd/tests/gzip/zgrep-signal.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# Check that zgrep is terminated gracefully by signal when +# its grep/sed pipeline is terminated by a signal. + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +echo a | gzip -c > f.gz || framework_failure_ + +test "x$PERL" = x && PERL=perl +("$PERL" -e 'use POSIX qw(dup2)') >/dev/null 2>&1 || + skip_ "no suitable perl found" + +# Run the arguments as a command, in a process where stdout is a +# dangling pipe and SIGPIPE has the default signal-handling action. +# This can't be done portably in the shell, because if SIGPIPE is +# ignored when the shell is entered, the shell might refuse to trap +# it. Fall back on Perl+POSIX, if available. Take care to close the +# pipe's read end before running the program; the equivalent of the +# shell's "command | :" has a race condition in that COMMAND could +# write before ":" exits. +write_to_dangling_pipe () { + program=${1?} + shift + args= + for arg; do + args="$args, '$arg'" + done + "$PERL" -e ' + use POSIX qw(dup2); + $SIG{PIPE} = "DEFAULT"; + pipe my ($read_end, $write_end) or die "pipe: $!\n"; + dup2 fileno $write_end, 1 or die "dup2: $!\n"; + close $read_end or die "close: $!\n"; + exec '"'$program'$args"'; + ' +} + +write_to_dangling_pipe cat f.gz f.gz +signal_status=$? +test 128 -lt $signal_status || + framework_failure_ 'signal handling busted on this host' + +fail=0 + +write_to_dangling_pipe zgrep a f.gz f.gz +test $? -eq $signal_status || fail=1 + +Exit $fail diff --git a/src/zstd/tests/gzip/znew-k.sh b/src/zstd/tests/gzip/znew-k.sh new file mode 100644 index 00000000..6c239e28 --- /dev/null +++ b/src/zstd/tests/gzip/znew-k.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# Check that znew -K works without compress(1). + +# Copyright (C) 2010-2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ . + +cat <<'EOF' >compress || framework_failure_ +#!/bin/sh +echo >&2 'compress has been invoked' +exit 1 +EOF +chmod +x compress || framework_failure_ + +# Note that the basename must have a length of 6 or greater. +# Otherwise, "test -f $name" below would fail. +name=123456.Z + +printf '%1012977s' ' ' | gzip -c > $name || framework_failure_ + +fail=0 + +znew -K $name || fail=1 +test -f $name || fail=1 + +Exit $fail diff --git a/src/zstd/tests/invalidDictionaries.c b/src/zstd/tests/invalidDictionaries.c new file mode 100644 index 00000000..b23db036 --- /dev/null +++ b/src/zstd/tests/invalidDictionaries.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <stddef.h> +#include "zstd.h" + +static const char invalidRepCode[] = { + 0x37, 0xa4, 0x30, 0xec, 0x2a, 0x00, 0x00, 0x00, 0x39, 0x10, 0xc0, 0xc2, + 0xa6, 0x00, 0x0c, 0x30, 0xc0, 0x00, 0x03, 0x0c, 0x30, 0x20, 0x72, 0xf8, + 0xb4, 0x6d, 0x4b, 0x9f, 0xfc, 0x97, 0x29, 0x49, 0xb2, 0xdf, 0x4b, 0x29, + 0x7d, 0x4a, 0xfc, 0x83, 0x18, 0x22, 0x75, 0x23, 0x24, 0x44, 0x4d, 0x02, + 0xb7, 0x97, 0x96, 0xf6, 0xcb, 0xd1, 0xcf, 0xe8, 0x22, 0xea, 0x27, 0x36, + 0xb7, 0x2c, 0x40, 0x46, 0x01, 0x08, 0x23, 0x01, 0x00, 0x00, 0x06, 0x1e, + 0x3c, 0x83, 0x81, 0xd6, 0x18, 0xd4, 0x12, 0x3a, 0x04, 0x00, 0x80, 0x03, + 0x08, 0x0e, 0x12, 0x1c, 0x12, 0x11, 0x0d, 0x0e, 0x0a, 0x0b, 0x0a, 0x09, + 0x10, 0x0c, 0x09, 0x05, 0x04, 0x03, 0x06, 0x06, 0x06, 0x02, 0x00, 0x03, + 0x00, 0x00, 0x02, 0x02, 0x00, 0x04, 0x06, 0x03, 0x06, 0x08, 0x24, 0x6b, + 0x0d, 0x01, 0x10, 0x04, 0x81, 0x07, 0x00, 0x00, 0x04, 0xb9, 0x58, 0x18, + 0x06, 0x59, 0x92, 0x43, 0xce, 0x28, 0xa5, 0x08, 0x88, 0xc0, 0x80, 0x88, + 0x8c, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00 +}; + +typedef struct dictionary_s { + const char *data; + size_t size; +} dictionary; + +static const dictionary dictionaries[] = { + {invalidRepCode, sizeof(invalidRepCode)}, + {NULL, 0}, +}; + +int main(int argc, const char** argv) { + const dictionary *dict; + for (dict = dictionaries; dict->data != NULL; ++dict) { + ZSTD_CDict *cdict; + ZSTD_DDict *ddict; + cdict = ZSTD_createCDict(dict->data, dict->size, 1); + if (cdict) { + ZSTD_freeCDict(cdict); + return 1; + } + ddict = ZSTD_createDDict(dict->data, dict->size); + if (ddict) { + ZSTD_freeDDict(ddict); + return 2; + } + } + + (void)argc; + (void)argv; + return 0; +} diff --git a/src/zstd/tests/legacy.c b/src/zstd/tests/legacy.c new file mode 100644 index 00000000..46a8206c --- /dev/null +++ b/src/zstd/tests/legacy.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + This program uses hard-coded data compressed with Zstd legacy versions + and tests that the API decompresses them correctly +*/ + +/*=========================================== +* Dependencies +*==========================================*/ +#include <stddef.h> /* size_t */ +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strlen */ +#include "zstd.h" +#include "zstd_errors.h" + +/*=========================================== +* Macros +*==========================================*/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) + +/*=========================================== +* Precompressed frames +*==========================================*/ +const char* const COMPRESSED; /* content is at end of file */ +size_t const COMPRESSED_SIZE = 917; +const char* const EXPECTED; /* content is at end of file */ + + +int testSimpleAPI(void) +{ + size_t const size = strlen(EXPECTED); + char* const output = malloc(size); + + if (!output) { + DISPLAY("ERROR: Not enough memory!\n"); + return 1; + } + + { + size_t const ret = ZSTD_decompress(output, size, COMPRESSED, COMPRESSED_SIZE); + if (ZSTD_isError(ret)) { + if (ret == ZSTD_error_prefix_unknown) { + DISPLAY("ERROR: Invalid frame magic number, was this compiled " + "without legacy support?\n"); + } else { + DISPLAY("ERROR: %s\n", ZSTD_getErrorName(ret)); + } + return 1; + } + if (ret != size) { + DISPLAY("ERROR: Wrong decoded size\n"); + } + } + if (memcmp(EXPECTED, output, size) != 0) { + DISPLAY("ERROR: Wrong decoded output produced\n"); + return 1; + } + + free(output); + DISPLAY("Simple API OK\n"); + return 0; +} + +int testStreamingAPI(void) +{ + size_t const outBuffSize = ZSTD_DStreamOutSize(); + char* const outBuff = malloc(outBuffSize); + ZSTD_DStream* const stream = ZSTD_createDStream(); + ZSTD_inBuffer input = { COMPRESSED, COMPRESSED_SIZE, 0 }; + size_t outputPos = 0; + int needsInit = 1; + + if (outBuff == NULL) { + DISPLAY("ERROR: Could not allocate memory\n"); + return 1; + } + if (stream == NULL) { + DISPLAY("ERROR: Could not create dstream\n"); + return 1; + } + + while (1) { + ZSTD_outBuffer output = {outBuff, outBuffSize, 0}; + if (needsInit) { + size_t const ret = ZSTD_initDStream(stream); + if (ZSTD_isError(ret)) { + DISPLAY("ERROR: %s\n", ZSTD_getErrorName(ret)); + return 1; + } + } + { + size_t const ret = ZSTD_decompressStream(stream, &output, &input); + if (ZSTD_isError(ret)) { + DISPLAY("ERROR: %s\n", ZSTD_getErrorName(ret)); + return 1; + } + + if (ret == 0) { + needsInit = 1; + } + } + + if (memcmp(outBuff, EXPECTED + outputPos, output.pos) != 0) { + DISPLAY("ERROR: Wrong decoded output produced\n"); + return 1; + } + outputPos += output.pos; + if (input.pos == input.size && output.pos < output.size) { + break; + } + } + + free(outBuff); + ZSTD_freeDStream(stream); + DISPLAY("Streaming API OK\n"); + return 0; +} + +int main(void) +{ + int ret; + + ret = testSimpleAPI(); + if (ret) return ret; + ret = testStreamingAPI(); + if (ret) return ret; + + DISPLAY("OK\n"); + + return 0; +} + +/* Consists of the "EXPECTED" string compressed with default settings on + - v0.4.3 + - v0.5.0 + - v0.6.0 + - v0.7.0 + - v0.8.0 +*/ +const char* const COMPRESSED = + "\x24\xB5\x2F\xFD\x00\x00\x00\xBB\xB0\x02\xC0\x10\x00\x1E\xB0\x01" + "\x02\x00\x00\x80\x00\xE8\x92\x34\x12\x97\xC8\xDF\xE9\xF3\xEF\x53" + "\xEA\x1D\x27\x4F\x0C\x44\x90\x0C\x8D\xF1\xB4\x89\x17\x00\x18\x00" + "\x18\x00\x3F\xE6\xE2\xE3\x74\xD6\xEC\xC9\x4A\xE0\x71\x71\x42\x3E" + "\x64\x4F\x6A\x45\x4E\x78\xEC\x49\x03\x3F\xC6\x80\xAB\x8F\x75\x5E" + "\x6F\x2E\x3E\x7E\xC6\xDC\x45\x69\x6C\xC5\xFD\xC7\x40\xB8\x84\x8A" + "\x01\xEB\xA8\xD1\x40\x39\x90\x4C\x64\xF8\xEB\x53\xE6\x18\x0B\x67" + "\x12\xAD\xB8\x99\xB3\x5A\x6F\x8A\x19\x03\x01\x50\x67\x56\xF5\x9F" + "\x35\x84\x60\xA0\x60\x91\xC9\x0A\xDC\xAB\xAB\xE0\xE2\x81\xFA\xCF" + "\xC6\xBA\x01\x0E\x00\x54\x00\x00\x19\x00\x00\x54\x14\x00\x24\x24" + "\x04\xFE\x04\x84\x4E\x41\x00\x27\xE2\x02\xC4\xB1\x00\xD2\x51\x00" + "\x79\x58\x41\x28\x00\xE0\x0C\x01\x68\x65\x00\x04\x13\x0C\xDA\x0C" + "\x80\x22\x06\xC0\x00\x00\x25\xB5\x2F\xFD\x00\x00\x00\xAD\x12\xB0" + "\x7D\x1E\xB0\x01\x02\x00\x00\x80\x00\xE8\x92\x34\x12\x97\xC8\xDF" + "\xE9\xF3\xEF\x53\xEA\x1D\x27\x4F\x0C\x44\x90\x0C\x8D\xF1\xB4\x89" + "\x03\x01\x50\x67\x56\xF5\x9F\x35\x84\x60\xA0\x60\x91\xC9\x0A\xDC" + "\xAB\xAB\xE0\xE2\x81\xFA\xCF\xC6\xBA\xEB\xA8\xD1\x40\x39\x90\x4C" + "\x64\xF8\xEB\x53\xE6\x18\x0B\x67\x12\xAD\xB8\x99\xB3\x5A\x6F\x8A" + "\xF9\x63\x0C\xB8\xFA\x58\xE7\xF5\xE6\xE2\xE3\x67\xCC\x5D\x94\xC6" + "\x56\xDC\x7F\x0C\x84\x4B\xA8\xF8\x63\x2E\x3E\x4E\x67\xCD\x9E\xAC" + "\x04\x1E\x17\x27\xE4\x43\xF6\xA4\x56\xE4\x84\xC7\x9E\x34\x0E\x00" + "\x00\x32\x40\x80\xA8\x00\x01\x49\x81\xE0\x3C\x01\x29\x1D\x00\x87" + "\xCE\x80\x75\x08\x80\x72\x24\x00\x7B\x52\x00\x94\x00\x20\xCC\x01" + "\x86\xD2\x00\x81\x09\x83\xC1\x34\xA0\x88\x01\xC0\x00\x00\x26\xB5" + "\x2F\xFD\x42\xEF\x00\x00\xA6\x12\xB0\x7D\x1E\xB0\x01\x02\x00\x00" + "\x54\xA0\xBA\x24\x8D\xC4\x25\xF2\x77\xFA\xFC\xFB\x94\x7A\xC7\xC9" + "\x13\x03\x11\x24\x43\x63\x3C\x6D\x22\x03\x01\x50\x67\x56\xF5\x9F" + "\x35\x84\x60\xA0\x60\x91\xC9\x0A\xDC\xAB\xAB\xE0\xE2\x81\xFA\xCF" + "\xC6\xBA\xEB\xA8\xD1\x40\x39\x90\x4C\x64\xF8\xEB\x53\xE6\x18\x0B" + "\x67\x12\xAD\xB8\x99\xB3\x5A\x6F\x8A\xF9\x63\x0C\xB8\xFA\x58\xE7" + "\xF5\xE6\xE2\xE3\x67\xCC\x5D\x94\xC6\x56\xDC\x7F\x0C\x84\x4B\xA8" + "\xF8\x63\x2E\x3E\x4E\x67\xCD\x9E\xAC\x04\x1E\x17\x27\xE4\x43\xF6" + "\xA4\x56\xE4\x84\xC7\x9E\x34\x0E\x00\x35\x0B\x71\xB5\xC0\x2A\x5C" + "\x26\x94\x22\x20\x8B\x4C\x8D\x13\x47\x58\x67\x15\x6C\xF1\x1C\x4B" + "\x54\x10\x9D\x31\x50\x85\x4B\x54\x0E\x01\x4B\x3D\x01\xC0\x00\x00" + "\x27\xB5\x2F\xFD\x20\xEF\x00\x00\xA6\x12\xE4\x84\x1F\xB0\x01\x10" + "\x00\x00\x00\x35\x59\xA6\xE7\xA1\xEF\x7C\xFC\xBD\x3F\xFF\x9F\xEF" + "\xEE\xEF\x61\xC3\xAA\x31\x1D\x34\x38\x22\x22\x04\x44\x21\x80\x32" + "\xAD\x28\xF3\xD6\x28\x0C\x0A\x0E\xD6\x5C\xAC\x19\x8D\x20\x5F\x45" + "\x02\x2E\x17\x50\x66\x6D\xAC\x8B\x9C\x6E\x07\x73\x46\xBB\x44\x14" + "\xE7\x98\xC3\xB9\x17\x32\x6E\x33\x7C\x0E\x21\xB1\xDB\xCB\x89\x51" + "\x23\x34\xAB\x9D\xBC\x6D\x20\xF5\x03\xA9\x91\x4C\x2E\x1F\x59\xDB" + "\xD9\x35\x67\x4B\x0C\x95\x79\x10\x00\x85\xA6\x96\x95\x2E\xDF\x78" + "\x7B\x4A\x5C\x09\x76\x97\xD1\x5C\x96\x12\x75\x35\xA3\x55\x4A\xD4" + "\x0B\x00\x35\x0B\x71\xB5\xC0\x2A\x5C\xE6\x08\x45\xF1\x39\x43\xF1" + "\x1C\x4B\x54\x10\x9D\x31\x50\x85\x4B\x54\x0E\x01\x4B\x3D\x01\xC0" + "\x00\x00\x28\xB5\x2F\xFD\x24\xEF\x35\x05\x00\x92\x0B\x21\x1F\xB0" + "\x01\x10\x00\x00\x00\x35\x59\xA6\xE7\xA1\xEF\x7C\xFC\xBD\x3F\xFF" + "\x9F\xEF\xEE\xEF\x61\xC3\xAA\x31\x1D\x34\x38\x22\x22\x04\x44\x21" + "\x80\x32\xAD\x28\xF3\xD6\x28\x0C\x0A\x0E\xD6\x5C\xAC\x19\x8D\x20" + "\x5F\x45\x02\x2E\x17\x50\x66\x6D\xAC\x8B\x9C\x6E\x07\x73\x46\xBB" + "\x44\x14\xE7\x98\xC3\xB9\x17\x32\x6E\x33\x7C\x0E\x21\xB1\xDB\xCB" + "\x89\x51\x23\x34\xAB\x9D\xBC\x6D\x20\xF5\x03\xA9\x91\x4C\x2E\x1F" + "\x59\xDB\xD9\x35\x67\x4B\x0C\x95\x79\x10\x00\x85\xA6\x96\x95\x2E" + "\xDF\x78\x7B\x4A\x5C\x09\x76\x97\xD1\x5C\x96\x12\x75\x35\xA3\x55" + "\x4A\xD4\x0B\x00\x35\x0B\x71\xB5\xC0\x2A\x5C\xE6\x08\x45\xF1\x39" + "\x43\xF1\x1C\x4B\x54\x10\x9D\x31\x50\x85\x4B\x54\x0E\x01\x4B\x3D" + "\x01\xD2\x2F\x21\x80"; + +const char* const EXPECTED = + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n" + + "snowden is snowed in / he's now then in his snow den / when does the snow end?\n" + "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n" + "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n"; diff --git a/src/zstd/tests/longmatch.c b/src/zstd/tests/longmatch.c new file mode 100644 index 00000000..ed386157 --- /dev/null +++ b/src/zstd/tests/longmatch.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +int compress(ZSTD_CStream *ctx, ZSTD_outBuffer out, const void *data, size_t size) { + ZSTD_inBuffer in = { data, size, 0 }; + while (in.pos < in.size) { + ZSTD_outBuffer tmp = out; + const size_t rc = ZSTD_compressStream(ctx, &tmp, &in); + if (ZSTD_isError(rc)) { + return 1; + } + } + { + ZSTD_outBuffer tmp = out; + const size_t rc = ZSTD_flushStream(ctx, &tmp); + if (rc != 0) { return 1; } + } + return 0; +} + +int main(int argc, const char** argv) { + ZSTD_CStream *ctx; + ZSTD_parameters params; + size_t rc; + unsigned windowLog; + (void)argc; + (void)argv; + /* Create stream */ + ctx = ZSTD_createCStream(); + if (!ctx) { return 1; } + /* Set parameters */ + memset(¶ms, 0, sizeof(params)); + params.cParams.windowLog = 18; + params.cParams.chainLog = 13; + params.cParams.hashLog = 14; + params.cParams.searchLog = 1; + params.cParams.searchLength = 7; + params.cParams.targetLength = 16; + params.cParams.strategy = ZSTD_fast; + windowLog = params.cParams.windowLog; + /* Initialize stream */ + rc = ZSTD_initCStream_advanced(ctx, NULL, 0, params, 0); + if (ZSTD_isError(rc)) { return 2; } + { + U64 compressed = 0; + const U64 toCompress = ((U64)1) << 33; + const size_t size = 1 << windowLog; + size_t pos = 0; + char *srcBuffer = (char*) malloc(1 << windowLog); + char *dstBuffer = (char*) malloc(ZSTD_compressBound(1 << windowLog)); + ZSTD_outBuffer out = { dstBuffer, ZSTD_compressBound(1 << windowLog), 0 }; + const char match[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const size_t randomData = (1 << windowLog) - 2*sizeof(match); + size_t i; + printf("\n === Long Match Test === \n"); + printf("Creating random data to produce long matches \n"); + for (i = 0; i < sizeof(match); ++i) { + srcBuffer[i] = match[i]; + } + for (i = 0; i < randomData; ++i) { + srcBuffer[sizeof(match) + i] = (char)(rand() & 0xFF); + } + for (i = 0; i < sizeof(match); ++i) { + srcBuffer[sizeof(match) + randomData + i] = match[i]; + } + printf("Compressing, trying to generate a segfault \n"); + if (compress(ctx, out, srcBuffer, size)) { + return 1; + } + compressed += size; + while (compressed < toCompress) { + const size_t block = rand() % (size - pos + 1); + if (pos == size) { pos = 0; } + if (compress(ctx, out, srcBuffer + pos, block)) { + return 1; + } + pos += block; + compressed += block; + } + printf("Compression completed successfully (no error triggered)\n"); + free(srcBuffer); + free(dstBuffer); + } + return 0; +} diff --git a/src/zstd/tests/namespaceTest.c b/src/zstd/tests/namespaceTest.c new file mode 100644 index 00000000..5b7095f6 --- /dev/null +++ b/src/zstd/tests/namespaceTest.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +#include <stddef.h> /* size_t */ +#include <string.h> /* strlen */ + +/* symbol definition */ +extern unsigned XXH32(const void* src, size_t srcSize, unsigned seed); + +int main(int argc, const char** argv) +{ + const char* exename = argv[0]; + unsigned result = XXH32(exename, strlen(exename), argc); + return !result; +} diff --git a/src/zstd/tests/paramgrill.c b/src/zstd/tests/paramgrill.c new file mode 100644 index 00000000..317ec461 --- /dev/null +++ b/src/zstd/tests/paramgrill.c @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 2015-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Dependencies +**************************************/ +#include "util.h" /* Compiler options, UTIL_GetFileSize */ +#include <stdlib.h> /* malloc */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ +#include <string.h> /* strcmp */ +#include <math.h> /* log */ +#include <time.h> /* clock_t */ + +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_estimateCCtxSize */ +#include "zstd.h" +#include "datagen.h" +#include "xxhash.h" + + +/*-************************************ +* Constants +**************************************/ +#define PROGRAM_DESCRIPTION "ZSTD parameters tester" +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR, __DATE__ + + +#define KB *(1<<10) +#define MB *(1<<20) +#define GB *(1ULL<<30) + +#define NBLOOPS 2 +#define TIMELOOP (2 * CLOCKS_PER_SEC) + +#define NB_LEVELS_TRACKED 30 + +static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + +#define COMPRESSIBILITY_DEFAULT 0.50 +static const size_t sampleSize = 10000000; + +static const double g_grillDuration_s = 90000; /* about 24 hours */ +static const clock_t g_maxParamTime = 15 * CLOCKS_PER_SEC; +static const clock_t g_maxVariationTime = 60 * CLOCKS_PER_SEC; +static const int g_maxNbVariations = 64; + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) + +#undef MIN +#undef MAX +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) + + +/*-************************************ +* Benchmark Parameters +**************************************/ +static U32 g_nbIterations = NBLOOPS; +static double g_compressibility = COMPRESSIBILITY_DEFAULT; +static U32 g_blockSize = 0; +static U32 g_rand = 1; +static U32 g_singleRun = 0; +static U32 g_target = 0; +static U32 g_noSeed = 0; +static ZSTD_compressionParameters g_params = { 0, 0, 0, 0, 0, 0, ZSTD_greedy }; + +void BMK_SetNbIterations(int nbLoops) +{ + g_nbIterations = nbLoops; + DISPLAY("- %u iterations -\n", g_nbIterations); +} + + +/*-******************************************************* +* Private functions +*********************************************************/ + +/* works even if overflow ; max span ~ 30 mn */ +static clock_t BMK_clockSpan(clock_t cStart) { return clock() - cStart; } + +/* accuracy in seconds only, span can be multiple years */ +static double BMK_timeSpan(time_t tStart) { return difftime(time(NULL), tStart); } + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + if (requiredMem > maxMemory) requiredMem = maxMemory; + + requiredMem += 2*step; + while (!testmem) { + requiredMem -= step; + testmem = malloc ((size_t)requiredMem); + } + + free (testmem); + return (size_t) (requiredMem - step); +} + + +static U32 FUZ_rotl32(U32 x, U32 r) +{ + return ((x << r) | (x >> (32 - r))); +} + +U32 FUZ_rand(U32* src) +{ + const U32 prime1 = 2654435761U; + const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + + +/*-******************************************************* +* Bench functions +*********************************************************/ +typedef struct { + size_t cSize; + double cSpeed; /* bytes / sec */ + double dSpeed; +} BMK_result_t; + +typedef struct +{ + const char* srcPtr; + size_t srcSize; + char* cPtr; + size_t cRoom; + size_t cSize; + char* resPtr; + size_t resSize; +} blockParam_t; + + +static size_t BMK_benchParam(BMK_result_t* resultPtr, + const void* srcBuffer, size_t srcSize, + ZSTD_CCtx* ctx, + const ZSTD_compressionParameters cParams) +{ + const size_t blockSize = g_blockSize ? g_blockSize : srcSize; + const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize); + blockParam_t* const blockTable = (blockParam_t*) malloc(nbBlocks * sizeof(blockParam_t)); + const size_t maxCompressedSize = (size_t)nbBlocks * ZSTD_compressBound(blockSize); + void* const compressedBuffer = malloc(maxCompressedSize); + void* const resultBuffer = malloc(srcSize); + ZSTD_parameters params; + U32 Wlog = cParams.windowLog; + U32 Clog = cParams.chainLog; + U32 Hlog = cParams.hashLog; + U32 Slog = cParams.searchLog; + U32 Slength = cParams.searchLength; + U32 Tlength = cParams.targetLength; + ZSTD_strategy strat = cParams.strategy; + char name[30] = { 0 }; + U64 crcOrig; + + /* init result for early exit */ + resultPtr->cSize = srcSize; + resultPtr->cSpeed = 0.; + resultPtr->dSpeed = 0.; + + /* Memory allocation & restrictions */ + snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%03uS%1u", Wlog, Clog, Hlog, Slog, Slength, Tlength, strat); + if (!compressedBuffer || !resultBuffer || !blockTable) { + DISPLAY("\nError: not enough memory!\n"); + free(compressedBuffer); + free(resultBuffer); + free(blockTable); + return 12; + } + + /* Calculating input Checksum */ + crcOrig = XXH64(srcBuffer, srcSize, 0); + + /* Init blockTable data */ + { + U32 i; + size_t remaining = srcSize; + const char* srcPtr = (const char*)srcBuffer; + char* cPtr = (char*)compressedBuffer; + char* resPtr = (char*)resultBuffer; + for (i=0; i<nbBlocks; i++) { + size_t thisBlockSize = MIN(remaining, blockSize); + blockTable[i].srcPtr = srcPtr; + blockTable[i].cPtr = cPtr; + blockTable[i].resPtr = resPtr; + blockTable[i].srcSize = thisBlockSize; + blockTable[i].cRoom = ZSTD_compressBound(thisBlockSize); + srcPtr += thisBlockSize; + cPtr += blockTable[i].cRoom; + resPtr += thisBlockSize; + remaining -= thisBlockSize; + } } + + /* warmimg up memory */ + RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.10, 1); + + /* Bench */ + { U32 loopNb; + size_t cSize = 0; + double fastestC = 100000000., fastestD = 100000000.; + double ratio = 0.; + clock_t const benchStart = clock(); + + DISPLAY("\r%79s\r", ""); + memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) { + int nbLoops; + U32 blockNb; + clock_t roundStart, roundClock; + + { clock_t const benchTime = BMK_clockSpan(benchStart); + if (benchTime > g_maxParamTime) break; } + + /* Compression */ + DISPLAY("\r%1u-%s : %9u ->", loopNb, name, (U32)srcSize); + memset(compressedBuffer, 0xE5, maxCompressedSize); + + nbLoops = 0; + roundStart = clock(); + while (clock() == roundStart); + roundStart = clock(); + while (BMK_clockSpan(roundStart) < TIMELOOP) { + for (blockNb=0; blockNb<nbBlocks; blockNb++) + blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx, + blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, + blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize, + NULL, 0, + params); + nbLoops++; + } + roundClock = BMK_clockSpan(roundStart); + + cSize = 0; + for (blockNb=0; blockNb<nbBlocks; blockNb++) + cSize += blockTable[blockNb].cSize; + ratio = (double)srcSize / (double)cSize; + if ((double)roundClock < fastestC * CLOCKS_PER_SEC * nbLoops) fastestC = ((double)roundClock / CLOCKS_PER_SEC) / nbLoops; + DISPLAY("\r"); + DISPLAY("%1u-%s : %9u ->", loopNb, name, (U32)srcSize); + DISPLAY(" %9u (%4.3f),%7.1f MB/s", (U32)cSize, ratio, (double)srcSize / fastestC / 1000000.); + resultPtr->cSize = cSize; + resultPtr->cSpeed = (double)srcSize / fastestC; + +#if 1 + /* Decompression */ + memset(resultBuffer, 0xD6, srcSize); + + nbLoops = 0; + roundStart = clock(); + while (clock() == roundStart); + roundStart = clock(); + for ( ; BMK_clockSpan(roundStart) < TIMELOOP; nbLoops++) { + for (blockNb=0; blockNb<nbBlocks; blockNb++) + blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize, + blockTable[blockNb].cPtr, blockTable[blockNb].cSize); + } + roundClock = BMK_clockSpan(roundStart); + + if ((double)roundClock < fastestD * CLOCKS_PER_SEC * nbLoops) fastestD = ((double)roundClock / CLOCKS_PER_SEC) / nbLoops; + DISPLAY("\r"); + DISPLAY("%1u-%s : %9u -> ", loopNb, name, (U32)srcSize); + DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000000.); + DISPLAY("%7.1f MB/s", (double)srcSize / fastestD / 1000000.); + resultPtr->dSpeed = (double)srcSize / fastestD; + + /* CRC Checking */ + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if (crcOrig!=crcCheck) { + unsigned u; + unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize)); + DISPLAY("\n!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u<srcSize; u++) { + if (((const BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u]) { + printf("Decoding error at pos %u (block %u, pos %u) \n", u, u / eBlockSize, u % eBlockSize); + break; + } } + break; + } } +#endif + } } + + /* End cleaning */ + DISPLAY("\r"); + free(compressedBuffer); + free(resultBuffer); + return 0; +} + + +const char* g_stratName[] = { "ZSTD_fast ", + "ZSTD_dfast ", + "ZSTD_greedy ", + "ZSTD_lazy ", + "ZSTD_lazy2 ", + "ZSTD_btlazy2 ", + "ZSTD_btopt ", + "ZSTD_btultra "}; + +static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize) +{ + DISPLAY("\r%79s\r", ""); + fprintf(f," {%3u,%3u,%3u,%3u,%3u,%3u, %s }, ", + params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, + params.targetLength, g_stratName[(U32)(params.strategy)]); + fprintf(f, + "/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", + cLevel, (double)srcSize / result.cSize, result.cSpeed / 1000000., result.dSpeed / 1000000.); +} + + +static double g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0. }; /* NB_LEVELS_TRACKED : checked at main() */ + +typedef struct { + BMK_result_t result; + ZSTD_compressionParameters params; +} winnerInfo_t; + +static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize) +{ + int cLevel; + + fprintf(f, "\n /* Proposed configurations : */ \n"); + fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); + + for (cLevel=0; cLevel <= ZSTD_maxCLevel(); cLevel++) + BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize); +} + + +static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSize) +{ + fseek(f, 0, SEEK_SET); + BMK_printWinners2(f, winners, srcSize); + fflush(f); + BMK_printWinners2(stdout, winners, srcSize); +} + +static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters params, + const void* srcBuffer, size_t srcSize, + ZSTD_CCtx* ctx) +{ + BMK_result_t testResult; + int better = 0; + int cLevel; + + BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); + + for (cLevel = 1; cLevel <= ZSTD_maxCLevel(); cLevel++) { + if (testResult.cSpeed < g_cSpeedTarget[cLevel]) + continue; /* not fast enough for this level */ + if (winners[cLevel].result.cSize==0) { + /* first solution for this cLevel */ + winners[cLevel].result = testResult; + winners[cLevel].params = params; + BMK_printWinner(stdout, cLevel, testResult, params, srcSize); + better = 1; + continue; + } + + if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) { + /* Validate solution is "good enough" */ + double W_ratio = (double)srcSize / testResult.cSize; + double O_ratio = (double)srcSize / winners[cLevel].result.cSize; + double W_ratioNote = log (W_ratio); + double O_ratioNote = log (O_ratio); + size_t W_DMemUsed = (1 << params.windowLog) + (16 KB); + size_t O_DMemUsed = (1 << winners[cLevel].params.windowLog) + (16 KB); + double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); + double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); + + size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(params); + size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(winners[cLevel].params); + double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); + double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); + + double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log(testResult.cSpeed); + double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed); + + double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log(testResult.dSpeed); + double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed); + + if (W_DMemUsed_note < O_DMemUsed_note) { + /* uses too much Decompression memory for too little benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", + W_ratio, (double)(W_DMemUsed) / 1024 / 1024, + O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel); + continue; + } + if (W_CMemUsed_note < O_CMemUsed_note) { + /* uses too much memory for compression for too little benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", + W_ratio, (double)(W_CMemUsed) / 1024 / 1024, + O_ratio, (double)(O_CMemUsed) / 1024 / 1024, cLevel); + continue; + } + if (W_CSpeed_note < O_CSpeed_note ) { + /* too large compression speed difference for the compression benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", + W_ratio, testResult.cSpeed / 1000000, + O_ratio, winners[cLevel].result.cSpeed / 1000000., cLevel); + continue; + } + if (W_DSpeed_note < O_DSpeed_note ) { + /* too large decompression speed difference for the compression benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", + W_ratio, testResult.dSpeed / 1000000., + O_ratio, winners[cLevel].result.dSpeed / 1000000., cLevel); + continue; + } + + if (W_ratio < O_ratio) + DISPLAY("Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", W_ratio, O_ratio, cLevel); + + winners[cLevel].result = testResult; + winners[cLevel].params = params; + BMK_printWinner(stdout, cLevel, testResult, params, srcSize); + + better = 1; + } } + + return better; +} + + +/* nullified useless params, to ensure count stats */ +static ZSTD_compressionParameters* sanitizeParams(ZSTD_compressionParameters params) +{ + g_params = params; + if (params.strategy == ZSTD_fast) + g_params.chainLog = 0, g_params.searchLog = 0; + if (params.strategy == ZSTD_dfast) + g_params.searchLog = 0; + if (params.strategy != ZSTD_btopt && params.strategy != ZSTD_btultra) + g_params.targetLength = 0; + return &g_params; +} + + +static void paramVariation(ZSTD_compressionParameters* ptr) +{ + ZSTD_compressionParameters p; + U32 validated = 0; + while (!validated) { + U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1; + p = *ptr; + for ( ; nbChanges ; nbChanges--) { + const U32 changeID = FUZ_rand(&g_rand) % 14; + switch(changeID) + { + case 0: + p.chainLog++; break; + case 1: + p.chainLog--; break; + case 2: + p.hashLog++; break; + case 3: + p.hashLog--; break; + case 4: + p.searchLog++; break; + case 5: + p.searchLog--; break; + case 6: + p.windowLog++; break; + case 7: + p.windowLog--; break; + case 8: + p.searchLength++; break; + case 9: + p.searchLength--; break; + case 10: + p.strategy = (ZSTD_strategy)(((U32)p.strategy)+1); break; + case 11: + p.strategy = (ZSTD_strategy)(((U32)p.strategy)-1); break; + case 12: + p.targetLength *= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break; + case 13: + p.targetLength /= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break; + } + } + validated = !ZSTD_isError(ZSTD_checkCParams(p)); + } + *ptr = p; +} + + +#define PARAMTABLELOG 25 +#define PARAMTABLESIZE (1<<PARAMTABLELOG) +#define PARAMTABLEMASK (PARAMTABLESIZE-1) +static BYTE g_alreadyTested[PARAMTABLESIZE] = {0}; /* init to zero */ + +#define NB_TESTS_PLAYED(p) \ + g_alreadyTested[(XXH64(sanitizeParams(p), sizeof(p), 0) >> 3) & PARAMTABLEMASK] + + +static void playAround(FILE* f, winnerInfo_t* winners, + ZSTD_compressionParameters params, + const void* srcBuffer, size_t srcSize, + ZSTD_CCtx* ctx) +{ + int nbVariations = 0; + clock_t const clockStart = clock(); + + while (BMK_clockSpan(clockStart) < g_maxVariationTime) { + ZSTD_compressionParameters p = params; + + if (nbVariations++ > g_maxNbVariations) break; + paramVariation(&p); + + /* exclude faster if already played params */ + if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1)) + continue; + + /* test */ + NB_TESTS_PLAYED(p)++; + if (!BMK_seed(winners, p, srcBuffer, srcSize, ctx)) continue; + + /* improvement found => search more */ + BMK_printWinners(f, winners, srcSize); + playAround(f, winners, p, srcBuffer, srcSize, ctx); + } + +} + + +static ZSTD_compressionParameters randomParams(void) +{ + ZSTD_compressionParameters p; + U32 validated = 0; + while (!validated) { + /* totally random entry */ + p.chainLog = FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN) + ZSTD_CHAINLOG_MIN; + p.hashLog = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN; + p.searchLog = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN; + p.windowLog = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN; + p.searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN; + p.targetLength=FUZ_rand(&g_rand) % (ZSTD_TARGETLENGTH_MAX+1 - ZSTD_TARGETLENGTH_MIN) + ZSTD_TARGETLENGTH_MIN; + p.strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btultra +1)); + validated = !ZSTD_isError(ZSTD_checkCParams(p)); + } + return p; +} + +static void BMK_selectRandomStart( + FILE* f, winnerInfo_t* winners, + const void* srcBuffer, size_t srcSize, + ZSTD_CCtx* ctx) +{ + U32 const id = (FUZ_rand(&g_rand) % (ZSTD_maxCLevel()+1)); + if ((id==0) || (winners[id].params.windowLog==0)) { + /* totally random entry */ + ZSTD_compressionParameters const p = ZSTD_adjustCParams(randomParams(), srcSize, 0); + playAround(f, winners, p, srcBuffer, srcSize, ctx); + } + else + playAround(f, winners, winners[id].params, srcBuffer, srcSize, ctx); +} + + +static void BMK_benchMem(void* srcBuffer, size_t srcSize) +{ + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + ZSTD_compressionParameters params; + winnerInfo_t winners[NB_LEVELS_TRACKED]; + const char* const rfName = "grillResults.txt"; + FILE* const f = fopen(rfName, "w"); + const size_t blockSize = g_blockSize ? g_blockSize : srcSize; + + /* init */ + if (ctx==NULL) { DISPLAY("ZSTD_createCCtx() failed \n"); exit(1); } + memset(winners, 0, sizeof(winners)); + if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } + + if (g_singleRun) { + BMK_result_t testResult; + g_params = ZSTD_adjustCParams(g_params, srcSize, 0); + BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, g_params); + DISPLAY("\n"); + return; + } + + if (g_target) + g_cSpeedTarget[1] = g_target * 1000000; + else { + /* baseline config for level 1 */ + BMK_result_t testResult; + params = ZSTD_getCParams(1, blockSize, 0); + BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); + g_cSpeedTarget[1] = (testResult.cSpeed * 31) / 32; + } + + /* establish speed objectives (relative to level 1) */ + { int i; + for (i=2; i<=ZSTD_maxCLevel(); i++) + g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) / 32; + } + + /* populate initial solution */ + { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); + int i; + for (i=0; i<=maxSeeds; i++) { + params = ZSTD_getCParams(i, blockSize, 0); + BMK_seed(winners, params, srcBuffer, srcSize, ctx); + } } + BMK_printWinners(f, winners, srcSize); + + /* start tests */ + { const time_t grillStart = time(NULL); + do { + BMK_selectRandomStart(f, winners, srcBuffer, srcSize, ctx); + } while (BMK_timeSpan(grillStart) < g_grillDuration_s); + } + + /* end summary */ + BMK_printWinners(f, winners, srcSize); + DISPLAY("grillParams operations completed \n"); + + /* clean up*/ + fclose(f); + ZSTD_freeCCtx(ctx); +} + + +static int benchSample(void) +{ + void* origBuff; + size_t const benchedSize = sampleSize; + const char* const name = "Sample 10MiB"; + + /* Allocation */ + origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; } + + /* Fill buffer */ + RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0); + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY("using %s %i%%: \n", name, (int)(g_compressibility*100)); + BMK_benchMem(origBuff, benchedSize); + + free(origBuff); + return 0; +} + + +int benchFiles(const char** fileNamesTable, int nbFiles) +{ + int fileIdx=0; + + /* Loop for each file */ + while (fileIdx<nbFiles) { + const char* const inFileName = fileNamesTable[fileIdx++]; + FILE* const inFile = fopen( inFileName, "rb" ); + U64 const inFileSize = UTIL_getFileSize(inFileName); + size_t benchedSize; + void* origBuff; + + /* Check file existence */ + if (inFile==NULL) { + DISPLAY( "Pb opening %s\n", inFileName); + return 11; + } + + /* Memory allocation */ + benchedSize = BMK_findMaxMem(inFileSize*3) / 3; + if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) + DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20)); + origBuff = malloc(benchedSize); + if (origBuff==NULL) { + DISPLAY("\nError: not enough memory!\n"); + fclose(inFile); + return 12; + } + + /* Fill input buffer */ + DISPLAY("Loading %s... \r", inFileName); + { size_t const readSize = fread(origBuff, 1, benchedSize, inFile); + fclose(inFile); + if(readSize != benchedSize) { + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); + free(origBuff); + return 13; + } } + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY("using %s : \n", inFileName); + BMK_benchMem(origBuff, benchedSize); + + /* clean */ + free(origBuff); + } + + return 0; +} + + +static void BMK_translateAdvancedParams(ZSTD_compressionParameters params) +{ + DISPLAY("--zstd=windowLog=%u,chainLog=%u,hashLog=%u,searchLog=%u,searchLength=%u,targetLength=%u,strategy=%u \n", + params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, params.targetLength, (U32)(params.strategy)); +} + +/* optimizeForSize(): + * targetSpeed : expressed in MB/s */ +int optimizeForSize(const char* inFileName, U32 targetSpeed) +{ + FILE* const inFile = fopen( inFileName, "rb" ); + U64 const inFileSize = UTIL_getFileSize(inFileName); + size_t benchedSize = BMK_findMaxMem(inFileSize*3) / 3; + void* origBuff; + + /* Init */ + if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; } + + /* Memory allocation & restrictions */ + if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) { + DISPLAY("Not enough memory for '%s' \n", inFileName); + fclose(inFile); + return 11; + } + + /* Alloc */ + origBuff = malloc(benchedSize); + if(!origBuff) { + DISPLAY("\nError: not enough memory!\n"); + fclose(inFile); + return 12; + } + + /* Fill input buffer */ + DISPLAY("Loading %s... \r", inFileName); + { size_t const readSize = fread(origBuff, 1, benchedSize, inFile); + fclose(inFile); + if(readSize != benchedSize) { + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); + free(origBuff); + return 13; + } } + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY("optimizing for %s - limit speed %u MB/s \n", inFileName, targetSpeed); + targetSpeed *= 1000000; + + { ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + winnerInfo_t winner; + BMK_result_t candidate; + const size_t blockSize = g_blockSize ? g_blockSize : benchedSize; + + /* init */ + if (ctx==NULL) { DISPLAY("\n ZSTD_createCCtx error \n"); free(origBuff); return 14;} + memset(&winner, 0, sizeof(winner)); + winner.result.cSize = (size_t)(-1); + + /* find best solution from default params */ + { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); + int i; + for (i=1; i<=maxSeeds; i++) { + ZSTD_compressionParameters const CParams = ZSTD_getCParams(i, blockSize, 0); + BMK_benchParam(&candidate, origBuff, benchedSize, ctx, CParams); + if (candidate.cSpeed < targetSpeed) + break; + if ( (candidate.cSize < winner.result.cSize) + | ((candidate.cSize == winner.result.cSize) & (candidate.cSpeed > winner.result.cSpeed)) ) + { + winner.params = CParams; + winner.result = candidate; + BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize); + } } + } + BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize); + BMK_translateAdvancedParams(winner.params); + + /* start tests */ + { time_t const grillStart = time(NULL); + do { + ZSTD_compressionParameters params = winner.params; + paramVariation(¶ms); + if ((FUZ_rand(&g_rand) & 31) == 3) params = randomParams(); /* totally random config to improve search space */ + params = ZSTD_adjustCParams(params, blockSize, 0); + + /* exclude faster if already played set of params */ + if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(params))-1)) continue; + + /* test */ + NB_TESTS_PLAYED(params)++; + BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params); + + /* improvement found => new winner */ + if ( (candidate.cSpeed > targetSpeed) + & ( (candidate.cSize < winner.result.cSize) + | ((candidate.cSize == winner.result.cSize) & (candidate.cSpeed > winner.result.cSpeed)) ) ) + { + winner.params = params; + winner.result = candidate; + BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize); + BMK_translateAdvancedParams(winner.params); + } + } while (BMK_timeSpan(grillStart) < g_grillDuration_s); + } + + /* end summary */ + BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize); + DISPLAY("grillParams size - optimizer completed \n"); + + /* clean up*/ + ZSTD_freeCCtx(ctx); + } + + free(origBuff); + return 0; +} + + +static int usage(const char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); + return 0; +} + +static int usage_advanced(void) +{ + DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -T# : set level 1 speed objective \n"); + DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); + DISPLAY( " -i# : iteration loops [1-9](default : %i) \n", NBLOOPS); + DISPLAY( " -O# : find Optimized parameters for # MB/s compression speed (default : 0) \n"); + DISPLAY( " -S : Single run \n"); + DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 1; +} + +int main(int argc, const char** argv) +{ + int i, + filenamesStart=0, + result; + const char* exename=argv[0]; + const char* input_filename=0; + U32 optimizer = 0; + U32 main_pause = 0; + U32 targetSpeed = 0; + + /* checks */ + if (NB_LEVELS_TRACKED <= ZSTD_maxCLevel()) { + DISPLAY("Error : NB_LEVELS_TRACKED <= ZSTD_maxCLevel() \n"); + exit(1); + } + + /* Welcome message */ + DISPLAY(WELCOME_MESSAGE); + + if (argc<1) { badusage(exename); return 1; } + + for(i=1; i<argc; i++) { + const char* argument = argv[i]; + + if(!argument) continue; /* Protection if argument empty */ + + if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; } + + /* Decode command (note : aggregated commands are allowed) */ + if (argument[0]=='-') { + argument++; + + while (argument[0]!=0) { + + switch(argument[0]) + { + /* Display help on usage */ + case 'h' : + case 'H': usage(exename); usage_advanced(); return 0; + + /* Pause at the end (hidden option) */ + case 'p': main_pause = 1; argument++; break; + + /* Modify Nb Iterations */ + case 'i': + argument++; + if ((argument[0] >='0') & (argument[0] <='9')) + g_nbIterations = *argument++ - '0'; + break; + + /* Sample compressibility (when no file provided) */ + case 'P': + argument++; + { U32 proba32 = 0; + while ((argument[0]>= '0') & (argument[0]<= '9')) + proba32 = (proba32*10) + (*argument++ - '0'); + g_compressibility = (double)proba32 / 100.; + } + break; + + case 'O': + argument++; + optimizer=1; + targetSpeed = 0; + while ((*argument >= '0') & (*argument <= '9')) + targetSpeed = (targetSpeed*10) + (*argument++ - '0'); + break; + + /* Run Single conf */ + case 'S': + g_singleRun = 1; + argument++; + g_params = ZSTD_getCParams(2, g_blockSize, 0); + for ( ; ; ) { + switch(*argument) + { + case 'w': + g_params.windowLog = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.windowLog *= 10, g_params.windowLog += *argument++ - '0'; + continue; + case 'c': + g_params.chainLog = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.chainLog *= 10, g_params.chainLog += *argument++ - '0'; + continue; + case 'h': + g_params.hashLog = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.hashLog *= 10, g_params.hashLog += *argument++ - '0'; + continue; + case 's': + g_params.searchLog = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.searchLog *= 10, g_params.searchLog += *argument++ - '0'; + continue; + case 'l': /* search length */ + g_params.searchLength = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0'; + continue; + case 't': /* target length */ + g_params.targetLength = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.targetLength *= 10, g_params.targetLength += *argument++ - '0'; + continue; + case 'S': /* strategy */ + argument++; + while ((*argument>= '0') && (*argument<='9')) + g_params.strategy = (ZSTD_strategy)(*argument++ - '0'); + continue; + case 'L': + { int cLevel = 0; + argument++; + while ((*argument>= '0') && (*argument<='9')) + cLevel *= 10, cLevel += *argument++ - '0'; + g_params = ZSTD_getCParams(cLevel, g_blockSize, 0); + continue; + } + default : ; + } + break; + } + break; + + /* target level1 speed objective, in MB/s */ + case 'T': + argument++; + g_target = 0; + while ((*argument >= '0') && (*argument <= '9')) + g_target = (g_target*10) + (*argument++ - '0'); + break; + + /* cut input into blocks */ + case 'B': + g_blockSize = 0; + argument++; + while ((*argument >='0') & (*argument <='9')) + g_blockSize = (g_blockSize*10) + (*argument++ - '0'); + if (*argument=='K') g_blockSize<<=10, argument++; /* allows using KB notation */ + if (*argument=='M') g_blockSize<<=20, argument++; + if (*argument=='B') argument++; + DISPLAY("using %u KB block size \n", g_blockSize>>10); + break; + + /* Unknown command */ + default : return badusage(exename); + } + } + continue; + } /* if (argument[0]=='-') */ + + /* first provided filename is input */ + if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } + } + + if (filenamesStart==0) + result = benchSample(); + else { + if (optimizer) + result = optimizeForSize(input_filename, targetSpeed); + else + result = benchFiles(argv+filenamesStart, argc-filenamesStart); + } + + if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; } + + return result; +} diff --git a/src/zstd/tests/playTests.sh b/src/zstd/tests/playTests.sh new file mode 100755 index 00000000..bc021648 --- /dev/null +++ b/src/zstd/tests/playTests.sh @@ -0,0 +1,710 @@ +#!/bin/sh -e + +die() { + $ECHO "$@" 1>&2 + exit 1 +} + +roundTripTest() { + if [ -n "$3" ]; then + cLevel="$3" + proba="$2" + else + cLevel="$2" + proba="" + fi + if [ -n "$4" ]; then + dLevel="$4" + else + dLevel="$cLevel" + fi + + rm -f tmp1 tmp2 + $ECHO "roundTripTest: ./datagen $1 $proba | $ZSTD -v$cLevel | $ZSTD -d$dLevel" + ./datagen $1 $proba | $MD5SUM > tmp1 + ./datagen $1 $proba | $ZSTD --ultra -v$cLevel | $ZSTD -d$dLevel | $MD5SUM > tmp2 + $DIFF -q tmp1 tmp2 +} + +fileRoundTripTest() { + if [ -n "$3" ]; then + local_c="$3" + local_p="$2" + else + local_c="$2" + local_p="" + fi + if [ -n "$4" ]; then + local_d="$4" + else + local_d="$local_c" + fi + + rm -f tmp.zstd tmp.md5.1 tmp.md5.2 + $ECHO "fileRoundTripTest: ./datagen $1 $local_p > tmp && $ZSTD -v$local_c -c tmp | $ZSTD -d$local_d" + ./datagen $1 $local_p > tmp + < tmp $MD5SUM > tmp.md5.1 + $ZSTD --ultra -v$local_c -c tmp | $ZSTD -d$local_d | $MD5SUM > tmp.md5.2 + $DIFF -q tmp.md5.1 tmp.md5.2 +} + +isTerminal=false +if [ -t 0 ] && [ -t 1 ] +then + isTerminal=true +fi + +isWindows=false +INTOVOID="/dev/null" +case "$OS" in + Windows*) + isWindows=true + INTOVOID="NUL" + ;; +esac + +UNAME=$(uname) +case "$UNAME" in + Darwin) MD5SUM="md5 -r" ;; + FreeBSD) MD5SUM="gmd5sum" ;; + *) MD5SUM="md5sum" ;; +esac + +DIFF="diff" +case "$UNAME" in + SunOS) DIFF="gdiff" ;; +esac + +ECHO="echo -e" +case "$UNAME" in + Darwin) ECHO="echo" ;; +esac + +$ECHO "\nStarting playTests.sh isWindows=$isWindows ZSTD='$ZSTD'" + +[ -n "$ZSTD" ] || die "ZSTD variable must be defined!" + +if [ -n "$(echo hello | $ZSTD -v -T2 2>&1 > $INTOVOID | grep 'multi-threading is disabled')" ] +then + hasMT="" +else + hasMT="true" +fi + +$ECHO "\n**** simple tests **** " + +./datagen > tmp +$ECHO "test : basic compression " +$ZSTD -f tmp # trivial compression case, creates tmp.zst +$ECHO "test : basic decompression" +$ZSTD -df tmp.zst # trivial decompression case (overwrites tmp) +$ECHO "test : too large compression level (must fail)" +$ZSTD -99 -f tmp # too large compression level, automatic sized down +$ECHO "test : compress to stdout" +$ZSTD tmp -c > tmpCompressed +$ZSTD tmp --stdout > tmpCompressed # long command format +$ECHO "test : compress to named file" +rm tmpCompressed +$ZSTD tmp -o tmpCompressed +test -f tmpCompressed # file must be created +$ECHO "test : -o must be followed by filename (must fail)" +$ZSTD tmp -of tmpCompressed && die "-o must be followed by filename " +$ECHO "test : force write, correct order" +$ZSTD tmp -fo tmpCompressed +$ECHO "test : forgotten argument" +cp tmp tmp2 +$ZSTD tmp2 -fo && die "-o must be followed by filename " +$ECHO "test : implied stdout when input is stdin" +$ECHO bob | $ZSTD | $ZSTD -d +if [ "$isTerminal" = true ]; then +$ECHO "test : compressed data to terminal" +$ECHO bob | $ZSTD && die "should have refused : compressed data to terminal" +$ECHO "test : compressed data from terminal (a hang here is a test fail, zstd is wrongly waiting on data from terminal)" +$ZSTD -d > $INTOVOID && die "should have refused : compressed data from terminal" +fi +$ECHO "test : null-length file roundtrip" +$ECHO -n '' | $ZSTD - --stdout | $ZSTD -d --stdout +$ECHO "test : decompress file with wrong suffix (must fail)" +$ZSTD -d tmpCompressed && die "wrong suffix error not detected!" +$ZSTD -df tmp && die "should have refused : wrong extension" +$ECHO "test : decompress into stdout" +$ZSTD -d tmpCompressed -c > tmpResult # decompression using stdout +$ZSTD --decompress tmpCompressed -c > tmpResult +$ZSTD --decompress tmpCompressed --stdout > tmpResult +$ECHO "test : decompress from stdin into stdout" +$ZSTD -dc < tmp.zst > $INTOVOID # combine decompression, stdin & stdout +$ZSTD -dc - < tmp.zst > $INTOVOID +$ZSTD -d < tmp.zst > $INTOVOID # implicit stdout when stdin is used +$ZSTD -d - < tmp.zst > $INTOVOID +$ECHO "test : impose memory limitation (must fail)" +$ZSTD -d -f tmp.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed" +$ZSTD -d -f tmp.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +$ZSTD -d -f tmp.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +$ZSTD -d -f tmp.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +$ECHO "test : overwrite protection" +$ZSTD -q tmp && die "overwrite check failed!" +$ECHO "test : force overwrite" +$ZSTD -q -f tmp +$ZSTD -q --force tmp +$ECHO "test : overwrite readonly file" +rm -f tmpro tmpro.zst +$ECHO foo > tmpro.zst +$ECHO foo > tmpro +chmod 400 tmpro.zst +$ZSTD -q tmpro && die "should have refused to overwrite read-only file" +$ZSTD -q -f tmpro +rm -f tmpro tmpro.zst +$ECHO "test : file removal" +$ZSTD -f --rm tmp +test ! -f tmp # tmp should no longer be present +$ZSTD -f -d --rm tmp.zst +test ! -f tmp.zst # tmp.zst should no longer be present +$ECHO "test : --rm on stdin" +$ECHO a | $ZSTD --rm > $INTOVOID # --rm should remain silent +rm tmp +$ZSTD -f tmp && die "tmp not present : should have failed" +test ! -f tmp.zst # tmp.zst should not be created + + +$ECHO "\n**** Advanced compression parameters **** " +$ECHO "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!" +$ECHO "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!" +$ECHO "Hello world!" | $ZSTD --zstd=windowLog=21,slog - -o tmp.zst && die "wrong parameters not detected!" +test ! -f tmp.zst # tmp.zst should not be created +roundTripTest -g512K +roundTripTest -g512K " --zstd=slen=3,tlen=48,strat=6" +roundTripTest -g512K " --zstd=strat=6,wlog=23,clog=23,hlog=22,slog=6" +roundTripTest -g512K " --zstd=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6" +roundTripTest -g512K " --long --zstd=ldmHashLog=20,ldmSearchLength=64,ldmBucketSizeLog=1,ldmHashEveryLog=7" +roundTripTest -g512K " --long --zstd=ldmhlog=20,ldmslen=64,ldmblog=1,ldmhevery=7" +roundTripTest -g512K 19 + + +$ECHO "\n**** Pass-Through mode **** " +$ECHO "Hello world 1!" | $ZSTD -df +$ECHO "Hello world 2!" | $ZSTD -dcf +$ECHO "Hello world 3!" > tmp1 +$ZSTD -dcf tmp1 + + +$ECHO "\n**** frame concatenation **** " + +$ECHO "hello " > hello.tmp +$ECHO "world!" > world.tmp +cat hello.tmp world.tmp > helloworld.tmp +$ZSTD -c hello.tmp > hello.zstd +$ZSTD -c world.tmp > world.zstd +cat hello.zstd world.zstd > helloworld.zstd +$ZSTD -dc helloworld.zstd > result.tmp +cat result.tmp +$DIFF helloworld.tmp result.tmp +$ECHO "frame concatenation without checksum" +$ZSTD -c hello.tmp > hello.zstd --no-check +$ZSTD -c world.tmp > world.zstd --no-check +cat hello.zstd world.zstd > helloworld.zstd +$ZSTD -dc helloworld.zstd > result.tmp +cat result.tmp +$DIFF helloworld.tmp result.tmp +rm ./*.tmp ./*.zstd +$ECHO "frame concatenation tests completed" + + +if [ "$isWindows" = false ] && [ "$UNAME" != 'SunOS' ] ; then +$ECHO "\n**** flush write error test **** " + +$ECHO "$ECHO foo | $ZSTD > /dev/full" +$ECHO foo | $ZSTD > /dev/full && die "write error not detected!" +$ECHO "$ECHO foo | $ZSTD | $ZSTD -d > /dev/full" +$ECHO foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!" + + +$ECHO "\n**** symbolic link test **** " + +rm -f hello.tmp world.tmp hello.tmp.zst world.tmp.zst +$ECHO "hello world" > hello.tmp +ln -s hello.tmp world.tmp +$ZSTD world.tmp hello.tmp +test -f hello.tmp.zst # regular file should have been compressed! +test ! -f world.tmp.zst # symbolic link should not have been compressed! +$ZSTD world.tmp hello.tmp -f +test -f world.tmp.zst # symbolic link should have been compressed with --force +rm -f hello.tmp world.tmp hello.tmp.zst world.tmp.zst + +fi + + +$ECHO "\n**** test sparse file support **** " + +./datagen -g5M -P100 > tmpSparse +$ZSTD tmpSparse -c | $ZSTD -dv -o tmpSparseRegen +$DIFF -s tmpSparse tmpSparseRegen +$ZSTD tmpSparse -c | $ZSTD -dv --sparse -c > tmpOutSparse +$DIFF -s tmpSparse tmpOutSparse +$ZSTD tmpSparse -c | $ZSTD -dv --no-sparse -c > tmpOutNoSparse +$DIFF -s tmpSparse tmpOutNoSparse +ls -ls tmpSparse* # look at file size and block size on disk +./datagen -s1 -g1200007 -P100 | $ZSTD | $ZSTD -dv --sparse -c > tmpSparseOdd # Odd size file (to not finish on an exact nb of blocks) +./datagen -s1 -g1200007 -P100 | $DIFF -s - tmpSparseOdd +ls -ls tmpSparseOdd # look at file size and block size on disk +$ECHO "\n Sparse Compatibility with Console :" +$ECHO "Hello World 1 !" | $ZSTD | $ZSTD -d -c +$ECHO "Hello World 2 !" | $ZSTD | $ZSTD -d | cat +$ECHO "\n Sparse Compatibility with Append :" +./datagen -P100 -g1M > tmpSparse1M +cat tmpSparse1M tmpSparse1M > tmpSparse2M +$ZSTD -v -f tmpSparse1M -o tmpSparseCompressed +$ZSTD -d -v -f tmpSparseCompressed -o tmpSparseRegenerated +$ZSTD -d -v -f tmpSparseCompressed -c >> tmpSparseRegenerated +ls -ls tmpSparse* # look at file size and block size on disk +$DIFF tmpSparse2M tmpSparseRegenerated +rm tmpSparse* + + +$ECHO "\n**** multiple files tests **** " + +./datagen -s1 > tmp1 2> $INTOVOID +./datagen -s2 -g100K > tmp2 2> $INTOVOID +./datagen -s3 -g1M > tmp3 2> $INTOVOID +$ECHO "compress tmp* : " +$ZSTD -f tmp* +ls -ls tmp* +rm tmp1 tmp2 tmp3 +$ECHO "decompress tmp* : " +$ZSTD -df *.zst +ls -ls tmp* +$ECHO "compress tmp* into stdout > tmpall : " +$ZSTD -c tmp1 tmp2 tmp3 > tmpall +ls -ls tmp* # check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst) +$ECHO "decompress tmpall* into stdout > tmpdec : " +cp tmpall tmpall2 +$ZSTD -dc tmpall* > tmpdec +ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3)) +$ECHO "compress multiple files including a missing one (notHere) : " +$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" + + +$ECHO "\n**** dictionary tests **** " + +$ECHO "- test with raw dict (content only) " +./datagen > tmpDict +./datagen -g1M | $MD5SUM > tmp1 +./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2 +$DIFF -q tmp1 tmp2 +$ECHO "- Create first dictionary " +TESTFILE=../programs/zstdcli.c +$ZSTD --train *.c ../programs/*.c -o tmpDict +cp $TESTFILE tmp +$ZSTD -f tmp -D tmpDict +$ZSTD -d tmp.zst -D tmpDict -fo result +$DIFF $TESTFILE result +$ECHO "- Create second (different) dictionary " +$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC +$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +$ECHO "- Create dictionary with short dictID" +$ZSTD --train *.c ../programs/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" +$ECHO "- Create dictionary with wrong dictID parameter order (must fail)" +$ZSTD --train *.c ../programs/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument " +$ECHO "- Create dictionary with size limit" +$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict=4K -v +$ECHO "- Create dictionary with small size limit" +$ZSTD --train *.c ../programs/*.c -o tmpDict3 --maxdict=1K -v +$ECHO "- Create dictionary with wrong parameter order (must fail)" +$ZSTD --train *.c ../programs/*.c -o tmpDict3 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument " +$ECHO "- Compress without dictID" +$ZSTD -f tmp -D tmpDict1 --no-dictID +$ZSTD -d tmp.zst -D tmpDict -fo result +$DIFF $TESTFILE result +$ECHO "- Compress with wrong argument order (must fail)" +$ZSTD tmp -Df tmpDict1 -c > $INTOVOID && die "-D must be followed by dictionary name " +$ECHO "- Compress multiple files with dictionary" +rm -rf dirTestDict +mkdir dirTestDict +cp *.c dirTestDict +cp ../programs/*.c dirTestDict +cp ../programs/*.h dirTestDict +$MD5SUM dirTestDict/* > tmph1 +$ZSTD -f --rm dirTestDict/* -D tmpDictC +$ZSTD -d --rm dirTestDict/*.zst -D tmpDictC # note : use internal checksum by default +case "$UNAME" in + Darwin) $ECHO "md5sum -c not supported on OS-X : test skipped" ;; # not compatible with OS-X's md5 + *) $MD5SUM -c tmph1 ;; +esac +rm -rf dirTestDict +$ECHO "- dictionary builder on bogus input" +$ECHO "Hello World" > tmp +$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source" +./datagen -P0 -g10M > tmp +$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise" +rm tmp* + + +$ECHO "\n**** cover dictionary tests **** " + +TESTFILE=../programs/zstdcli.c +./datagen > tmpDict +$ECHO "- Create first dictionary" +$ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c -o tmpDict +cp $TESTFILE tmp +$ZSTD -f tmp -D tmpDict +$ZSTD -d tmp.zst -D tmpDict -fo result +$DIFF $TESTFILE result +$ECHO "- Create second (different) dictionary" +$ZSTD --train-cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC +$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +$ECHO "- Create dictionary with short dictID" +$ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" +$ECHO "- Create dictionary with size limit" +$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K +rm tmp* + +$ECHO "\n**** legacy dictionary tests **** " + +TESTFILE=../programs/zstdcli.c +./datagen > tmpDict +$ECHO "- Create first dictionary" +$ZSTD --train-legacy=selectivity=8 *.c ../programs/*.c -o tmpDict +cp $TESTFILE tmp +$ZSTD -f tmp -D tmpDict +$ZSTD -d tmp.zst -D tmpDict -fo result +$DIFF $TESTFILE result +$ECHO "- Create second (different) dictionary" +$ZSTD --train-legacy=s=5 *.c ../programs/*.c ../programs/*.h -o tmpDictC +$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +$ECHO "- Create dictionary with short dictID" +$ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1 +cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" +$ECHO "- Create dictionary with size limit" +$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K +rm tmp* + + +$ECHO "\n**** integrity tests **** " + +$ECHO "test one file (tmp1.zst) " +./datagen > tmp1 +$ZSTD tmp1 +$ZSTD -t tmp1.zst +$ZSTD --test tmp1.zst +$ECHO "test multiple files (*.zst) " +$ZSTD -t *.zst +$ECHO "test bad files (*) " +$ZSTD -t * && die "bad files not detected !" +$ZSTD -t tmp1 && die "bad file not detected !" +cp tmp1 tmp2.zst +$ZSTD -t tmp2.zst && die "bad file not detected !" +./datagen -g0 > tmp3 +$ZSTD -t tmp3 && die "bad file not detected !" # detects 0-sized files as bad +$ECHO "test --rm and --test combined " +$ZSTD -t --rm tmp1.zst +test -f tmp1.zst # check file is still present +split -b16384 tmp1.zst tmpSplit. +$ZSTD -t tmpSplit.* && die "bad file not detected !" +./datagen | $ZSTD -c | $ZSTD -t + + + +$ECHO "\n**** golden files tests **** " + +$ZSTD -t -r files +$ZSTD -c -r files | $ZSTD -t + + +$ECHO "\n**** benchmark mode tests **** " + +$ECHO "bench one file" +./datagen > tmp1 +$ZSTD -bi0 tmp1 +$ECHO "bench multiple levels" +$ZSTD -i0b0e3 tmp1 +$ECHO "with recursive and quiet modes" +$ZSTD -rqi1b1e2 tmp1 + + +$ECHO "\n**** gzip compatibility tests **** " + +GZIPMODE=1 +$ZSTD --format=gzip -V || GZIPMODE=0 +if [ $GZIPMODE -eq 1 ]; then + $ECHO "gzip support detected" + GZIPEXE=1 + gzip -V || GZIPEXE=0 + if [ $GZIPEXE -eq 1 ]; then + ./datagen > tmp + $ZSTD --format=gzip -f tmp + gzip -t -v tmp.gz + gzip -f tmp + $ZSTD -d -f -v tmp.gz + rm tmp* + else + $ECHO "gzip binary not detected" + fi +else + $ECHO "gzip mode not supported" +fi + + +$ECHO "\n**** gzip frame tests **** " + +if [ $GZIPMODE -eq 1 ]; then + ./datagen > tmp + $ZSTD -f --format=gzip tmp + $ZSTD -f tmp + cat tmp.gz tmp.zst tmp.gz tmp.zst | $ZSTD -d -f -o tmp + head -c -1 tmp.gz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !" + rm tmp* +else + $ECHO "gzip mode not supported" +fi + + +$ECHO "\n**** xz compatibility tests **** " + +LZMAMODE=1 +$ZSTD --format=xz -V || LZMAMODE=0 +if [ $LZMAMODE -eq 1 ]; then + $ECHO "xz support detected" + XZEXE=1 + xz -V && lzma -V || XZEXE=0 + if [ $XZEXE -eq 1 ]; then + $ECHO "Testing zstd xz and lzma support" + ./datagen > tmp + $ZSTD --format=lzma -f tmp + $ZSTD --format=xz -f tmp + xz -t -v tmp.xz + xz -t -v tmp.lzma + xz -f -k tmp + lzma -f -k --lzma1 tmp + $ZSTD -d -f -v tmp.xz + $ZSTD -d -f -v tmp.lzma + rm tmp* + $ECHO "Creating symlinks" + ln -s $ZSTD ./xz + ln -s $ZSTD ./unxz + ln -s $ZSTD ./lzma + ln -s $ZSTD ./unlzma + $ECHO "Testing xz and lzma symlinks" + ./datagen > tmp + ./xz tmp + xz -d tmp.xz + ./lzma tmp + lzma -d tmp.lzma + $ECHO "Testing unxz and unlzma symlinks" + xz tmp + ./xz -d tmp.xz + lzma tmp + ./lzma -d tmp.lzma + rm xz unxz lzma unlzma + rm tmp* + else + $ECHO "xz binary not detected" + fi +else + $ECHO "xz mode not supported" +fi + + +$ECHO "\n**** xz frame tests **** " + +if [ $LZMAMODE -eq 1 ]; then + ./datagen > tmp + $ZSTD -f --format=xz tmp + $ZSTD -f --format=lzma tmp + $ZSTD -f tmp + cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | $ZSTD -d -f -o tmp + head -c -1 tmp.xz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !" + head -c -1 tmp.lzma | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !" + rm tmp* +else + $ECHO "xz mode not supported" +fi + +$ECHO "\n**** lz4 compatibility tests **** " + +LZ4MODE=1 +$ZSTD --format=lz4 -V || LZ4MODE=0 +if [ $LZ4MODE -eq 1 ]; then + $ECHO "lz4 support detected" + LZ4EXE=1 + lz4 -V || LZ4EXE=0 + if [ $LZ4EXE -eq 1 ]; then + ./datagen > tmp + $ZSTD --format=lz4 -f tmp + lz4 -t -v tmp.lz4 + lz4 -f tmp + $ZSTD -d -f -v tmp.lz4 + rm tmp* + else + $ECHO "lz4 binary not detected" + fi +else + $ECHO "lz4 mode not supported" +fi + + +$ECHO "\n**** lz4 frame tests **** " + +if [ $LZ4MODE -eq 1 ]; then + ./datagen > tmp + $ZSTD -f --format=lz4 tmp + $ZSTD -f tmp + cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | $ZSTD -d -f -o tmp + head -c -1 tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !" + rm tmp* +else + $ECHO "lz4 mode not supported" +fi + +$ECHO "\n**** zstd round-trip tests **** " + +roundTripTest +roundTripTest -g15K # TableID==3 +roundTripTest -g127K # TableID==2 +roundTripTest -g255K # TableID==1 +roundTripTest -g522K # TableID==0 +roundTripTest -g519K 6 # greedy, hash chain +roundTripTest -g517K 16 # btlazy2 +roundTripTest -g516K 19 # btopt + +fileRoundTripTest -g500K + +$ECHO "\n**** zstd long distance matching round-trip tests **** " +roundTripTest -g0 "2 --long" +roundTripTest -g1000K "1 --long" +roundTripTest -g517K "6 --long" +roundTripTest -g516K "16 --long" +roundTripTest -g518K "19 --long" +fileRoundTripTest -g5M "3 --long" + + +if [ -n "$hasMT" ] +then + $ECHO "\n**** zstdmt round-trip tests **** " + roundTripTest -g4M "1 -T0" + roundTripTest -g8M "3 -T2" + roundTripTest -g8000K "2 --threads=2" + fileRoundTripTest -g4M "19 -T2 -B1M" + + $ECHO "\n**** zstdmt long distance matching round-trip tests **** " + roundTripTest -g8M "3 --long -T2" +else + $ECHO "\n**** no multithreading, skipping zstdmt tests **** " +fi + +rm tmp* + +$ECHO "\n**** zstd --list/-l single frame tests ****" +./datagen > tmp1 +./datagen > tmp2 +./datagen > tmp3 +$ZSTD tmp* +$ZSTD -l *.zst +$ZSTD -lv *.zst +$ZSTD --list *.zst +$ZSTD --list -v *.zst + +$ECHO "\n**** zstd --list/-l multiple frame tests ****" +cat tmp1.zst tmp2.zst > tmp12.zst +cat tmp12.zst tmp3.zst > tmp123.zst +$ZSTD -l *.zst +$ZSTD -lv *.zst +$ZSTD --list *.zst +$ZSTD --list -v *.zst + +$ECHO "\n**** zstd --list/-l error detection tests ****" +! $ZSTD -l tmp1 tmp1.zst +! $ZSTD --list tmp* +! $ZSTD -lv tmp1* +! $ZSTD --list -v tmp2 tmp12.zst + +$ECHO "\n**** zstd --list/-l test with null files ****" +./datagen -g0 > tmp5 +$ZSTD tmp5 +$ZSTD -l tmp5.zst +! $ZSTD -l tmp5* +$ZSTD -lv tmp5.zst +! $ZSTD -lv tmp5* + +$ECHO "\n**** zstd --list/-l test with no content size field ****" +./datagen -g1MB | $ZSTD > tmp6.zst +$ZSTD -l tmp6.zst +$ZSTD -lv tmp6.zst + +$ECHO "\n**** zstd --list/-l test with no checksum ****" +$ZSTD -f --no-check tmp1 +$ZSTD -l tmp1.zst +$ZSTD -lv tmp1.zst + +rm tmp* + + +$ECHO "\n**** zstd long distance matching tests **** " +roundTripTest -g0 " --long" +roundTripTest -g9M "2 --long" +# Test parameter parsing +roundTripTest -g1M -P50 "1 --long=29" " --memory=512MB" +roundTripTest -g1M -P50 "1 --long=29 --zstd=wlog=28" " --memory=256MB" +roundTripTest -g1M -P50 "1 --long=29" " --long=28 --memory=512MB" +roundTripTest -g1M -P50 "1 --long=29" " --zstd=wlog=28 --memory=512MB" + + +if [ "$1" != "--test-large-data" ]; then + $ECHO "Skipping large data tests" + exit 0 +fi + +$ECHO "\n**** large files tests **** " + +roundTripTest -g270000000 1 +roundTripTest -g250000000 2 +roundTripTest -g230000000 3 + +roundTripTest -g140000000 -P60 4 +roundTripTest -g130000000 -P62 5 +roundTripTest -g120000000 -P65 6 + +roundTripTest -g70000000 -P70 7 +roundTripTest -g60000000 -P71 8 +roundTripTest -g50000000 -P73 9 + +roundTripTest -g35000000 -P75 10 +roundTripTest -g30000000 -P76 11 +roundTripTest -g25000000 -P78 12 + +roundTripTest -g18000013 -P80 13 +roundTripTest -g18000014 -P80 14 +roundTripTest -g18000015 -P81 15 +roundTripTest -g18000016 -P84 16 +roundTripTest -g18000017 -P88 17 +roundTripTest -g18000018 -P94 18 +roundTripTest -g18000019 -P96 19 + +roundTripTest -g5000000000 -P99 1 + +fileRoundTripTest -g4193M -P99 1 + + +$ECHO "\n**** zstd long, long distance matching round-trip tests **** " +roundTripTest -g270000000 "1 --long" +roundTripTest -g130000000 -P60 "5 --long" +roundTripTest -g35000000 -P70 "8 --long" +roundTripTest -g18000001 -P80 "18 --long" +# Test large window logs +roundTripTest -g700M -P50 "1 --long=29" +roundTripTest -g600M -P50 "1 --long --zstd=wlog=29,clog=28" + + +if [ -n "$hasMT" ] +then + $ECHO "\n**** zstdmt long round-trip tests **** " + roundTripTest -g80000000 -P99 "19 -T2" " " + roundTripTest -g5000000000 -P99 "1 -T2" " " + roundTripTest -g500000000 -P97 "1 -T999" " " + fileRoundTripTest -g4103M -P98 " -T0" " " + roundTripTest -g400000000 -P97 "1 --long=24 -T2" " " +else + $ECHO "\n**** no multithreading, skipping zstdmt tests **** " +fi + +rm tmp* diff --git a/src/zstd/tests/poolTests.c b/src/zstd/tests/poolTests.c new file mode 100644 index 00000000..00ee8301 --- /dev/null +++ b/src/zstd/tests/poolTests.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#include "pool.h" +#include "threading.h" +#include "util.h" +#include <stddef.h> +#include <stdio.h> + +#define ASSERT_TRUE(p) \ + do { \ + if (!(p)) { \ + return 1; \ + } \ + } while (0) +#define ASSERT_FALSE(p) ASSERT_TRUE(!(p)) +#define ASSERT_EQ(lhs, rhs) ASSERT_TRUE((lhs) == (rhs)) + +struct data { + pthread_mutex_t mutex; + unsigned data[16]; + size_t i; +}; + +void fn(void *opaque) { + struct data *data = (struct data *)opaque; + pthread_mutex_lock(&data->mutex); + data->data[data->i] = data->i; + ++data->i; + pthread_mutex_unlock(&data->mutex); +} + +int testOrder(size_t numThreads, size_t queueSize) { + struct data data; + POOL_ctx *ctx = POOL_create(numThreads, queueSize); + ASSERT_TRUE(ctx); + data.i = 0; + pthread_mutex_init(&data.mutex, NULL); + { + size_t i; + for (i = 0; i < 16; ++i) { + POOL_add(ctx, &fn, &data); + } + } + POOL_free(ctx); + ASSERT_EQ(16, data.i); + { + size_t i; + for (i = 0; i < data.i; ++i) { + ASSERT_EQ(i, data.data[i]); + } + } + pthread_mutex_destroy(&data.mutex); + return 0; +} + +void waitFn(void *opaque) { + (void)opaque; + UTIL_sleepMilli(1); +} + +/* Tests for deadlock */ +int testWait(size_t numThreads, size_t queueSize) { + struct data data; + POOL_ctx *ctx = POOL_create(numThreads, queueSize); + ASSERT_TRUE(ctx); + { + size_t i; + for (i = 0; i < 16; ++i) { + POOL_add(ctx, &waitFn, &data); + } + } + POOL_free(ctx); + return 0; +} + +int main(int argc, const char **argv) { + size_t numThreads; + for (numThreads = 1; numThreads <= 4; ++numThreads) { + size_t queueSize; + for (queueSize = 0; queueSize <= 2; ++queueSize) { + if (testOrder(numThreads, queueSize)) { + printf("FAIL: testOrder\n"); + return 1; + } + if (testWait(numThreads, queueSize)) { + printf("FAIL: testWait\n"); + return 1; + } + } + } + printf("PASS: testOrder\n"); + (void)argc; + (void)argv; + return (POOL_create(0, 1)) ? printf("FAIL: testInvalid\n"), 1 + : printf("PASS: testInvalid\n"), 0; + return 0; +} diff --git a/src/zstd/tests/roundTripCrash.c b/src/zstd/tests/roundTripCrash.c new file mode 100644 index 00000000..180fa9b6 --- /dev/null +++ b/src/zstd/tests/roundTripCrash.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + This program takes a file in input, + performs a zstd round-trip test (compression - decompress) + compares the result with original + and generates a crash (double free) on corruption detection. +*/ + +/*=========================================== +* Dependencies +*==========================================*/ +#include <stddef.h> /* size_t */ +#include <stdlib.h> /* malloc, free, exit */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strcmp */ +#include <sys/types.h> /* stat */ +#include <sys/stat.h> /* stat */ +#include "xxhash.h" + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +static void crash(int errorCode){ + /* abort if AFL/libfuzzer, exit otherwise */ + #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* could also use __AFL_COMPILER */ + abort(); + #else + exit(errorCode); + #endif +} + +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + fprintf(stderr, \ + "Error=> %s: %s", \ + #f, ZSTD_getErrorName(err)); \ + crash(1); \ +} } + +/** roundTripTest() : +* Compresses `srcBuff` into `compressedBuff`, +* then decompresses `compressedBuff` into `resultBuff`. +* Compression level used is derived from first content byte. +* @return : result of decompression, which should be == `srcSize` +* or an error code if either compression or decompression fails. +* Note : `compressedBuffCapacity` should be `>= ZSTD_compressBound(srcSize)` +* for compression to be guaranteed to work */ +static size_t roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcBuffSize) +{ + static const int maxClevel = 19; + size_t const hashLength = MIN(128, srcBuffSize); + unsigned const h32 = XXH32(srcBuff, hashLength, 0); + int const cLevel = h32 % maxClevel; + size_t const cSize = ZSTD_compress(compressedBuff, compressedBuffCapacity, srcBuff, srcBuffSize, cLevel); + if (ZSTD_isError(cSize)) { + fprintf(stderr, "Compression error : %s \n", ZSTD_getErrorName(cSize)); + return cSize; + } + return ZSTD_decompress(resultBuff, resultBuffCapacity, compressedBuff, cSize); +} + +/** cctxParamRoundTripTest() : + * Same as roundTripTest() except allows experimenting with ZSTD_CCtx_params. */ +static size_t cctxParamRoundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcBuffSize) +{ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams(); + ZSTD_inBuffer inBuffer = { srcBuff, srcBuffSize, 0 }; + ZSTD_outBuffer outBuffer = {compressedBuff, compressedBuffCapacity, 0 }; + + static const int maxClevel = 19; + size_t const hashLength = MIN(128, srcBuffSize); + unsigned const h32 = XXH32(srcBuff, hashLength, 0); + int const cLevel = h32 % maxClevel; + + /* Set parameters */ + CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_p_compressionLevel, cLevel) ); + CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_p_nbThreads, 2) ); + CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_p_overlapSizeLog, 5) ); + + + /* Apply parameters */ + CHECK_Z( ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams) ); + + CHECK_Z (ZSTD_compress_generic(cctx, &outBuffer, &inBuffer, ZSTD_e_end) ); + + ZSTD_freeCCtxParams(cctxParams); + ZSTD_freeCCtx(cctx); + + return ZSTD_decompress(resultBuff, resultBuffCapacity, compressedBuff, outBuffer.pos); +} + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* ip1 = (const char*)buff1; + const char* ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos<buffSize; pos++) + if (ip1[pos]!=ip2[pos]) + break; + + return pos; +} + +static void roundTripCheck(const void* srcBuff, size_t srcBuffSize, int testCCtxParams) +{ + size_t const cBuffSize = ZSTD_compressBound(srcBuffSize); + void* cBuff = malloc(cBuffSize); + void* rBuff = malloc(cBuffSize); + + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! \n"); + exit (1); + } + + { size_t const result = testCCtxParams ? + cctxParamRoundTripTest(rBuff, cBuffSize, cBuff, cBuffSize, srcBuff, srcBuffSize) + : roundTripTest(rBuff, cBuffSize, cBuff, cBuffSize, srcBuff, srcBuffSize); + if (ZSTD_isError(result)) { + fprintf(stderr, "roundTripTest error : %s \n", ZSTD_getErrorName(result)); + crash(1); + } + if (result != srcBuffSize) { + fprintf(stderr, "Incorrect regenerated size : %u != %u\n", (unsigned)result, (unsigned)srcBuffSize); + crash(1); + } + if (checkBuffers(srcBuff, rBuff, srcBuffSize) != srcBuffSize) { + fprintf(stderr, "Silent decoding corruption !!!"); + crash(1); + } + } + + free(cBuff); + free(rBuff); +} + + +static size_t getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ +#endif + return (size_t)statbuf.st_size; +} + + +static int isDirectory(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +/** loadFile() : +* requirement : `buffer` size >= `fileSize` */ +static void loadFile(void* buffer, const char* fileName, size_t fileSize) +{ + FILE* const f = fopen(fileName, "rb"); + if (isDirectory(fileName)) { + fprintf(stderr, "Ignoring %s directory \n", fileName); + exit(2); + } + if (f==NULL) { + fprintf(stderr, "Impossible to open %s \n", fileName); + exit(3); + } + { size_t const readSize = fread(buffer, 1, fileSize, f); + if (readSize != fileSize) { + fprintf(stderr, "Error reading %s \n", fileName); + exit(5); + } } + fclose(f); +} + + +static void fileCheck(const char* fileName, int testCCtxParams) +{ + size_t const fileSize = getFileSize(fileName); + void* buffer = malloc(fileSize); + if (!buffer) { + fprintf(stderr, "not enough memory \n"); + exit(4); + } + loadFile(buffer, fileName, fileSize); + roundTripCheck(buffer, fileSize, testCCtxParams); + free (buffer); +} + +int main(int argCount, const char** argv) { + int argNb = 1; + int testCCtxParams = 0; + if (argCount < 2) { + fprintf(stderr, "Error : no argument : need input file \n"); + exit(9); + } + + if (!strcmp(argv[argNb], "--cctxParams")) { + testCCtxParams = 1; + argNb++; + } + + fileCheck(argv[argNb], testCCtxParams); + fprintf(stderr, "no pb detected\n"); + return 0; +} diff --git a/src/zstd/tests/symbols.c b/src/zstd/tests/symbols.c new file mode 100644 index 00000000..c0bed2e5 --- /dev/null +++ b/src/zstd/tests/symbols.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#include <stdio.h> +#include "zstd_errors.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#define ZBUFF_DISABLE_DEPRECATE_WARNINGS +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" +#define ZDICT_DISABLE_DEPRECATE_WARNINGS +#define ZDICT_STATIC_LINKING_ONLY +#include "zdict.h" + +static const void *symbols[] = { +/* zstd.h */ + &ZSTD_versionNumber, + &ZSTD_compress, + &ZSTD_decompress, + &ZSTD_getDecompressedSize, + &ZSTD_findDecompressedSize, + &ZSTD_findFrameCompressedSize, + &ZSTD_getFrameContentSize, + &ZSTD_maxCLevel, + &ZSTD_compressBound, + &ZSTD_isError, + &ZSTD_getErrorName, + &ZSTD_createCCtx, + &ZSTD_freeCCtx, + &ZSTD_compressCCtx, + &ZSTD_createDCtx, + &ZSTD_freeDCtx, + &ZSTD_decompressDCtx, + &ZSTD_compress_usingDict, + &ZSTD_decompress_usingDict, + &ZSTD_createCDict, + &ZSTD_freeCDict, + &ZSTD_compress_usingCDict, + &ZSTD_createDDict, + &ZSTD_freeDDict, + &ZSTD_decompress_usingDDict, + &ZSTD_createCStream, + &ZSTD_freeCStream, + &ZSTD_initCStream, + &ZSTD_compressStream, + &ZSTD_flushStream, + &ZSTD_endStream, + &ZSTD_CStreamInSize, + &ZSTD_CStreamOutSize, + &ZSTD_createDStream, + &ZSTD_freeDStream, + &ZSTD_initDStream, + &ZSTD_decompressStream, + &ZSTD_DStreamInSize, + &ZSTD_DStreamOutSize, +/* zstd.h: advanced functions */ + &ZSTD_estimateCCtxSize, + &ZSTD_createCCtx_advanced, + &ZSTD_sizeof_CCtx, + &ZSTD_createCDict_advanced, + &ZSTD_sizeof_CDict, + &ZSTD_getCParams, + &ZSTD_getParams, + &ZSTD_checkCParams, + &ZSTD_adjustCParams, + &ZSTD_compress_advanced, + &ZSTD_isFrame, + &ZSTD_estimateDCtxSize, + &ZSTD_createDCtx_advanced, + &ZSTD_sizeof_DCtx, + &ZSTD_sizeof_DDict, + &ZSTD_getDictID_fromDict, + &ZSTD_getDictID_fromDDict, + &ZSTD_getDictID_fromFrame, + &ZSTD_createCStream_advanced, + &ZSTD_initCStream_srcSize, + &ZSTD_initCStream_usingDict, + &ZSTD_initCStream_advanced, + &ZSTD_initCStream_usingCDict, + &ZSTD_resetCStream, + &ZSTD_sizeof_CStream, + &ZSTD_createDStream_advanced, + &ZSTD_initDStream_usingDict, + &ZSTD_setDStreamParameter, + &ZSTD_initDStream_usingDDict, + &ZSTD_resetDStream, + &ZSTD_sizeof_DStream, + &ZSTD_compressBegin, + &ZSTD_compressBegin_usingDict, + &ZSTD_compressBegin_advanced, + &ZSTD_copyCCtx, + &ZSTD_compressContinue, + &ZSTD_compressEnd, + &ZSTD_getFrameHeader, + &ZSTD_decompressBegin, + &ZSTD_decompressBegin_usingDict, + &ZSTD_copyDCtx, + &ZSTD_nextSrcSizeToDecompress, + &ZSTD_decompressContinue, + &ZSTD_nextInputType, + &ZSTD_getBlockSize, + &ZSTD_compressBlock, + &ZSTD_decompressBlock, + &ZSTD_insertBlock, +/* zstd_errors.h */ + &ZSTD_getErrorCode, + &ZSTD_getErrorString, +/* zbuff.h */ + &ZBUFF_createCCtx, + &ZBUFF_freeCCtx, + &ZBUFF_compressInit, + &ZBUFF_compressInitDictionary, + &ZBUFF_compressContinue, + &ZBUFF_compressFlush, + &ZBUFF_compressEnd, + &ZBUFF_createDCtx, + &ZBUFF_freeDCtx, + &ZBUFF_decompressInit, + &ZBUFF_decompressInitDictionary, + &ZBUFF_decompressContinue, + &ZBUFF_isError, + &ZBUFF_getErrorName, + &ZBUFF_recommendedCInSize, + &ZBUFF_recommendedCOutSize, + &ZBUFF_recommendedDInSize, + &ZBUFF_recommendedDOutSize, +/* zbuff.h: advanced functions */ + &ZBUFF_createCCtx_advanced, + &ZBUFF_createDCtx_advanced, + &ZBUFF_compressInit_advanced, +/* zdict.h */ + &ZDICT_trainFromBuffer, + &ZDICT_getDictID, + &ZDICT_isError, + &ZDICT_getErrorName, +/* zdict.h: advanced functions */ + &ZDICT_trainFromBuffer_cover, + &ZDICT_optimizeTrainFromBuffer_cover, + &ZDICT_finalizeDictionary, + &ZDICT_trainFromBuffer_legacy, + &ZDICT_addEntropyTablesFromBuffer, + NULL, +}; + +int main(int argc, const char** argv) { + const void **symbol; + (void)argc; + (void)argv; + + for (symbol = symbols; *symbol != NULL; ++symbol) { + printf("%p\n", *symbol); + } + return 0; +} diff --git a/src/zstd/tests/test-zstd-speed.py b/src/zstd/tests/test-zstd-speed.py new file mode 100755 index 00000000..1096d5e4 --- /dev/null +++ b/src/zstd/tests/test-zstd-speed.py @@ -0,0 +1,376 @@ +#! /usr/bin/env python3 + +# ################################################################ +# Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ########################################################################## + +# Limitations: +# - doesn't support filenames with spaces +# - dir1/zstd and dir2/zstd will be merged in a single results file + +import argparse +import os # getloadavg +import string +import subprocess +import time # strftime +import traceback +import hashlib +import platform # system + +script_version = 'v1.1.2 (2017-03-26)' +default_repo_url = 'https://github.com/facebook/zstd.git' +working_dir_name = 'speedTest' +working_path = os.getcwd() + '/' + working_dir_name # /path/to/zstd/tests/speedTest +clone_path = working_path + '/' + 'zstd' # /path/to/zstd/tests/speedTest/zstd +email_header = 'ZSTD_speedTest' +pid = str(os.getpid()) +verbose = False +clang_version = "unknown" +gcc_version = "unknown" +args = None + + +def hashfile(hasher, fname, blocksize=65536): + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(blocksize), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def log(text): + print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text) + + +def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True): + if print_command: + log("> " + command) + popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd) + stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout) + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + if stdout_lines: + print(stdout_lines) + if stderr_lines: + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if stderr_lines and not print_output and print_error: + print(stderr_lines) + raise RuntimeError(stdout_lines + stderr_lines) + return (stdout_lines + stderr_lines).splitlines() +execute.cwd = None + + +def does_command_exist(command): + try: + execute(command, verbose, False, False) + except Exception: + return False + return True + + +def send_email(emails, topic, text, have_mutt, have_mail): + logFileName = working_path + '/' + 'tmpEmailContent' + with open(logFileName, "w") as myfile: + myfile.writelines(text) + myfile.close() + if have_mutt: + execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) + elif have_mail: + execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) + else: + log("e-mail cannot be sent (mail or mutt not found)") + + +def send_email_with_attachments(branch, commit, last_commit, args, text, results_files, + logFileName, have_mutt, have_mail): + with open(logFileName, "w") as myfile: + myfile.writelines(text) + myfile.close() + email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \ + % (email_header, pid, branch, commit, last_commit, + args.lowerLimit, args.ratioLimit) + if have_mutt: + execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files + + ' < ' + logFileName) + elif have_mail: + execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName) + else: + log("e-mail cannot be sent (mail or mutt not found)") + + +def git_get_branches(): + execute('git fetch -p', verbose) + branches = execute('git branch -rl', verbose) + output = [] + for line in branches: + if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line): + output.append(line.strip()) + return output + + +def git_get_changes(branch, commit, last_commit): + fmt = '--format="%h: (%an) %s, %ar"' + if last_commit is None: + commits = execute('git log -n 10 %s %s' % (fmt, commit)) + else: + commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit)) + return str('Changes in %s since %s:\n' % (branch, last_commit)) + '\n'.join(commits) + + +def get_last_results(resultsFileName): + if not os.path.isfile(resultsFileName): + return None, None, None, None + commit = None + csize = [] + cspeed = [] + dspeed = [] + with open(resultsFileName, 'r') as f: + for line in f: + words = line.split() + if len(words) <= 4: # branch + commit + compilerVer + md5 + commit = words[1] + csize = [] + cspeed = [] + dspeed = [] + if (len(words) == 8) or (len(words) == 9): # results: "filename" or "XX files" + csize.append(int(words[1])) + cspeed.append(float(words[3])) + dspeed.append(float(words[5])) + return commit, csize, cspeed, dspeed + + +def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, + testFilePath, fileName, last_csize, last_cspeed, last_dspeed): + sleepTime = 30 + while os.getloadavg()[0] > args.maxLoadAvg: + log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds" + % (os.getloadavg()[0], args.maxLoadAvg, sleepTime)) + time.sleep(sleepTime) + start_load = str(os.getloadavg()) + osType = platform.system() + if osType == 'Linux': + cpuSelector = "taskset --cpu-list 0" + else: + cpuSelector = "" + if args.dictionary: + result = execute('%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True) + else: + result = execute('%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath), print_output=True) + end_load = str(os.getloadavg()) + linesExpected = args.lastCLevel + 1 + if len(result) != linesExpected: + raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result))) + with open(resultsFileName, "a") as myfile: + myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum)) + myfile.write('\n'.join(result) + '\n') + myfile.close() + if (last_cspeed == None): + log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName)) + return "" + commit, csize, cspeed, dspeed = get_last_results(resultsFileName) + text = "" + for i in range(0, min(len(cspeed), len(last_cspeed))): + print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName)) + if (cspeed[i]/last_cspeed[i] < args.lowerLimit): + text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName) + if (dspeed[i]/last_dspeed[i] < args.lowerLimit): + text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName) + if (float(last_csize[i])/csize[i] < args.ratioLimit): + text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName) + if text: + text = args.message + ("\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text + return text + + +def update_config_file(branch, commit): + last_commit = None + commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt" + if os.path.isfile(commitFileName): + with open(commitFileName, 'r') as infile: + last_commit = infile.read() + with open(commitFileName, 'w') as outfile: + outfile.write(commit) + return last_commit + + +def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName): + last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName) + if not args.dry_run: + text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) + if text: + log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit)) + text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) + return text + + +def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail): + local_branch = branch.split('/')[1] + version = local_branch.rpartition('-')[2] + '_' + commit + if not args.dry_run: + execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version + + 'mv programs/zstd programs/zstd_clang && ' + + 'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version) + md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd') + md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32') + md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang') + print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang)) + print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version)) + + logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt" + text_to_send = [] + results_files = "" + if args.dictionary: + dictName = args.dictionary.rpartition('/')[2] + else: + dictName = None + + for filePath in testFilePaths: + fileName = filePath.rpartition('/')[2] + if dictName: + resultsFileName = working_path + "/" + dictName.replace(".", "_") + "_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + else: + resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'zstd', md5_zstd, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'zstd32', md5_zstd32, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + resultsFileName = working_path + "/resultsClang_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'zstd_clang', md5_zstd_clang, 'clang_version='+clang_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + if text_to_send: + send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('testFileNames', help='file or directory names list for speed benchmark') + parser.add_argument('emails', help='list of e-mail addresses to send warnings') + parser.add_argument('--dictionary', '-D', help='path to the dictionary') + parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="") + parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url) + parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98) + parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999) + parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75) + parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5) + parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300) + parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800) + parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False) + parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False) + args = parser.parse_args() + verbose = args.verbose + + # check if test files are accessible + testFileNames = args.testFileNames.split() + testFilePaths = [] + for fileName in testFileNames: + fileName = os.path.expanduser(fileName) + if os.path.isfile(fileName) or os.path.isdir(fileName): + testFilePaths.append(os.path.abspath(fileName)) + else: + log("ERROR: File/directory not found: " + fileName) + exit(1) + + # check if dictionary is accessible + if args.dictionary: + args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary)) + if not os.path.isfile(args.dictionary): + log("ERROR: Dictionary not found: " + args.dictionary) + exit(1) + + # check availability of e-mail senders + have_mutt = does_command_exist("mutt -h") + have_mail = does_command_exist("mail -V") + if not have_mutt and not have_mail: + log("ERROR: e-mail senders 'mail' or 'mutt' not found") + exit(1) + + clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]; + gcc_version = execute("gcc -dumpversion", verbose)[0]; + + if verbose: + print("PARAMETERS:\nrepoURL=%s" % args.repoURL) + print("working_path=%s" % working_path) + print("clone_path=%s" % clone_path) + print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths)) + print("message=%s" % args.message) + print("emails=%s" % args.emails) + print("dictionary=%s" % args.dictionary) + print("maxLoadAvg=%s" % args.maxLoadAvg) + print("lowerLimit=%s" % args.lowerLimit) + print("ratioLimit=%s" % args.ratioLimit) + print("lastCLevel=%s" % args.lastCLevel) + print("sleepTime=%s" % args.sleepTime) + print("timeout=%s" % args.timeout) + print("dry_run=%s" % args.dry_run) + print("verbose=%s" % args.verbose) + print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail)) + + # clone ZSTD repo if needed + if not os.path.isdir(working_path): + os.mkdir(working_path) + if not os.path.isdir(clone_path): + execute.cwd = working_path + execute('git clone ' + args.repoURL) + if not os.path.isdir(clone_path): + log("ERROR: ZSTD clone not found: " + clone_path) + exit(1) + execute.cwd = clone_path + + # check if speedTest.pid already exists + pidfile = "./speedTest.pid" + if os.path.isfile(pidfile): + log("ERROR: %s already exists, exiting" % pidfile) + exit(1) + + send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail) + with open(pidfile, 'w') as the_file: + the_file.write(pid) + + branch = "" + commit = "" + first_time = True + while True: + try: + if first_time: + first_time = False + else: + time.sleep(args.sleepTime) + loadavg = os.getloadavg()[0] + if (loadavg <= args.maxLoadAvg): + branches = git_get_branches() + for branch in branches: + commit = execute('git show -s --format=%h ' + branch, verbose)[0] + last_commit = update_config_file(branch, commit) + if commit == last_commit: + log("skipping branch %s: head %s already processed" % (branch, commit)) + else: + log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit)) + execute('git checkout -- . && git checkout ' + branch) + print(git_get_changes(branch, commit, last_commit)) + test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail) + else: + log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg)) + if verbose: + log("sleep for %s seconds" % args.sleepTime) + except Exception as e: + stack = traceback.format_exc() + email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit) + send_email(args.emails, email_topic, stack, have_mutt, have_mail) + print(stack) + except KeyboardInterrupt: + os.unlink(pidfile) + send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail) + exit(0) diff --git a/src/zstd/tests/test-zstd-versions.py b/src/zstd/tests/test-zstd-versions.py new file mode 100755 index 00000000..f2deac1f --- /dev/null +++ b/src/zstd/tests/test-zstd-versions.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +"""Test zstd interoperability between versions""" + +# ################################################################ +# Copyright (c) 2016-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +import filecmp +import glob +import hashlib +import os +import shutil +import sys +import subprocess +from subprocess import Popen, PIPE + +repo_url = 'https://github.com/facebook/zstd.git' +tmp_dir_name = 'tests/versionsTest' +make_cmd = 'make' +git_cmd = 'git' +test_dat_src = 'README.md' +test_dat = 'test_dat' +head = 'vdevel' +dict_source = 'dict_source' +dict_files = './zstd/programs/*.c ./zstd/lib/common/*.c ./zstd/lib/compress/*.c ./zstd/lib/decompress/*.c ./zstd/lib/dictBuilder/*.c ./zstd/lib/legacy/*.c ' +dict_files += './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h ./zstd/lib/dictBuilder/*.h ./zstd/lib/legacy/*.h' + + +def execute(command, print_output=False, print_error=True, param_shell=False): + popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell) + stdout_lines, stderr_lines = popen.communicate() + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + print(stdout_lines) + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if not print_output and print_error: + print(stderr_lines) + return popen.returncode + + +def proc(cmd_args, pipe=True, dummy=False): + if dummy: + return + if pipe: + subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE) + else: + subproc = Popen(cmd_args) + return subproc.communicate() + + +def make(args, pipe=True): + return proc([make_cmd] + args, pipe) + + +def git(args, pipe=True): + return proc([git_cmd] + args, pipe) + + +def get_git_tags(): + stdout, stderr = git(['tag', '-l', 'v[0-9].[0-9].[0-9]']) + tags = stdout.decode('utf-8').split() + return tags + + +def create_dict(tag, dict_source_path): + dict_name = 'dict.' + tag + if not os.path.isfile(dict_name): + cFiles = glob.glob(dict_source_path + "/*.c") + hFiles = glob.glob(dict_source_path + "/*.h") + if tag == 'v0.5.0': + result = execute('./dictBuilder.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) + else: + result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) + if result == 0: + print(dict_name + ' created') + else: + print('ERROR: creating of ' + dict_name + ' failed') + else: + print(dict_name + ' already exists') + + +def dict_compress_sample(tag, sample): + dict_name = 'dict.' + tag + DEVNULL = open(os.devnull, 'wb') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_01_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_05_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_09_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-15f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_15_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-18f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_18_64_' + tag + '_dictio.zst') + # zstdFiles = glob.glob("*.zst*") + # print(zstdFiles) + print(tag + " : dict compression completed") + + +def compress_sample(tag, sample): + DEVNULL = open(os.devnull, 'wb') + if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst') + if subprocess.call(['./zstd.' + tag, '-5f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodict.zst') + if subprocess.call(['./zstd.' + tag, '-9f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodict.zst') + if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodict.zst') + if subprocess.call(['./zstd.' + tag, '-18f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodict.zst') + # zstdFiles = glob.glob("*.zst*") + # print(zstdFiles) + print(tag + " : compression completed") + + +# http://stackoverflow.com/a/19711609/2132223 +def sha1_of_file(filepath): + with open(filepath, 'rb') as f: + return hashlib.sha1(f.read()).hexdigest() + + +def remove_duplicates(): + list_of_zst = sorted(glob.glob('*.zst')) + for i, ref_zst in enumerate(list_of_zst): + if not os.path.isfile(ref_zst): + continue + for j in range(i + 1, len(list_of_zst)): + compared_zst = list_of_zst[j] + if not os.path.isfile(compared_zst): + continue + if filecmp.cmp(ref_zst, compared_zst): + os.remove(compared_zst) + print('duplicated : {} == {}'.format(ref_zst, compared_zst)) + + +def decompress_zst(tag): + dec_error = 0 + list_zst = sorted(glob.glob('*_nodict.zst')) + for file_zst in list_zst: + print(file_zst, end=' ') + print(tag, end=' ') + file_dec = file_zst + '_d64_' + tag + '.dec' + if tag <= 'v0.5.0': + params = ['./zstd.' + tag, '-df', file_zst, file_dec] + else: + params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec] + if execute(params) == 0: + if not filecmp.cmp(file_dec, test_dat): + print('ERR !! ') + dec_error = 1 + else: + print('OK ') + else: + print('command does not work') + dec_error = 1 + return dec_error + + +def decompress_dict(tag): + dec_error = 0 + list_zst = sorted(glob.glob('*_dictio.zst')) + for file_zst in list_zst: + dict_tag = file_zst[0:len(file_zst)-11] # remove "_dictio.zst" + if head in dict_tag: # find vdevel + dict_tag = head + else: + dict_tag = dict_tag[dict_tag.rfind('v'):] + if tag == 'v0.6.0' and dict_tag < 'v0.6.0': + continue + dict_name = 'dict.' + dict_tag + print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ') + file_dec = file_zst + '_d64_' + tag + '.dec' + if tag <= 'v0.5.0': + params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec] + else: + params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec] + if execute(params) == 0: + if not filecmp.cmp(file_dec, test_dat): + print('ERR !! ') + dec_error = 1 + else: + print('OK ') + else: + print('command does not work') + dec_error = 1 + return dec_error + + +if __name__ == '__main__': + error_code = 0 + base_dir = os.getcwd() + '/..' # /path/to/zstd + tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/tests/versionsTest + clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/tests/versionsTest/zstd + dict_source_path = tmp_dir + '/' + dict_source # /path/to/zstd/tests/versionsTest/dict_source + programs_dir = base_dir + '/programs' # /path/to/zstd/programs + os.makedirs(tmp_dir, exist_ok=True) + + # since Travis clones limited depth, we should clone full repository + if not os.path.isdir(clone_dir): + git(['clone', repo_url, clone_dir]) + + shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat) + + # Retrieve all release tags + print('Retrieve all release tags :') + os.chdir(clone_dir) + alltags = get_git_tags() + [head] + tags = [t for t in alltags if t >= 'v0.4.0'] + print(tags) + + # Build all release zstd + for tag in tags: + os.chdir(base_dir) + dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/zstd.<TAG> + if not os.path.isfile(dst_zstd) or tag == head: + if tag != head: + r_dir = '{}/{}'.format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/<TAG> + os.makedirs(r_dir, exist_ok=True) + os.chdir(clone_dir) + git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False) + if tag == 'v0.5.0': + os.chdir(r_dir + '/dictBuilder') # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder + make(['clean', 'dictBuilder'], False) + shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag)) + os.chdir(r_dir + '/programs') # /path/to/zstd/tests/versionsTest/<TAG>/programs + make(['clean', 'zstd'], False) + else: + os.chdir(programs_dir) + make(['zstd'], False) + shutil.copy2('zstd', dst_zstd) + + # remove any remaining *.zst and *.dec from previous test + os.chdir(tmp_dir) + for compressed in glob.glob("*.zst"): + os.remove(compressed) + for dec in glob.glob("*.dec"): + os.remove(dec) + + # copy *.c and *.h to a temporary directory ("dict_source") + if not os.path.isdir(dict_source_path): + os.mkdir(dict_source_path) + print('cp ' + dict_files + ' ' + dict_source_path) + execute('cp ' + dict_files + ' ' + dict_source_path, param_shell=True) + + print('Compress test.dat by all released zstd') + + error_code = 0 + for tag in tags: + print(tag) + if tag >= 'v0.5.0': + create_dict(tag, dict_source_path) + dict_compress_sample(tag, test_dat) + remove_duplicates() + error_code += decompress_dict(tag) + compress_sample(tag, test_dat) + remove_duplicates() + error_code += decompress_zst(tag) + + print('') + print('Enumerate different compressed files') + zstds = sorted(glob.glob('*.zst')) + for zstd in zstds: + print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd)) + + if error_code != 0: + print('====== ERROR !!! =======') + + sys.exit(error_code) diff --git a/src/zstd/tests/zbufftest.c b/src/zstd/tests/zbufftest.c new file mode 100644 index 00000000..e64c886a --- /dev/null +++ b/src/zstd/tests/zbufftest.c @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2015-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <time.h> /* clock_t, clock() */ +#include <string.h> /* strcmp */ +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel */ +#include "zstd.h" /* ZSTD_compressBound */ +#define ZBUFF_STATIC_LINKING_ONLY /* ZBUFF_createCCtx_advanced */ +#include "zbuff.h" /* ZBUFF_isError */ +#include "datagen.h" /* RDG_genBuffer */ +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64_* */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const U32 nbTestsDefault = 10000; +#define COMPRESSIBLE_NOISE_LENGTH (10 MB) +#define FUZ_COMPRESSIBILITY_DEFAULT 50 +static const U32 prime1 = 2654435761U; +static const U32 prime2 = 2246822519U; + + + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((FUZ_GetClockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } } +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_displayClock = 0; + +static clock_t g_clockTime = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MAX(a,b) ((a)>(b)?(a):(b)) + +static clock_t FUZ_GetClockSpan(clock_t clockStart) +{ + return clock() - clockStart; /* works even when overflow. Max span ~ 30 mn */ +} + +/*! FUZ_rand() : + @return : a 27 bits random value, from a 32-bits `seed`. + `seed` is also modified */ +# define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +unsigned int FUZ_rand(unsigned int* seedPtr) +{ + U32 rand32 = *seedPtr; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *seedPtr = rand32; + return rand32 >> 5; +} + + +/* +static unsigned FUZ_highbit32(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + for ( ; v32 ; v32>>=1) nbBits++; + return nbBits; +} +*/ + +static void* ZBUFF_allocFunction(void* opaque, size_t size) +{ + void* address = malloc(size); + (void)opaque; + /* DISPLAYLEVEL(4, "alloc %p, %d opaque=%p \n", address, (int)size, opaque); */ + return address; +} + +static void ZBUFF_freeFunction(void* opaque, void* address) +{ + (void)opaque; + /* if (address) DISPLAYLEVEL(4, "free %p opaque=%p \n", address, opaque); */ + free(address); +} + +static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem) +{ + int testResult = 0; + size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; + void* CNBuffer = malloc(CNBufferSize); + size_t const skippableFrameSize = 11; + size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH); + void* compressedBuffer = malloc(compressedBufferSize); + size_t const decodedBufferSize = CNBufferSize; + void* decodedBuffer = malloc(decodedBufferSize); + size_t cSize, readSize, readSkipSize, genSize; + U32 testNb=0; + ZBUFF_CCtx* zc = ZBUFF_createCCtx_advanced(customMem); + ZBUFF_DCtx* zd = ZBUFF_createDCtx_advanced(customMem); + + /* Create compressible test buffer */ + if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) { + DISPLAY("Not enough memory, aborting\n"); + goto _output_error; + } + RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed); + + /* generate skippable frame */ + MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START); + MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize); + cSize = skippableFrameSize + 8; + + /* Basic compression test */ + DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZBUFF_compressInitDictionary(zc, CNBuffer, 128 KB, 1); + readSize = CNBufferSize; + genSize = compressedBufferSize; + { size_t const r = ZBUFF_compressContinue(zc, ((char*)compressedBuffer)+cSize, &genSize, CNBuffer, &readSize); + if (ZBUFF_isError(r)) goto _output_error; } + if (readSize != CNBufferSize) goto _output_error; /* entire input should be consumed */ + cSize += genSize; + genSize = compressedBufferSize - cSize; + { size_t const r = ZBUFF_compressEnd(zc, ((char*)compressedBuffer)+cSize, &genSize); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize += genSize; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* skippable frame test */ + DISPLAYLEVEL(4, "test%3i : decompress skippable frame : ", testNb++); + ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB); + readSkipSize = cSize; + genSize = CNBufferSize; + { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, compressedBuffer, &readSkipSize); + if (r != 0) goto _output_error; } + if (genSize != 0) goto _output_error; /* skippable frame len is 0 */ + DISPLAYLEVEL(4, "OK \n"); + + /* Basic decompression test */ + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB); + readSize = cSize - readSkipSize; + genSize = CNBufferSize; + { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, ((char*)compressedBuffer)+readSkipSize, &readSize); + if (r != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (genSize != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (readSize+readSkipSize != cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(4, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;; + } } + DISPLAYLEVEL(4, "OK \n"); + + /* Byte-by-byte decompression test */ + DISPLAYLEVEL(4, "test%3i : decompress byte-by-byte : ", testNb++); + { size_t r, pIn=0, pOut=0; + do + { ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB); + r = 1; + while (r) { + size_t inS = 1; + size_t outS = 1; + r = ZBUFF_decompressContinue(zd, ((BYTE*)decodedBuffer)+pOut, &outS, ((BYTE*)compressedBuffer)+pIn, &inS); + pIn += inS; + pOut += outS; + } + readSize = pIn; + genSize = pOut; + } while (genSize==0); + } + if (genSize != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (readSize != cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(4, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;; + } } + DISPLAYLEVEL(4, "OK \n"); + +_end: + ZBUFF_freeCCtx(zc); + ZBUFF_freeDCtx(zd); + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + return testResult; + +_output_error: + testResult = 1; + DISPLAY("Error detected in Unit tests ! \n"); + goto _end; +} + + +static size_t findDiff(const void* buf1, const void* buf2, size_t max) +{ + const BYTE* b1 = (const BYTE*)buf1; + const BYTE* b2 = (const BYTE*)buf2; + size_t u; + for (u=0; u<max; u++) { + if (b1[u] != b2[u]) break; + } + return u; +} + +static size_t FUZ_rLogLength(U32* seed, U32 logLength) +{ + size_t const lengthMask = ((size_t)1 << logLength) - 1; + return (lengthMask+1) + (FUZ_rand(seed) & lengthMask); +} + +static size_t FUZ_randomLength(U32* seed, U32 maxLog) +{ + U32 const logLength = FUZ_rand(seed) % maxLog; + return FUZ_rLogLength(seed, logLength); +} + +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } + +static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility) +{ + static const U32 maxSrcLog = 24; + static const U32 maxSampleLog = 19; + BYTE* cNoiseBuffer[5]; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* copyBuffer; + size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog); + BYTE* cBuffer; + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* dstBuffer; + size_t dstBufferSize = srcBufferSize; + U32 result = 0; + U32 testNb = 0; + U32 coreSeed = seed; + ZBUFF_CCtx* zc; + ZBUFF_DCtx* zd; + clock_t startClock = clock(); + + /* allocations */ + zc = ZBUFF_createCCtx(); + zd = ZBUFF_createDCtx(); + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + copyBuffer= (BYTE*)malloc (copyBufferSize); + dstBuffer = (BYTE*)malloc (dstBufferSize); + cBuffer = (BYTE*)malloc (cBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd, + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (FUZ_GetClockSpan(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + const BYTE* dict; + size_t maxTestSize, dictSize; + size_t cSize, totalTestSize, totalCSize, totalGenSize; + size_t errorCode; + U32 n, nbChunks; + XXH64_state_t xxhState; + U64 crcOrig; + + /* init */ + DISPLAYUPDATE(2, "\r%6u", testNb); + if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u ", nbTests); + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime1; + + /* states full reset (unsynchronized) */ + /* some issues only happen when reusing states in a specific sequence of parameters */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { ZBUFF_freeCCtx(zc); zc = ZBUFF_createCCtx(); } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { ZBUFF_freeDCtx(zd); zd = ZBUFF_createDCtx(); } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1; + maxTestSize = FUZ_rLogLength(&lseed, testLog); + dictSize = (FUZ_rand(&lseed)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0; + /* random dictionary selection */ + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + { size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, 0); + CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError)); + } } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + nbChunks = (FUZ_rand(&lseed) & 127) + 2; + for (n=0, cSize=0, totalTestSize=0 ; (n<nbChunks) && (totalTestSize < maxTestSize) ; n++) { + /* compress random chunk into random size dst buffer */ + { size_t readChunkSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - readChunkSize); + + size_t const compressionError = ZBUFF_compressContinue(zc, cBuffer+cSize, &dstBuffSize, srcBuffer+srcStart, &readChunkSize); + CHECK (ZBUFF_isError(compressionError), "compression error : %s", ZBUFF_getErrorName(compressionError)); + + XXH64_update(&xxhState, srcBuffer+srcStart, readChunkSize); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, readChunkSize); + cSize += dstBuffSize; + totalTestSize += readChunkSize; + } + + /* random flush operation, to mess around */ + if ((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + size_t const flushError = ZBUFF_compressFlush(zc, cBuffer+cSize, &dstBuffSize); + CHECK (ZBUFF_isError(flushError), "flush error : %s", ZBUFF_getErrorName(flushError)); + cSize += dstBuffSize; + } } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + U32 const enoughDstSize = dstBuffSize >= remainingToFlush; + remainingToFlush = ZBUFF_compressEnd(zc, cBuffer+cSize, &dstBuffSize); + CHECK (ZBUFF_isError(remainingToFlush), "flush error : %s", ZBUFF_getErrorName(remainingToFlush)); + CHECK (enoughDstSize && remainingToFlush, "ZBUFF_compressEnd() not fully flushed (%u remaining), but enough space available", (U32)remainingToFlush); + cSize += dstBuffSize; + } } + crcOrig = XXH64_digest(&xxhState); + + /* multi - fragments decompression test */ + ZBUFF_decompressInitDictionary(zd, dict, dictSize); + errorCode = 1; + for (totalCSize = 0, totalGenSize = 0 ; errorCode ; ) { + size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize); + CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode)); + totalGenSize += dstBuffSize; + totalCSize += readCSrcSize; + } + CHECK (errorCode != 0, "frame not fully decoded"); + CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size") + CHECK (totalCSize != cSize, "compressed data should be fully read") + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + ZBUFF_decompressInit(zd); + totalCSize = 0; + totalGenSize = 0; + while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) ) { + size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize); + if (ZBUFF_isError(decompressError)) break; /* error correctly detected */ + totalGenSize += dstBuffSize; + totalCSize += readCSrcSize; + } } + DISPLAY("\r%u fuzzer tests completed \n", testNb); + +_cleanup: + ZBUFF_freeCCtx(zc); + ZBUFF_freeDCtx(zd); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + +/*-******************************************************* +* Command line +*********************************************************/ +int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + U32 seed=0; + int seedset=0; + int argNb; + int nbTests = nbTestsDefault; + int testNb = 0; + int proba = FUZ_COMPRESSIBILITY_DEFAULT; + int result=0; + U32 mainPause = 0; + const char* programName = argv[0]; + ZSTD_customMem customMem = { ZBUFF_allocFunction, ZBUFF_freeFunction, NULL }; + ZSTD_customMem customNULL = { NULL, NULL, NULL }; + + /* Check command line */ + for(argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Parsing commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + argument++; + + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + case 'v': + argument++; + g_displayLevel=4; + break; + case 'q': + argument++; + g_displayLevel--; + break; + case 'p': /* pause at the end */ + argument++; + mainPause = 1; + break; + + case 'i': + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + nbTests *= 10; + nbTests += *argument - '0'; + argument++; + } + break; + + case 'T': + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + g_clockTime *= 10; + g_clockTime += *argument - '0'; + argument++; + } + if (*argument=='m') g_clockTime *=60, argument++; + if (*argument=='n') argument++; + g_clockTime *= CLOCKS_PER_SEC; + break; + + case 's': + argument++; + seed=0; + seedset=1; + while ((*argument>='0') && (*argument<='9')) { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + + case 't': + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) { + testNb *= 10; + testNb += *argument - '0'; + argument++; + } + break; + + case 'P': /* compressibility % */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if (proba>100) proba=100; + break; + + default: + return FUZ_usage(programName); + } + } } } /* for(argNb=1; argNb<argc; argNb++) */ + + /* Get Seed */ + DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + DISPLAY("Seed = %u\n", seed); + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + + if (nbTests<=0) nbTests=1; + + if (testNb==0) { + result = basicUnitTests(0, ((double)proba) / 100, customNULL); /* constant seed for predictability */ + if (!result) { + DISPLAYLEVEL(4, "Unit tests using customMem :\n") + result = basicUnitTests(0, ((double)proba) / 100, customMem); /* use custom memory allocation functions */ + } } + + if (!result) + result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + + if (mainPause) { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + return result; +} diff --git a/src/zstd/tests/zstreamtest.c b/src/zstd/tests/zstreamtest.c new file mode 100644 index 00000000..8b4c8369 --- /dev/null +++ b/src/zstd/tests/zstreamtest.c @@ -0,0 +1,1748 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <time.h> /* clock_t, clock() */ +#include <string.h> /* strcmp */ +#include <assert.h> /* assert */ +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */ +#include "zstd.h" /* ZSTD_compressBound */ +#include "zstd_errors.h" /* ZSTD_error_srcSize_wrong */ +#include "zstdmt_compress.h" +#include "zdict.h" /* ZDICT_trainFromBuffer */ +#include "datagen.h" /* RDG_genBuffer */ +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH64_* */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const U32 nbTestsDefault = 10000; +static const U32 g_cLevelMax_smallTests = 10; +#define COMPRESSIBLE_NOISE_LENGTH (10 MB) +#define FUZ_COMPRESSIBILITY_DEFAULT 50 +static const U32 prime32 = 2654435761U; + + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { \ + DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } +static U32 g_displayLevel = 2; + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((FUZ_GetClockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } } +static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6; +static clock_t g_displayClock = 0; + +static clock_t g_clockTime = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MAX(a,b) ((a)>(b)?(a):(b)) + +static clock_t FUZ_GetClockSpan(clock_t clockStart) +{ + return clock() - clockStart; /* works even when overflow. Max span ~ 30 mn */ +} + +/*! FUZ_rand() : + @return : a 27 bits random value, from a 32-bits `seed`. + `seed` is also modified */ +#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +unsigned int FUZ_rand(unsigned int* seedPtr) +{ + static const U32 prime2 = 2246822519U; + U32 rand32 = *seedPtr; + rand32 *= prime32; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *seedPtr = rand32; + return rand32 >> 5; +} + +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ + goto _output_error; \ +} } + + +/*====================================================== +* Basic Unit tests +======================================================*/ + +typedef struct { + void* start; + size_t size; + size_t filled; +} buffer_t; + +static const buffer_t g_nullBuffer = { NULL, 0 , 0 }; + +static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blockSize, size_t requestedDictSize) +{ + buffer_t dict = { NULL, 0, 0 }; + size_t const nbBlocks = (srcSize + (blockSize-1)) / blockSize; + size_t* const blockSizes = (size_t*) malloc(nbBlocks * sizeof(size_t)); + if (!blockSizes) return dict; + dict.start = malloc(requestedDictSize); + if (!dict.start) { free(blockSizes); return dict; } + { size_t nb; + for (nb=0; nb<nbBlocks-1; nb++) blockSizes[nb] = blockSize; + blockSizes[nbBlocks-1] = srcSize - (blockSize * (nbBlocks-1)); + } + { size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks); + free(blockSizes); + if (ZDICT_isError(dictSize)) { free(dict.start); return g_nullBuffer; } + dict.size = requestedDictSize; + dict.filled = dictSize; + return dict; /* how to return dictSize ? */ + } +} + +static void FUZ_freeDictionary(buffer_t dict) +{ + free(dict.start); +} + + +static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem) +{ + size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; + void* CNBuffer = malloc(CNBufferSize); + size_t const skippableFrameSize = 11; + size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH); + void* compressedBuffer = malloc(compressedBufferSize); + size_t const decodedBufferSize = CNBufferSize; + void* decodedBuffer = malloc(decodedBufferSize); + size_t cSize; + int testResult = 0; + U32 testNb = 1; + ZSTD_CStream* zc = ZSTD_createCStream_advanced(customMem); + ZSTD_DStream* zd = ZSTD_createDStream_advanced(customMem); + ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2); + + ZSTD_inBuffer inBuff, inBuff2; + ZSTD_outBuffer outBuff; + buffer_t dictionary = g_nullBuffer; + size_t const dictSize = 128 KB; + unsigned dictID = 0; + + /* Create compressible test buffer */ + if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) { + DISPLAY("Not enough memory, aborting \n"); + goto _output_error; + } + RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed); + + /* Create dictionary */ + DISPLAYLEVEL(3, "creating dictionary for unit tests \n"); + dictionary = FUZ_createDictionary(CNBuffer, CNBufferSize / 2, 8 KB, 40 KB); + if (!dictionary.start) { + DISPLAY("Error creating dictionary, aborting \n"); + goto _output_error; + } + dictID = ZDICT_getDictID(dictionary.start, dictionary.filled); + + /* generate skippable frame */ + MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START); + MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize); + cSize = skippableFrameSize + 8; + + /* Basic compression test */ + DISPLAYLEVEL(3, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + CHECK_Z( ZSTD_initCStream_usingDict(zc, CNBuffer, dictSize, 1) ); + outBuff.dst = (char*)(compressedBuffer)+cSize; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize += outBuff.pos; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* context size functions */ + DISPLAYLEVEL(3, "test%3i : estimate CStream size : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictSize); + size_t const cstreamSize = ZSTD_estimateCStreamSize_usingCParams(cParams); + size_t const cdictSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); /* uses ZSTD_initCStream_usingDict() */ + if (ZSTD_isError(cstreamSize)) goto _output_error; + if (ZSTD_isError(cdictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)(cstreamSize + cdictSize)); + } + + DISPLAYLEVEL(3, "test%3i : check actual CStream size : ", testNb++); + { size_t const s = ZSTD_sizeof_CStream(zc); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s); + } + + /* Attempt bad compression parameters */ + DISPLAYLEVEL(3, "test%3i : use bad compression parameters : ", testNb++); + { size_t r; + ZSTD_parameters params = ZSTD_getParams(1, 0, 0); + params.cParams.searchLength = 2; + r = ZSTD_initCStream_advanced(zc, NULL, 0, params, 0); + if (!ZSTD_isError(r)) goto _output_error; + DISPLAYLEVEL(3, "init error : %s \n", ZSTD_getErrorName(r)); + } + + /* skippable frame test */ + DISPLAYLEVEL(3, "test%3i : decompress skippable frame : ", testNb++); + CHECK_Z( ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize) ); + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + DISPLAYLEVEL(5, " ( ZSTD_decompressStream => %u ) ", (U32)r); + if (r != 0) goto _output_error; + } + if (outBuff.pos != 0) goto _output_error; /* skippable frame output len is 0 */ + DISPLAYLEVEL(3, "OK \n"); + + /* Basic decompression test */ + inBuff2 = inBuff; + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + CHECK_Z( ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1000000000) ); /* large limit */ + { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(3, "OK \n"); + + /* Re-use without init */ + DISPLAYLEVEL(3, "test%3i : decompress again without init (re-use previous settings): ", testNb++); + outBuff.pos = 0; + { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff2); + if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(3, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error; + } } + DISPLAYLEVEL(3, "OK \n"); + + /* context size functions */ + DISPLAYLEVEL(3, "test%3i : estimate DStream size : ", testNb++); + { ZSTD_frameHeader fhi; + const void* cStart = (char*)compressedBuffer + (skippableFrameSize + 8); + size_t const gfhError = ZSTD_getFrameHeader(&fhi, cStart, cSize); + if (gfhError!=0) goto _output_error; + DISPLAYLEVEL(5, " (windowSize : %u) ", (U32)fhi.windowSize); + { size_t const s = ZSTD_estimateDStreamSize(fhi.windowSize) + /* uses ZSTD_initDStream_usingDict() */ + + ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s); + } } + + DISPLAYLEVEL(3, "test%3i : check actual DStream size : ", testNb++); + { size_t const s = ZSTD_sizeof_DStream(zd); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s); + } + + /* Byte-by-byte decompression test */ + DISPLAYLEVEL(3, "test%3i : decompress byte-by-byte : ", testNb++); + { /* skippable frame */ + size_t r = 1; + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + inBuff.src = compressedBuffer; + outBuff.dst = decodedBuffer; + inBuff.pos = 0; + outBuff.pos = 0; + while (r) { /* skippable frame */ + inBuff.size = inBuff.pos + 1; + outBuff.size = outBuff.pos + 1; + r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(r)) goto _output_error; + } + /* normal frame */ + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + r=1; + while (r) { + inBuff.size = inBuff.pos + 1; + outBuff.size = outBuff.pos + 1; + r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(r)) goto _output_error; + } + } + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(3, "OK \n"); + + /* check regenerated data is byte exact */ + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); + { size_t i; + for (i=0; i<CNBufferSize; i++) { + if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;; + } } + DISPLAYLEVEL(3, "OK \n"); + + /* _srcSize compression test */ + DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZSTD_initCStream_srcSize(zc, 1, CNBufferSize); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + { unsigned long long origSize = ZSTD_findDecompressedSize(outBuff.dst, outBuff.pos); + if ((size_t)origSize != CNBufferSize) goto _output_error; } /* exact original size must be present */ + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* wrong _srcSize compression test */ + DISPLAYLEVEL(3, "test%3i : wrong srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1); + ZSTD_initCStream_srcSize(zc, 1, CNBufferSize-1); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; /* must fail : wrong srcSize */ + DISPLAYLEVEL(3, "OK (error detected : %s) \n", ZSTD_getErrorName(r)); } + + /* Complex context re-use scenario */ + DISPLAYLEVEL(3, "test%3i : context re-use : ", testNb++); + ZSTD_freeCStream(zc); + zc = ZSTD_createCStream_advanced(customMem); + if (zc==NULL) goto _output_error; /* memory allocation issue */ + /* use 1 */ + { size_t const inSize = 513; + DISPLAYLEVEL(5, "use1 "); + ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize); /* needs btopt + search3 to trigger hashLog3 */ + inBuff.src = CNBuffer; + inBuff.size = inSize; + inBuff.pos = 0; + outBuff.dst = (char*)(compressedBuffer)+cSize; + outBuff.size = ZSTD_compressBound(inSize); + outBuff.pos = 0; + DISPLAYLEVEL(5, "compress1 "); + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + DISPLAYLEVEL(5, "end1 "); + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + } + /* use 2 */ + { size_t const inSize = 1025; /* will not continue, because tables auto-adjust and are therefore different size */ + DISPLAYLEVEL(5, "use2 "); + ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize); /* needs btopt + search3 to trigger hashLog3 */ + inBuff.src = CNBuffer; + inBuff.size = inSize; + inBuff.pos = 0; + outBuff.dst = (char*)(compressedBuffer)+cSize; + outBuff.size = ZSTD_compressBound(inSize); + outBuff.pos = 0; + DISPLAYLEVEL(5, "compress2 "); + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + DISPLAYLEVEL(5, "end2 "); + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + } + DISPLAYLEVEL(3, "OK \n"); + + /* CDict scenario */ + DISPLAYLEVEL(3, "test%3i : digested dictionary : ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1 /*byRef*/ ); + size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict); + if (ZSTD_isError(initError)) goto _output_error; + cSize = 0; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize = outBuff.pos; + ZSTD_freeCDict(cdict); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100); + } + + DISPLAYLEVEL(3, "test%3i : check CStream size : ", testNb++); + { size_t const s = ZSTD_sizeof_CStream(zc); + if (ZSTD_isError(s)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s); + } + + DISPLAYLEVEL(4, "test%3i : check Dictionary ID : ", testNb++); + { unsigned const dID = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (dID != dictID) goto _output_error; + DISPLAYLEVEL(4, "OK (%u) \n", dID); + } + + /* DDict scenario */ + DISPLAYLEVEL(3, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (U32)CNBufferSize); + { ZSTD_DDict* const ddict = ZSTD_createDDict(dictionary.start, dictionary.filled); + size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict); + if (ZSTD_isError(initError)) goto _output_error; + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (r != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ + ZSTD_freeDDict(ddict); + DISPLAYLEVEL(3, "OK \n"); + } + + /* test ZSTD_setDStreamParameter() resilience */ + DISPLAYLEVEL(3, "test%3i : wrong parameter for ZSTD_setDStreamParameter(): ", testNb++); + { size_t const r = ZSTD_setDStreamParameter(zd, (ZSTD_DStreamParameter_e)999, 1); /* large limit */ + if (!ZSTD_isError(r)) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* Memory restriction */ + DISPLAYLEVEL(3, "test%3i : maxWindowSize < frame requirement : ", testNb++); + ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize); + CHECK_Z( ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1000) ); /* too small limit */ + inBuff.src = compressedBuffer; + inBuff.size = cSize; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (!ZSTD_isError(r)) goto _output_error; /* must fail : frame requires > 100 bytes */ + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); } + + DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_usingCDict_advanced with masked dictID : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictionary.filled); + ZSTD_frameParameters const fParams = { 1 /* contentSize */, 1 /* checksum */, 1 /* noDictID */}; + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dm_auto, cParams, customMem); + size_t const initError = ZSTD_initCStream_usingCDict_advanced(zc, cdict, fParams, CNBufferSize); + if (ZSTD_isError(initError)) goto _output_error; + cSize = 0; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + cSize = outBuff.pos; + ZSTD_freeCDict(cdict); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100); + } + + DISPLAYLEVEL(3, "test%3i : try retrieving dictID from frame : ", testNb++); + { U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize); + if (did != 0) goto _output_error; + } + DISPLAYLEVEL(3, "OK (not detected) \n"); + + DISPLAYLEVEL(3, "test%3i : decompress without dictionary : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize); + if (!ZSTD_isError(r)) goto _output_error; /* must fail : dictionary not used */ + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); + } + + DISPLAYLEVEL(3, "test%3i : compress with ZSTD_CCtx_refPrefix : ", testNb++); + CHECK_Z( ZSTD_CCtx_refPrefix(zc, dictionary.start, dictionary.filled) ); + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compress_generic(zc, &outBuff, &inBuff, ZSTD_e_end) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + cSize = outBuff.pos; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100); + + DISPLAYLEVEL(3, "test%3i : decompress with dictionary : ", testNb++); + { size_t const r = ZSTD_decompress_usingDict(zd, + decodedBuffer, CNBufferSize, + compressedBuffer, cSize, + dictionary.start, dictionary.filled); + if (ZSTD_isError(r)) goto _output_error; /* must fail : dictionary not used */ + DISPLAYLEVEL(3, "OK \n"); + } + + DISPLAYLEVEL(3, "test%3i : decompress without dictionary (should fail): ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize); + if (!ZSTD_isError(r)) goto _output_error; /* must fail : dictionary not used */ + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); + } + + DISPLAYLEVEL(3, "test%3i : compress again with ZSTD_compress_generic : ", testNb++); + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTD_compress_generic(zc, &outBuff, &inBuff, ZSTD_e_end) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + cSize = outBuff.pos; + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100); + + DISPLAYLEVEL(3, "test%3i : decompress without dictionary (should work): ", testNb++); + CHECK_Z( ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize) ); + DISPLAYLEVEL(3, "OK \n"); + + /* Empty srcSize */ + DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_advanced with pledgedSrcSize=0 and dict : ", testNb++); + { ZSTD_parameters params = ZSTD_getParams(5, 0, 0); + params.fParams.contentSizeFlag = 1; + CHECK_Z( ZSTD_initCStream_advanced(zc, dictionary.start, dictionary.filled, params, 0) ); + } /* cstream advanced shall write content size = 0 */ + inBuff.src = CNBuffer; + inBuff.size = 0; + inBuff.pos = 0; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (ZSTD_endStream(zc, &outBuff) != 0) goto _output_error; + cSize = outBuff.pos; + if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly : ", testNb++); + { ZSTD_parameters params = ZSTD_getParams(5, 0, 0); + params.fParams.contentSizeFlag = 1; + CHECK_Z( ZSTD_initCStream_advanced(zc, NULL, 0, params, 0) ); + } /* cstream advanced shall write content size = 0 */ + inBuff.src = CNBuffer; + inBuff.size = 0; + inBuff.pos = 0; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (ZSTD_endStream(zc, &outBuff) != 0) goto _output_error; + cSize = outBuff.pos; + if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error; + + ZSTD_resetCStream(zc, 0); /* resetCStream should treat 0 as unknown */ + inBuff.src = CNBuffer; + inBuff.size = 0; + inBuff.pos = 0; + outBuff.dst = compressedBuffer; + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + if (ZSTD_endStream(zc, &outBuff) != 0) goto _output_error; + cSize = outBuff.pos; + if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + /* Basic multithreading compression test */ + DISPLAYLEVEL(3, "test%3i : compress %u bytes with multiple threads : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + { ZSTD_parameters const params = ZSTD_getParams(1, 0, 0); + CHECK_Z( ZSTDMT_initCStream_advanced(mtctx, CNBuffer, dictSize, params, CNBufferSize) ); + } + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + inBuff.src = CNBuffer; + inBuff.size = CNBufferSize; + inBuff.pos = 0; + CHECK_Z( ZSTDMT_compressStream_generic(mtctx, &outBuff, &inBuff, ZSTD_e_end) ); + if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ + { size_t const r = ZSTDMT_endStream(mtctx, &outBuff); + if (r != 0) goto _output_error; } /* error, or some data not flushed */ + DISPLAYLEVEL(3, "OK \n"); + + + /* Overlen overwriting window data bug */ + DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++); + { /* This test has a window size of 1024 bytes and consists of 3 blocks: + 1. 'a' repeated 517 times + 2. 'b' repeated 516 times + 3. a compressed block with no literals and 3 sequence commands: + litlength = 0, offset = 24, match length = 24 + litlength = 0, offset = 24, match length = 3 (this one creates an overlength write of length 2*WILDCOPY_OVERLENGTH - 3) + litlength = 0, offset = 1021, match length = 3 (this one will try to read from overwritten data if the buffer is too small) */ + + const char* testCase = + "\x28\xB5\x2F\xFD\x04\x00\x4C\x00\x00\x10\x61\x61\x01\x00\x00\x2A" + "\x80\x05\x44\x00\x00\x08\x62\x01\x00\x00\x2A\x20\x04\x5D\x00\x00" + "\x00\x03\x40\x00\x00\x64\x60\x27\xB0\xE0\x0C\x67\x62\xCE\xE0"; + ZSTD_DStream* const zds = ZSTD_createDStream(); + if (zds==NULL) goto _output_error; + + CHECK_Z( ZSTD_initDStream(zds) ); + inBuff.src = testCase; + inBuff.size = 47; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + + while (inBuff.pos < inBuff.size) { + CHECK_Z( ZSTD_decompressStream(zds, &outBuff, &inBuff) ); + } + + ZSTD_freeDStream(zds); + } + DISPLAYLEVEL(3, "OK \n"); + +_end: + FUZ_freeDictionary(dictionary); + ZSTD_freeCStream(zc); + ZSTD_freeDStream(zd); + ZSTDMT_freeCCtx(mtctx); + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + return testResult; + +_output_error: + testResult = 1; + DISPLAY("Error detected in Unit tests ! \n"); + goto _end; +} + + +/* ====== Fuzzer tests ====== */ + +static size_t findDiff(const void* buf1, const void* buf2, size_t max) +{ + const BYTE* b1 = (const BYTE*)buf1; + const BYTE* b2 = (const BYTE*)buf2; + size_t u; + for (u=0; u<max; u++) { + if (b1[u] != b2[u]) break; + } + DISPLAY("Error at position %u / %u \n", (U32)u, (U32)max); + DISPLAY(" %02X %02X %02X :%02X: %02X %02X %02X %02X %02X \n", + b1[u-3], b1[u-2], b1[u-1], b1[u-0], b1[u+1], b1[u+2], b1[u+3], b1[u+4], b1[u+5]); + DISPLAY(" %02X %02X %02X :%02X: %02X %02X %02X %02X %02X \n", + b2[u-3], b2[u-2], b2[u-1], b2[u-0], b2[u+1], b2[u+2], b2[u+3], b2[u+4], b2[u+5]); + return u; +} + +static size_t FUZ_rLogLength(U32* seed, U32 logLength) +{ + size_t const lengthMask = ((size_t)1 << logLength) - 1; + return (lengthMask+1) + (FUZ_rand(seed) & lengthMask); +} + +static size_t FUZ_randomLength(U32* seed, U32 maxLog) +{ + U32 const logLength = FUZ_rand(seed) % maxLog; + return FUZ_rLogLength(seed, logLength); +} + +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +/* Return value in range minVal <= v <= maxVal */ +static U32 FUZ_randomClampedLength(U32* seed, U32 minVal, U32 maxVal) +{ + U32 const mod = maxVal < minVal ? 1 : (maxVal + 1) - minVal; + return (U32)((FUZ_rand(seed) % mod) + minVal); +} + +#define CHECK(cond, ...) { \ + if (cond) { \ + DISPLAY("Error => "); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ + goto _output_error; \ +} } + +static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests) +{ + U32 const maxSrcLog = bigTests ? 24 : 22; + static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* cNoiseBuffer[5]; + size_t const copyBufferSize = srcBufferSize + (1<<maxSampleLog); + BYTE* const copyBuffer = (BYTE*)malloc (copyBufferSize); + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* const cBuffer = (BYTE*)malloc (cBufferSize); + size_t const dstBufferSize = srcBufferSize; + BYTE* const dstBuffer = (BYTE*)malloc (dstBufferSize); + U32 result = 0; + U32 testNb = 0; + U32 coreSeed = seed; + ZSTD_CStream* zc = ZSTD_createCStream(); /* will be re-created sometimes */ + ZSTD_DStream* zd = ZSTD_createDStream(); /* will be re-created sometimes */ + ZSTD_DStream* const zd_noise = ZSTD_createDStream(); + clock_t const startClock = clock(); + const BYTE* dict = NULL; /* can keep same dict on 2 consecutive tests */ + size_t dictSize = 0; + U32 oldTestLog = 0; + U32 const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel() : g_cLevelMax_smallTests; + + /* allocations */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise , + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (FUZ_GetClockSpan(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + size_t totalTestSize, totalGenSize, cSize; + XXH64_state_t xxhState; + U64 crcOrig; + U32 resetAllowed = 1; + size_t maxTestSize; + + /* init */ + if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + else { DISPLAYUPDATE(2, "\r%6u ", testNb); } + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime32; + + /* states full reset (deliberately not synchronized) */ + /* some issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + ZSTD_freeCStream(zc); + zc = ZSTD_createCStream(); + CHECK(zc==NULL, "ZSTD_createCStream : allocation error"); + resetAllowed=0; + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + CHECK(zd==NULL, "ZSTD_createDStream : allocation error"); + CHECK_Z( ZSTD_initDStream_usingDict(zd, NULL, 0) ); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ && resetAllowed) { + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + maxTestSize = MIN(maxTestSize, srcBufferSize-16); + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + CHECK_Z( ZSTD_resetCStream(zc, pledgedSrcSize) ); + } + } else { + U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevelCandidate = ( FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / 3))) + + 1; + U32 const cLevel = MIN(cLevelCandidate, cLevelMax); + maxTestSize = FUZ_rLogLength(&lseed, testLog); + oldTestLog = testLog; + /* random dictionary selection */ + dictSize = ((FUZ_rand(&lseed)&7)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + CHECK_Z ( ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) ); + } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + U32 n; + for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) { + /* compress random chunks into randomly sized dst buffers */ + { size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN (maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* random flush operation, to mess around */ + if ((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + CHECK_Z( ZSTD_flushStream(zc, &outBuff) ); + } } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + remainingToFlush = ZSTD_endStream(zc, &outBuff); + CHECK (ZSTD_isError(remainingToFlush), "end error : %s", ZSTD_getErrorName(remainingToFlush)); + } } + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + } + + /* multi - fragments decompression test */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + CHECK_Z ( ZSTD_resetDStream(zd) ); + } else { + CHECK_Z ( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = outBuff.pos + dstBuffSize; + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_getErrorCode(decompressionResult) == ZSTD_error_checksum_wrong) { + DISPLAY("checksum error : \n"); + findDiff(copyBuffer, dstBuffer, totalTestSize); + } + CHECK( ZSTD_isError(decompressionResult), "decompression error : %s", + ZSTD_getErrorName(decompressionResult) ); + } + CHECK (decompressionResult != 0, "frame not fully decoded"); + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", + (U32)outBuff.pos, (U32)totalTestSize); + CHECK (inBuff.pos != cSize, "compressed data should be fully read") + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + CHECK_Z( ZSTD_initDStream(zd_noise) ); /* note : no dictionary */ + { ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + while (outBuff.pos < dstBufferSize) { + size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); + outBuff.size = outBuff.pos + adjustedDstSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; + { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* No forward progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; + } } } } + DISPLAY("\r%u fuzzer tests completed \n", testNb); + +_cleanup: + ZSTD_freeCStream(zc); + ZSTD_freeDStream(zd); + ZSTD_freeDStream(zd_noise); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + +/* Multi-threading version of fuzzer Tests */ +static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests) +{ + const U32 maxSrcLog = bigTests ? 24 : 22; + static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* cNoiseBuffer[5]; + size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog); + BYTE* const copyBuffer = (BYTE*)malloc (copyBufferSize); + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* const cBuffer = (BYTE*)malloc (cBufferSize); + size_t const dstBufferSize = srcBufferSize; + BYTE* const dstBuffer = (BYTE*)malloc (dstBufferSize); + U32 result = 0; + U32 testNb = 0; + U32 coreSeed = seed; + U32 nbThreads = 2; + ZSTDMT_CCtx* zc = ZSTDMT_createCCtx(nbThreads); /* will be reset sometimes */ + ZSTD_DStream* zd = ZSTD_createDStream(); /* will be reset sometimes */ + ZSTD_DStream* const zd_noise = ZSTD_createDStream(); + clock_t const startClock = clock(); + const BYTE* dict=NULL; /* can keep same dict on 2 consecutive tests */ + size_t dictSize = 0; + U32 oldTestLog = 0; + int const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel()-1 : g_cLevelMax_smallTests; + U32 const nbThreadsMax = bigTests ? 4 : 2; + + /* allocations */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise , + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (FUZ_GetClockSpan(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + size_t totalTestSize, totalGenSize, cSize; + XXH64_state_t xxhState; + U64 crcOrig; + U32 resetAllowed = 1; + size_t maxTestSize; + + /* init */ + if (testNb < nbTests) { + DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); + } else { DISPLAYUPDATE(2, "\r%6u ", testNb); } + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime32; + + /* states full reset (deliberately not synchronized) */ + /* some issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + nbThreads = (FUZ_rand(&lseed) % nbThreadsMax) + 1; + DISPLAYLEVEL(5, "Creating new context with %u threads \n", nbThreads); + ZSTDMT_freeCCtx(zc); + zc = ZSTDMT_createCCtx(nbThreads); + CHECK(zc==NULL, "ZSTDMT_createCCtx allocation error") + resetAllowed=0; + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + CHECK(zd==NULL, "ZSTDMT_createCCtx allocation error") + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ && resetAllowed) { + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; + { int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; + CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) ); + } + } else { + U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + int const cLevelCandidate = ( FUZ_rand(&lseed) + % (ZSTD_maxCLevel() - (MAX(testLog, dictLog) / 2)) ) + + 1; + int const cLevelThreadAdjusted = cLevelCandidate - (nbThreads * 2) + 2; /* reduce cLevel when multiple threads to reduce memory consumption */ + int const cLevelMin = MAX(cLevelThreadAdjusted, 1); /* no negative cLevel yet */ + int const cLevel = MIN(cLevelMin, cLevelMax); + maxTestSize = FUZ_rLogLength(&lseed, testLog); + oldTestLog = testLog; + /* random dictionary selection */ + dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); + DISPLAYLEVEL(5, "Init with windowLog = %u and pledgedSrcSize = %u \n", + params.cParams.windowLog, (U32)pledgedSrcSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + params.fParams.contentSizeFlag = pledgedSrcSize>0; + DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag); + CHECK_Z( ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) ); + CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapSectionLog, FUZ_rand(&lseed) % 12) ); + CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_sectionSize, FUZ_rand(&lseed) % (2*maxTestSize+1)) ); + } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + U32 n; + for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) { + /* compress random chunks into randomly sized dst buffers */ + { size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN (maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + DISPLAYLEVEL(5, "Sending %u bytes to compress \n", (U32)srcSize); + CHECK_Z( ZSTDMT_compressStream(zc, &outBuff, &inBuff) ); + DISPLAYLEVEL(5, "%u bytes read by ZSTDMT_compressStream \n", (U32)inBuff.pos); + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* random flush operation, to mess around */ + if ((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (U32)adjustedDstSize); + CHECK_Z( ZSTDMT_flushStream(zc, &outBuff) ); + } } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (U32)adjustedDstSize); + remainingToFlush = ZSTDMT_endStream(zc, &outBuff); + CHECK (ZSTD_isError(remainingToFlush), "ZSTDMT_endStream error : %s", ZSTD_getErrorName(remainingToFlush)); + DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (U32)remainingToFlush); + } } + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + DISPLAYLEVEL(5, "Frame completed : %u bytes \n", (U32)cSize); + } + + /* multi - fragments decompression test */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + CHECK_Z( ZSTD_resetDStream(zd) ); + } else { + CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = outBuff.pos + dstBuffSize; + DISPLAYLEVEL(5, "ZSTD_decompressStream input %u bytes \n", (U32)readCSrcSize); + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); + DISPLAYLEVEL(5, "inBuff.pos = %u \n", (U32)readCSrcSize); + } + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize); + CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize); + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + CHECK_Z( ZSTD_initDStream(zd_noise) ); /* note : no dictionary */ + { ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + while (outBuff.pos < dstBufferSize) { + size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); + outBuff.size = outBuff.pos + adjustedDstSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; + { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* No forward progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; + } } } } + DISPLAY("\r%u fuzzer tests completed \n", testNb); + +_cleanup: + ZSTDMT_freeCCtx(zc); + ZSTD_freeDStream(zd); + ZSTD_freeDStream(zd_noise); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + +/** If useOpaqueAPI, sets param in cctxParams. + * Otherwise, sets the param in zc. */ +static size_t setCCtxParameter(ZSTD_CCtx* zc, ZSTD_CCtx_params* cctxParams, + ZSTD_cParameter param, unsigned value, + U32 useOpaqueAPI) +{ + if (useOpaqueAPI) { + return ZSTD_CCtxParam_setParameter(cctxParams, param, value); + } else { + return ZSTD_CCtx_setParameter(zc, param, value); + } +} + +/* Tests for ZSTD_compress_generic() API */ +static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests, U32 const useOpaqueAPI) +{ + U32 const maxSrcLog = bigTests ? 24 : 22; + static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1<<maxSrcLog; + BYTE* cNoiseBuffer[5]; + size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog); + BYTE* const copyBuffer = (BYTE*)malloc (copyBufferSize); + size_t const cBufferSize = ZSTD_compressBound(srcBufferSize); + BYTE* const cBuffer = (BYTE*)malloc (cBufferSize); + size_t const dstBufferSize = srcBufferSize; + BYTE* const dstBuffer = (BYTE*)malloc (dstBufferSize); + U32 result = 0; + U32 testNb = 0; + U32 coreSeed = seed; + ZSTD_CCtx* zc = ZSTD_createCCtx(); /* will be reset sometimes */ + ZSTD_DStream* zd = ZSTD_createDStream(); /* will be reset sometimes */ + ZSTD_DStream* const zd_noise = ZSTD_createDStream(); + clock_t const startClock = clock(); + const BYTE* dict = NULL; /* can keep same dict on 2 consecutive tests */ + size_t dictSize = 0; + U32 oldTestLog = 0; + U32 windowLogMalus = 0; /* can survive between 2 loops */ + U32 const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel()-1 : g_cLevelMax_smallTests; + U32 const nbThreadsMax = bigTests ? 4 : 2; + ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); + + /* allocations */ + cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize); + cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] || + !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise , + "Not enough memory, fuzzer tests cancelled"); + + /* Create initial samples */ + RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */ + RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */ + RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed); + RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ + RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ + memset(copyBuffer, 0x65, copyBufferSize); /* make copyBuffer considered initialized */ + CHECK_Z( ZSTD_initDStream_usingDict(zd, NULL, 0) ); /* ensure at least one init */ + + /* catch up testNb */ + for (testNb=1; testNb < startTest; testNb++) + FUZ_rand(&coreSeed); + + /* test loop */ + for ( ; (testNb <= nbTests) || (FUZ_GetClockSpan(startClock) < g_clockTime) ; testNb++ ) { + U32 lseed; + const BYTE* srcBuffer; + size_t totalTestSize, totalGenSize, cSize; + XXH64_state_t xxhState; + U64 crcOrig; + U32 resetAllowed = 1; + size_t maxTestSize; + + /* init */ + if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + else { DISPLAYUPDATE(2, "\r%6u ", testNb); } + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime32; + DISPLAYLEVEL(5, " *** Test %u *** \n", testNb); + + /* states full reset (deliberately not synchronized) */ + /* some issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + DISPLAYLEVEL(5, "Creating new context \n"); + ZSTD_freeCCtx(zc); + zc = ZSTD_createCCtx(); + CHECK(zc==NULL, "ZSTD_createCCtx allocation error"); + resetAllowed=0; + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + CHECK(zd==NULL, "ZSTD_createDStream allocation error"); + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + CHECK_Z( ZSTD_CCtx_loadDictionary(zc, NULL, 0) ); /* cancel previous dict /*/ + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ && resetAllowed) { + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; + { int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; + CHECK_Z (setCCtxParameter(zc, cctxParams, ZSTD_p_compressionLevel, compressionLevel, useOpaqueAPI) ); + } + } else { + U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevelCandidate = (FUZ_rand(&lseed) % + (ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / 2))) + + 1; + U32 const cLevel = MIN(cLevelCandidate, cLevelMax); + DISPLAYLEVEL(5, "t%u: cLevel : %u \n", testNb, cLevel); + maxTestSize = FUZ_rLogLength(&lseed, testLog); + DISPLAYLEVEL(5, "t%u: maxTestSize : %u \n", testNb, (U32)maxTestSize); + oldTestLog = testLog; + /* random dictionary selection */ + dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + if (!dictSize) dict=NULL; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; + ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, pledgedSrcSize, dictSize); + static const U32 windowLogMax = 24; + + /* mess with compression parameters */ + cParams.windowLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.windowLog = MIN(windowLogMax, cParams.windowLog); + cParams.hashLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.chainLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.searchLog += (FUZ_rand(&lseed) & 3) - 1; + cParams.searchLength += (FUZ_rand(&lseed) & 3) - 1; + cParams.targetLength = (U32)((cParams.targetLength + 1 ) * (0.5 + ((double)(FUZ_rand(&lseed) & 127) / 128))); + cParams = ZSTD_adjustCParams(cParams, 0, 0); + + if (FUZ_rand(&lseed) & 1) { + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_windowLog, cParams.windowLog, useOpaqueAPI) ); + assert(cParams.windowLog >= ZSTD_WINDOWLOG_MIN); /* guaranteed by ZSTD_adjustCParams() */ + windowLogMalus = (cParams.windowLog - ZSTD_WINDOWLOG_MIN) / 5; + DISPLAYLEVEL(5, "t%u: windowLog : %u \n", testNb, cParams.windowLog); + } + if (FUZ_rand(&lseed) & 1) { + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_hashLog, cParams.hashLog, useOpaqueAPI) ); + DISPLAYLEVEL(5, "t%u: hashLog : %u \n", testNb, cParams.hashLog); + } + if (FUZ_rand(&lseed) & 1) { + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_chainLog, cParams.chainLog, useOpaqueAPI) ); + DISPLAYLEVEL(5, "t%u: chainLog : %u \n", testNb, cParams.chainLog); + } + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_searchLog, cParams.searchLog, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) ); + + /* mess with long distance matching parameters */ + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_enableLongDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashLog, FUZ_randomClampedLength(&lseed, ZSTD_HASHLOG_MIN, 23), useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, 0, ZSTD_LDM_BUCKETSIZELOG_MAX), useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashEveryLog, FUZ_randomClampedLength(&lseed, 0, ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN), useOpaqueAPI) ); + + /* mess with frame parameters */ + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_checksumFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_dictIDFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_contentSizeFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) ); + DISPLAYLEVEL(5, "t%u: pledgedSrcSize : %u \n", testNb, (U32)pledgedSrcSize); + + /* multi-threading parameters */ + { U32 const nbThreadsCandidate = (FUZ_rand(&lseed) & 4) + 1; + U32 const nbThreadsAdjusted = (windowLogMalus < nbThreadsCandidate) ? nbThreadsCandidate - windowLogMalus : 1; + U32 const nbThreads = MIN(nbThreadsAdjusted, nbThreadsMax); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_nbThreads, nbThreads, useOpaqueAPI) ); + DISPLAYLEVEL(5, "t%u: nbThreads : %u \n", testNb, nbThreads); + if (nbThreads > 1) { + U32 const jobLog = FUZ_rand(&lseed) % (testLog+1); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_overlapSizeLog, FUZ_rand(&lseed) % 10, useOpaqueAPI) ); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_jobSize, (U32)FUZ_rLogLength(&lseed, jobLog), useOpaqueAPI) ); + } + } + + if (FUZ_rand(&lseed) & 1) CHECK_Z (setCCtxParameter(zc, cctxParams, ZSTD_p_forceMaxWindow, FUZ_rand(&lseed) & 1, useOpaqueAPI) ); + + /* Apply parameters */ + if (useOpaqueAPI) { + CHECK_Z (ZSTD_CCtx_setParametersUsingCCtxParams(zc, cctxParams) ); + } + + if (FUZ_rand(&lseed) & 1) { + if (FUZ_rand(&lseed) & 1) { + CHECK_Z( ZSTD_CCtx_loadDictionary(zc, dict, dictSize) ); + } else { + CHECK_Z( ZSTD_CCtx_loadDictionary_byReference(zc, dict, dictSize) ); + } + if (dict && dictSize) { + /* test that compression parameters are rejected (correctly) after loading a non-NULL dictionary */ + if (useOpaqueAPI) { + size_t const setError = ZSTD_CCtx_setParametersUsingCCtxParams(zc, cctxParams); + CHECK(!ZSTD_isError(setError), "ZSTD_CCtx_setParametersUsingCCtxParams should have failed"); + } else { + size_t const setError = ZSTD_CCtx_setParameter(zc, ZSTD_p_windowLog, cParams.windowLog-1); + CHECK(!ZSTD_isError(setError), "ZSTD_CCtx_setParameter should have failed"); + } + } + } else { + CHECK_Z( ZSTD_CCtx_refPrefix(zc, dict, dictSize) ); + } + } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + for (cSize=0, totalTestSize=0 ; (totalTestSize < maxTestSize) ; ) { + /* compress random chunks into randomly sized dst buffers */ + size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN(maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_EndDirective const flush = (FUZ_rand(&lseed) & 15) ? ZSTD_e_continue : ZSTD_e_flush; + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + CHECK_Z( ZSTD_compress_generic(zc, &outBuff, &inBuff, flush) ); + DISPLAYLEVEL(6, "compress consumed %u bytes (total : %u) \n", + (U32)inBuff.pos, (U32)(totalTestSize + inBuff.pos)); + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + ZSTD_inBuffer inBuff = { NULL, 0, 0 }; + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(6, "End-flush into dst buffer of size %u \n", (U32)adjustedDstSize); + remainingToFlush = ZSTD_compress_generic(zc, &outBuff, &inBuff, ZSTD_e_end); + CHECK( ZSTD_isError(remainingToFlush), + "ZSTD_compress_generic w/ ZSTD_e_end error : %s", + ZSTD_getErrorName(remainingToFlush) ); + } } + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + DISPLAYLEVEL(5, "Frame completed : %u bytes \n", (U32)cSize); + } + + /* multi - fragments decompression test */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + DISPLAYLEVEL(5, "resetting DCtx (dict:%08X) \n", (U32)(size_t)dict); + CHECK_Z( ZSTD_resetDStream(zd) ); + } else { + DISPLAYLEVEL(5, "using dict of size %u \n", (U32)dictSize); + CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = outBuff.pos + dstBuffSize; + DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes (pos:%u/%u)\n", + (U32)readCSrcSize, (U32)inBuff.pos, (U32)cSize); + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); + DISPLAYLEVEL(6, "inBuff.pos = %u \n", (U32)readCSrcSize); + } + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize); + CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize); + { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) { + size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const noiseSize = MIN((cSize/3) , randomNoiseSize); + size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize); + size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize); + memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize); + } } + + /* try decompression on noisy data */ + CHECK_Z( ZSTD_initDStream(zd_noise) ); /* note : no dictionary */ + { ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + while (outBuff.pos < dstBufferSize) { + size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); + outBuff.size = outBuff.pos + adjustedDstSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; + { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); + if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* Good so far, but no more progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; + } } } } + DISPLAY("\r%u fuzzer tests completed \n", testNb-1); + +_cleanup: + ZSTD_freeCCtx(zc); + ZSTD_freeDStream(zd); + ZSTD_freeDStream(zd_noise); + ZSTD_freeCCtxParams(cctxParams); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + +/*-******************************************************* +* Command line +*********************************************************/ +int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + +typedef enum { simple_api, mt_api, advanced_api } e_api; + +int main(int argc, const char** argv) +{ + U32 seed=0; + int seedset=0; + int argNb; + int nbTests = nbTestsDefault; + int testNb = 0; + int proba = FUZ_COMPRESSIBILITY_DEFAULT; + int result=0; + int mainPause = 0; + int bigTests = (sizeof(size_t) == 8); + e_api selected_api = simple_api; + const char* const programName = argv[0]; + ZSTD_customMem const customNULL = ZSTD_defaultCMem; + U32 useOpaqueAPI = 0; + + /* Check command line */ + for(argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + /* Parsing commands. Aggregated commands are allowed */ + if (argument[0]=='-') { + + if (!strcmp(argument, "--mt")) { selected_api=mt_api; testNb += !testNb; continue; } + if (!strcmp(argument, "--newapi")) { selected_api=advanced_api; testNb += !testNb; continue; } + if (!strcmp(argument, "--opaqueapi")) { selected_api=advanced_api; testNb += !testNb; useOpaqueAPI = 1; continue; } + if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; } + + argument++; + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + + case 'v': + argument++; + g_displayLevel++; + break; + + case 'q': + argument++; + g_displayLevel--; + break; + + case 'p': /* pause at the end */ + argument++; + mainPause = 1; + break; + + case 'i': /* limit tests by nb of iterations (default) */ + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + nbTests *= 10; + nbTests += *argument - '0'; + argument++; + } + break; + + case 'T': /* limit tests by time */ + argument++; + nbTests=0; g_clockTime=0; + while ((*argument>='0') && (*argument<='9')) { + g_clockTime *= 10; + g_clockTime += *argument - '0'; + argument++; + } + if (*argument=='m') g_clockTime *=60, argument++; + if (*argument=='n') argument++; + g_clockTime *= CLOCKS_PER_SEC; + break; + + case 's': /* manually select seed */ + argument++; + seed=0; + seedset=1; + while ((*argument>='0') && (*argument<='9')) { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + + case 't': /* select starting test number */ + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) { + testNb *= 10; + testNb += *argument - '0'; + argument++; + } + break; + + case 'P': /* compressibility % */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if (proba>100) proba=100; + break; + + default: + return FUZ_usage(programName); + } + } } } /* for(argNb=1; argNb<argc; argNb++) */ + + /* Get Seed */ + DISPLAY("Starting zstream tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + + DISPLAY("Seed = %u\n", seed); + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + + if (nbTests<=0) nbTests=1; + + if (testNb==0) { + result = basicUnitTests(0, ((double)proba) / 100, customNULL); /* constant seed for predictability */ + } + + if (!result) { + switch(selected_api) + { + case simple_api : + result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100, bigTests); + break; + case mt_api : + result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100, bigTests); + break; + case advanced_api : + result = fuzzerTests_newAPI(seed, nbTests, testNb, ((double)proba) / 100, bigTests, useOpaqueAPI); + break; + default : + assert(0); /* impossible */ + } + } + + if (mainPause) { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + return result; +} |