diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/zstd/tests/fuzz | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/zstd/tests/fuzz')
25 files changed, 3126 insertions, 0 deletions
diff --git a/src/zstd/tests/fuzz/.gitignore b/src/zstd/tests/fuzz/.gitignore new file mode 100644 index 000000000..b6fc6e506 --- /dev/null +++ b/src/zstd/tests/fuzz/.gitignore @@ -0,0 +1,20 @@ +# test artefacts +corpora +block_decompress +block_round_trip +dictionary_decompress +dictionary_loader +dictionary_round_trip +dictionary_stream_round_trip +raw_dictionary_round_trip +simple_compress +simple_decompress +simple_round_trip +stream_decompress +stream_round_trip +zstd_frame_info +fuzz-*.log + +# misc +trace +tmp* diff --git a/src/zstd/tests/fuzz/Makefile b/src/zstd/tests/fuzz/Makefile new file mode 100644 index 000000000..1af3dc734 --- /dev/null +++ b/src/zstd/tests/fuzz/Makefile @@ -0,0 +1,212 @@ +# ################################################################ +# Copyright (c) 2016-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +# Optionally user defined flags +CFLAGS ?= -O3 +CXXFLAGS ?= -O3 +CPPFLAGS ?= +LDFLAGS ?= +ARFLAGS ?= +LIB_FUZZING_ENGINE ?= libregression.a +PYTHON ?= python +ifeq ($(shell uname), Darwin) + DOWNLOAD?=curl -L -o +else + DOWNLOAD?=wget -O +endif +CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/ + +ZSTDDIR = ../../lib +PRGDIR = ../../programs + +FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ + -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ + -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS) +FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls \ + -g -fno-omit-frame-pointer +FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS) +FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS) +FUZZ_LDFLAGS := -pthread $(LDFLAGS) +FUZZ_ARFLAGS := $(ARFLAGS) +FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) + +FUZZ_ROUND_TRIP_FLAGS := -DFUZZING_ASSERT_VALID_SEQUENCE + +FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h +FUZZ_SRC := $(PRGDIR)/util.c ./fuzz_helpers.c ./zstd_helpers.c ./fuzz_data_producer.c + +ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c +ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c +ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c +ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c +ZSTDLEGACY_SRC := $(ZSTDDIR)/legacy/*.c +FUZZ_SRC := \ + $(FUZZ_SRC) \ + $(ZSTDDECOMP_SRC) \ + $(ZSTDCOMMON_SRC) \ + $(ZSTDCOMP_SRC) \ + $(ZSTDDICT_SRC) \ + $(ZSTDLEGACY_SRC) +FUZZ_SRC := $(wildcard $(FUZZ_SRC)) + +FUZZ_D_OBJ1 := $(subst $(ZSTDDIR)/common/,d_lib_common_,$(FUZZ_SRC)) +FUZZ_D_OBJ2 := $(subst $(ZSTDDIR)/compress/,d_lib_compress_,$(FUZZ_D_OBJ1)) +FUZZ_D_OBJ3 := $(subst $(ZSTDDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2)) +FUZZ_D_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3)) +FUZZ_D_OBJ5 := $(subst $(ZSTDDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4)) +FUZZ_D_OBJ6 := $(subst $(PRGDIR)/,d_prg_,$(FUZZ_D_OBJ5)) +FUZZ_D_OBJ7 := $(subst $\./,d_fuzz_,$(FUZZ_D_OBJ6)) +FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ7:.c=.o) + +FUZZ_RT_OBJ1 := $(subst $(ZSTDDIR)/common/,rt_lib_common_,$(FUZZ_SRC)) +FUZZ_RT_OBJ2 := $(subst $(ZSTDDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1)) +FUZZ_RT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2)) +FUZZ_RT_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3)) +FUZZ_RT_OBJ5 := $(subst $(ZSTDDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4)) +FUZZ_RT_OBJ6 := $(subst $(PRGDIR)/,rt_prg_,$(FUZZ_RT_OBJ5)) +FUZZ_RT_OBJ7 := $(subst $\./,rt_fuzz_,$(FUZZ_RT_OBJ6)) +FUZZ_ROUND_TRIP_OBJ := $(FUZZ_RT_OBJ7:.c=.o) + +.PHONY: default all clean cleanall + +default: all + +FUZZ_TARGETS := \ + simple_round_trip \ + stream_round_trip \ + block_round_trip \ + simple_decompress \ + stream_decompress \ + block_decompress \ + dictionary_round_trip \ + dictionary_decompress \ + zstd_frame_info \ + simple_compress \ + dictionary_loader \ + raw_dictionary_round_trip \ + dictionary_stream_round_trip + +all: $(FUZZ_TARGETS) + +rt_lib_common_%.o: $(ZSTDDIR)/common/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_compress_%.o: $(ZSTDDIR)/compress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_prg_%.o: $(PRGDIR)/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +rt_fuzz_%.o: %.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ + +d_lib_common_%.o: $(ZSTDDIR)/common/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_compress_%.o: $(ZSTDDIR)/compress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_prg_%.o: $(PRGDIR)/%.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +d_fuzz_%.o: %.c + $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ + +simple_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +stream_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_stream_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_stream_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +block_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_block_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_block_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +simple_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_simple_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_simple_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +stream_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_stream_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_stream_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +block_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_block_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_block_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +raw_dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_raw_dictionary_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_raw_dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_stream_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_stream_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_stream_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_dictionary_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +simple_compress: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_compress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_simple_compress.o $(LIB_FUZZING_ENGINE) -o $@ + +zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_zstd_frame_info.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_zstd_frame_info.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o $(LIB_FUZZING_ENGINE) -o $@ + +libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o + $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o + +corpora/%_seed_corpus.zip: + @mkdir -p corpora + $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip + +corpora/%: corpora/%_seed_corpus.zip + unzip -q $^ -d $@ + +.PHONY: corpora +corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS)) + +.PHONY: seedcorpora +seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS)) + +regressiontest: corpora + CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all + $(PYTHON) ./fuzz.py regression all + +clean: + @$(RM) *.a *.o + @$(RM) simple_round_trip stream_round_trip simple_decompress \ + stream_decompress block_decompress block_round_trip \ + simple_compress dictionary_round_trip dictionary_decompress \ + zstd_frame_info + +cleanall: + @$(RM) -r Fuzzer + @$(RM) -r corpora diff --git a/src/zstd/tests/fuzz/README.md b/src/zstd/tests/fuzz/README.md new file mode 100644 index 000000000..71afa4063 --- /dev/null +++ b/src/zstd/tests/fuzz/README.md @@ -0,0 +1,101 @@ +# Fuzzing + +Each fuzzing target can be built with multiple engines. +Zstd provides a fuzz corpus for each target that can be downloaded with +the command: + +``` +make corpora +``` + +It will download each corpus into `./corpora/TARGET`. + +## fuzz.py + +`fuzz.py` is a helper script for building and running fuzzers. +Run `./fuzz.py -h` for the commands and run `./fuzz.py COMMAND -h` for +command specific help. + +### Generating Data + +`fuzz.py` provides a utility to generate seed data for each fuzzer. + +``` +make -C ../tests decodecorpus +./fuzz.py gen TARGET +``` + +By default it outputs 100 samples, each at most 8KB into `corpora/TARGET-seed`, +but that can be configured with the `--number`, `--max-size-log` and `--seed` +flags. + +### Build +It respects the usual build environment variables `CC`, `CFLAGS`, etc. +The environment variables can be overridden with the corresponding flags +`--cc`, `--cflags`, etc. +The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or +`--lib-fuzzing-engine`, the default is `libregression.a`. +Alternatively, you can use Clang's built in fuzzing engine with +`--enable-fuzzer`. +It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and +coverage instrumentation `--enable-coverage`. +It sets sane defaults which can be overridden with flags `--debug`, +`--enable-ubsan-pointer-overflow`, etc. +Run `./fuzz.py build -h` for help. + +### Running Fuzzers + +`./fuzz.py` can run `libfuzzer`, `afl`, and `regression` tests. +See the help of the relevant command for options. +Flags not parsed by `fuzz.py` are passed to the fuzzing engine. +The command used to run the fuzzer is printed for debugging. + +## LibFuzzer + +``` +# Build the fuzz targets +./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ +# OR equivalently +CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan +# Run the fuzzer +./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4> +``` + +where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. + +### MSAN + +Fuzzing with `libFuzzer` and `MSAN` is as easy as: + +``` +CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan +./fuzz.py libfuzzer TARGET <libfuzzer args> +``` + +`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, +`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass +the extra parameters only for MSAN. + +## AFL + +The default `LIB_FUZZING_ENGINE` is `libregression.a`, which produces a binary +that AFL can use. + +``` +# Build the fuzz targets +CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan +# Run the fuzzer without a memory limit because of ASAN +./fuzz.py afl TARGET -m none +``` + +## Regression Testing + +The regression test supports the `all` target to run all the fuzzers in one +command. + +``` +CC=clang CXX=clang++ ./fuzz.py build all --enable-asan --enable-ubsan +./fuzz.py regression all +CC=clang CXX=clang++ ./fuzz.py build all --enable-msan +./fuzz.py regression all +``` diff --git a/src/zstd/tests/fuzz/block_decompress.c b/src/zstd/tests/fuzz/block_decompress.c new file mode 100644 index 000000000..64d70f005 --- /dev/null +++ b/src/zstd/tests/fuzz/block_decompress.c @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + ZSTD_decompressBegin(dctx); + ZSTD_decompressBlock(dctx, rBuf, neededBufSize, src, size); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/block_round_trip.c b/src/zstd/tests/fuzz/block_round_trip.c new file mode 100644 index 000000000..097fc01b8 --- /dev/null +++ b/src/zstd/tests/fuzz/block_round_trip.c @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static void* cBuf = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + int cLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(cLevel, srcSize, 0); + size_t ret = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, srcSize); + FUZZ_ZASSERT(ret); + + ret = ZSTD_compressBlock(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(ret); + if (ret == 0) { + FUZZ_ASSERT(resultCapacity >= srcSize); + if (srcSize > 0) { + memcpy(result, src, srcSize); + } + return srcSize; + } + ZSTD_decompressBegin(dctx); + return ZSTD_decompressBlock(dctx, result, resultCapacity, compressed, ret); +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + size_t neededBufSize = size; + if (size > ZSTD_BLOCKSIZE_MAX) + size = ZSTD_BLOCKSIZE_MAX; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize || !cBuf || !rBuf) { + free(cBuf); + free(rBuf); + cBuf = FUZZ_malloc(neededBufSize); + rBuf = FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size, + cLevel); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_decompress.c b/src/zstd/tests/fuzz/dictionary_decompress.c new file mode 100644 index 000000000..9944baa15 --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_decompress.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the dictionary + * decompression function to ensure the decompressor never crashes. It does not + * fuzz the dictionary. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + FUZZ_dict_t dict; + ZSTD_DDict* ddict = NULL; + + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + dict = FUZZ_train(src, size, producer); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + ddict = ZSTD_createDDict(dict.buff, dict.size); + FUZZ_ASSERT(ddict); + } else { + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2))); + else + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2))); + } + + { + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void* rBuf = FUZZ_malloc(bufSize); + if (ddict) { + ZSTD_decompress_usingDDict(dctx, rBuf, bufSize, src, size, ddict); + } else { + ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + } + free(rBuf); + } + free(dict.buff); + FUZZ_dataProducer_free(producer); + ZSTD_freeDDict(ddict); +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_loader.c b/src/zstd/tests/fuzz/dictionary_loader.c new file mode 100644 index 000000000..f1fdf4da9 --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_loader.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target makes sure that whenever a compression dictionary can be + * loaded, the data can be round tripped. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +/** + * Compresses the data and returns the compressed size or an error. + */ +static size_t compress(void* compressed, size_t compressedCapacity, + void const* source, size_t sourceSize, + void const* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + int const refPrefix) +{ + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict, dictSize, dictContentType)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, dictLoadMethod, dictContentType)); + size_t const compressedSize = ZSTD_compress2( + cctx, compressed, compressedCapacity, source, sourceSize); + ZSTD_freeCCtx(cctx); + return compressedSize; +} + +static size_t decompress(void* result, size_t resultCapacity, + void const* compressed, size_t compressedSize, + void const* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + int const refPrefix) +{ + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict, dictSize, dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict, dictSize, dictLoadMethod, dictContentType)); + size_t const resultSize = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, compressedSize); + FUZZ_ZASSERT(resultSize); + ZSTD_freeDCtx(dctx); + return resultSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + ZSTD_dictLoadMethod_e const dlm = + size = FUZZ_dataProducer_uint32Range(producer, 0, 1); + ZSTD_dictContentType_e const dct = + FUZZ_dataProducer_uint32Range(producer, 0, 2); + size = FUZZ_dataProducer_remainingBytes(producer); + + DEBUGLOG(2, "Dict load method %d", dlm); + DEBUGLOG(2, "Dict content type %d", dct); + DEBUGLOG(2, "Dict size %u", (unsigned)size); + + void* const rBuf = FUZZ_malloc(size); + size_t const cBufSize = ZSTD_compressBound(size); + void* const cBuf = FUZZ_malloc(cBufSize); + + size_t const cSize = + compress(cBuf, cBufSize, src, size, src, size, dlm, dct, refPrefix); + /* compression failing is okay */ + if (ZSTD_isError(cSize)) { + FUZZ_ASSERT_MSG(dct != ZSTD_dct_rawContent, "Raw must always succeed!"); + goto out; + } + size_t const rSize = + decompress(rBuf, size, cBuf, cSize, src, size, dlm, dct, refPrefix); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + +out: + free(cBuf); + free(rBuf); + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_round_trip.c b/src/zstd/tests/fuzz/dictionary_round_trip.c new file mode 100644 index 000000000..7b7771e48 --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_round_trip.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress) with + * a dictionary, compares the result with the original, and calls abort() on + * corruption. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + FUZZ_dataProducer_t *producer) +{ + ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; + FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer); + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + size_t cSize; + if (FUZZ_dataProducer_uint32Range(producer, 0, 15) == 0) { + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + cSize = ZSTD_compress_usingDict(cctx, + compressed, compressedCapacity, + src, srcSize, + dict.buff, dict.size, + cLevel); + } else { + dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2); + FUZZ_setRandomParameters(cctx, srcSize, producer); + /* Disable checksum so we can use sizes smaller than compress bound. */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict.buff, dict.size, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + } + FUZZ_ZASSERT(cSize); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict.buff, dict.size, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + { + size_t const ret = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, cSize); + free(dict.buff); + return ret; + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + size_t const rBufSize = size; + void* rBuf = FUZZ_malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void *cBuf; + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we force the checksum to be disabled, + * giving us 4 bytes of overhead. + */ + cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + cBuf = FUZZ_malloc(cBufSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + free(rBuf); + free(cBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/dictionary_stream_round_trip.c b/src/zstd/tests/fuzz/dictionary_stream_round_trip.c new file mode 100644 index 000000000..67e8c69ef --- /dev/null +++ b/src/zstd/tests/fuzz/dictionary_stream_round_trip.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static uint8_t* cBuf = NULL; +static uint8_t* rBuf = NULL; +static size_t bufSize = 0; + +static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity, + FUZZ_dataProducer_t *producer) +{ + ZSTD_outBuffer buffer = { dst, 0, 0 }; + + FUZZ_ASSERT(capacity > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity)); + FUZZ_ASSERT(buffer.size <= capacity); + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size)); + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + return buffer; +} + +static size_t compress(uint8_t *dst, size_t capacity, + const uint8_t *src, size_t srcSize, + const uint8_t* dict, size_t dictSize, + FUZZ_dataProducer_t *producer, int refPrefix, + ZSTD_dictContentType_e dictContentType) +{ + size_t dstSize = 0; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + FUZZ_setRandomParameters(cctx, srcSize, producer); + + /* Disable checksum so we can use sizes smaller than compress bound. */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict, dictSize, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + + while (srcSize > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer); + /* Mode controls the action. If mode == -1 we pick a new mode */ + int mode = -1; + while (in.pos < in.size || mode != -1) { + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + /* Previous action finished, pick a new mode. */ + if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9); + switch (mode) { + case 0: /* fall-through */ + case 1: /* fall-through */ + case 2: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush); + FUZZ_ZASSERT(ret); + if (ret == 0) + mode = -1; + break; + } + case 3: { + size_t ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + /* Reset the compressor when the frame is finished */ + if (ret == 0) { + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) { + size_t const remaining = in.size - in.pos; + FUZZ_setRandomParameters(cctx, remaining, producer); + } + mode = -1; + } + break; + } + case 4: { + ZSTD_inBuffer nullIn = { NULL, 0, 0 }; + ZSTD_outBuffer nullOut = { NULL, 0, 0 }; + size_t const ret = ZSTD_compressStream2(cctx, &nullOut, &nullIn, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + } + /* fall-through */ + default: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + mode = -1; + } + } + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + } + } + for (;;) { + ZSTD_inBuffer in = {NULL, 0, 0}; + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + if (ret == 0) + break; + } + return dstSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + neededBufSize = ZSTD_compressBound(size) * 15; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + rBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + ZSTD_dictContentType_e dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2); + FUZZ_dict_t dict = FUZZ_train(src, size, producer); + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + + size_t const cSize = compress(cBuf, neededBufSize, src, size, dict.buff, dict.size, producer, refPrefix, dictContentType); + + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict.buff, dict.size, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + size_t const rSize = + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); + FUZZ_ZASSERT(rSize); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + free(dict.buff); + } + + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/fuzz.h b/src/zstd/tests/fuzz/fuzz.h new file mode 100644 index 000000000..8ee964536 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * Fuzz target interface. + * Fuzz targets have some common parameters passed as macros during compilation. + * Check the documentation for each individual fuzzer for more parameters. + * + * @param STATEFUL_FUZZING: + * Define this to reuse state between fuzzer runs. This can be useful to + * test code paths which are only executed when contexts are reused. + * WARNING: Makes reproducing crashes much harder. + * Default: Not defined. + * @param DEBUGLEVEL: + * This is a parameter for the zstd library. Defining `DEBUGLEVEL=1` + * enables assert() statements in the zstd library. Higher levels enable + * logging, so aren't recommended. Defining `DEBUGLEVEL=1` is + * recommended. + * @param MEM_FORCE_MEMORY_ACCESS: + * This flag controls how the zstd library accesses unaligned memory. + * It can be undefined, or 0 through 2. If it is undefined, it selects + * the method to use based on the compiler. If testing with UBSAN set + * MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method. + * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + * This is the canonical flag to enable deterministic builds for fuzzing. + * Changes to zstd for fuzzing are gated behind this define. + * It is recommended to define this when building zstd for fuzzing. + */ + +#ifndef FUZZ_H +#define FUZZ_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/zstd/tests/fuzz/fuzz.py b/src/zstd/tests/fuzz/fuzz.py new file mode 100755 index 000000000..6875d1d60 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz.py @@ -0,0 +1,887 @@ +#!/usr/bin/env python + +# ################################################################ +# Copyright (c) 2016-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ########################################################################## + +import argparse +import contextlib +import os +import re +import shlex +import shutil +import subprocess +import sys +import tempfile + + +def abs_join(a, *p): + return os.path.abspath(os.path.join(a, *p)) + + +class InputType(object): + RAW_DATA = 1 + COMPRESSED_DATA = 2 + DICTIONARY_DATA = 3 + + +class FrameType(object): + ZSTD = 1 + BLOCK = 2 + + +class TargetInfo(object): + def __init__(self, input_type, frame_type=FrameType.ZSTD): + self.input_type = input_type + self.frame_type = frame_type + + +# Constants +FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) +CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') +TARGET_INFO = { + 'simple_round_trip': TargetInfo(InputType.RAW_DATA), + 'stream_round_trip': TargetInfo(InputType.RAW_DATA), + 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK), + 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK), + 'dictionary_round_trip': TargetInfo(InputType.RAW_DATA), + 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA), + 'simple_compress': TargetInfo(InputType.RAW_DATA), + 'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA), + 'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA), + 'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA), +} +TARGETS = list(TARGET_INFO.keys()) +ALL_TARGETS = TARGETS + ['all'] +FUZZ_RNG_SEED_SIZE = 4 + +# Standard environment variables +CC = os.environ.get('CC', 'cc') +CXX = os.environ.get('CXX', 'c++') +CPPFLAGS = os.environ.get('CPPFLAGS', '') +CFLAGS = os.environ.get('CFLAGS', '-O3') +CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS) +LDFLAGS = os.environ.get('LDFLAGS', '') +MFLAGS = os.environ.get('MFLAGS', '-j') + +# Fuzzing environment variables +LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a') +AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz') +DECODECORPUS = os.environ.get('DECODECORPUS', + abs_join(FUZZ_DIR, '..', 'decodecorpus')) +ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd')) + +# Sanitizer environment variables +MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '') +MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '') +MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '') +MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '') + + +def create(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + os.makedirs(d) + return d + + +def check(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + return None + return d + + +@contextlib.contextmanager +def tmpdir(): + dirpath = tempfile.mkdtemp() + try: + yield dirpath + finally: + shutil.rmtree(dirpath, ignore_errors=True) + + +def parse_targets(in_targets): + targets = set() + for target in in_targets: + if not target: + continue + if target == 'all': + targets = targets.union(TARGETS) + elif target in TARGETS: + targets.add(target) + else: + raise RuntimeError('{} is not a valid target'.format(target)) + return list(targets) + + +def targets_parser(args, description): + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + args.TARGET = parse_targets(args.TARGET) + + return args + + +def parse_env_flags(args, flags): + """ + Look for flags set by environment variables. + """ + san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)) + nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)) + + def set_sanitizer(sanitizer, default, san, nosan): + if sanitizer in san and sanitizer in nosan: + raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'. + format(s=sanitizer)) + if sanitizer in san: + return True + if sanitizer in nosan: + return False + return default + + san = set(san_flags.split(',')) + nosan = set(nosan_flags.split(',')) + + args.asan = set_sanitizer('address', args.asan, san, nosan) + args.msan = set_sanitizer('memory', args.msan, san, nosan) + args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan) + + args.sanitize = args.asan or args.msan or args.ubsan + + return args + + +def compiler_version(cc, cxx): + """ + Determines the compiler and version. + Only works for clang and gcc. + """ + cc_version_bytes = subprocess.check_output([cc, "--version"]) + cxx_version_bytes = subprocess.check_output([cxx, "--version"]) + compiler = None + version = None + if b'clang' in cc_version_bytes: + assert(b'clang' in cxx_version_bytes) + compiler = 'clang' + elif b'gcc' in cc_version_bytes: + assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes) + compiler = 'gcc' + if compiler is not None: + version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' + version_match = re.search(version_regex, cc_version_bytes) + version = tuple(int(version_match.group(i)) for i in range(1, 4)) + return compiler, version + + +def overflow_ubsan_flags(cc, cxx): + compiler, version = compiler_version(cc, cxx) + if compiler == 'gcc': + return ['-fno-sanitize=signed-integer-overflow'] + if compiler == 'clang' and version >= (5, 0, 0): + return ['-fno-sanitize=pointer-overflow'] + return [] + + +def build_parser(args): + description = """ + Cleans the repository and builds a fuzz target (or all). + Many flags default to environment variables (default says $X='y'). + Options that aren't enabling features default to the correct values for + zstd. + Enable sanitizers with --enable-*san. + For regression testing just build. + For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage. + For AFL set CC and CXX to AFL's compilers and set + LIB_FUZZING_ENGINE='libregression.a'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--lib-fuzzing-engine', + dest='lib_fuzzing_engine', + type=str, + default=LIB_FUZZING_ENGINE, + help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a ' + "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE))) + + fuzz_group = parser.add_mutually_exclusive_group() + fuzz_group.add_argument( + '--enable-coverage', + dest='coverage', + action='store_true', + help='Enable coverage instrumentation (-fsanitize-coverage)') + fuzz_group.add_argument( + '--enable-fuzzer', + dest='fuzzer', + action='store_true', + help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled ' + 'LIB_FUZZING_ENGINE is ignored') + ) + + parser.add_argument( + '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan', + dest='ubsan', + action='store_true', + help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan-pointer-overflow', + dest='ubsan_pointer_overflow', + action='store_true', + help='Enable UBSAN pointer overflow check (known failure)') + parser.add_argument( + '--enable-msan', dest='msan', action='store_true', help='Enable MSAN') + parser.add_argument( + '--enable-msan-track-origins', dest='msan_track_origins', + action='store_true', help='Enable MSAN origin tracking') + parser.add_argument( + '--msan-extra-cppflags', + dest='msan_extra_cppflags', + type=str, + default=MSAN_EXTRA_CPPFLAGS, + help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')". + format(MSAN_EXTRA_CPPFLAGS)) + parser.add_argument( + '--msan-extra-cflags', + dest='msan_extra_cflags', + type=str, + default=MSAN_EXTRA_CFLAGS, + help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format( + MSAN_EXTRA_CFLAGS)) + parser.add_argument( + '--msan-extra-cxxflags', + dest='msan_extra_cxxflags', + type=str, + default=MSAN_EXTRA_CXXFLAGS, + help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')". + format(MSAN_EXTRA_CXXFLAGS)) + parser.add_argument( + '--msan-extra-ldflags', + dest='msan_extra_ldflags', + type=str, + default=MSAN_EXTRA_LDFLAGS, + help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')". + format(MSAN_EXTRA_LDFLAGS)) + parser.add_argument( + '--enable-sanitize-recover', + dest='sanitize_recover', + action='store_true', + help='Non-fatal sanitizer errors where possible') + parser.add_argument( + '--debug', + dest='debug', + type=int, + default=1, + help='Set DEBUGLEVEL (default: 1)') + parser.add_argument( + '--force-memory-access', + dest='memory_access', + type=int, + default=0, + help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)') + parser.add_argument( + '--fuzz-rng-seed-size', + dest='fuzz_rng_seed_size', + type=int, + default=4, + help='Set FUZZ_RNG_SEED_SIZE (default: 4)') + parser.add_argument( + '--disable-fuzzing-mode', + dest='fuzzing_mode', + action='store_false', + help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION') + parser.add_argument( + '--enable-stateful-fuzzing', + dest='stateful_fuzzing', + action='store_true', + help='Reuse contexts between runs (makes reproduction impossible)') + parser.add_argument( + '--cc', + dest='cc', + type=str, + default=CC, + help="CC (default: $CC='{}')".format(CC)) + parser.add_argument( + '--cxx', + dest='cxx', + type=str, + default=CXX, + help="CXX (default: $CXX='{}')".format(CXX)) + parser.add_argument( + '--cppflags', + dest='cppflags', + type=str, + default=CPPFLAGS, + help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS)) + parser.add_argument( + '--cflags', + dest='cflags', + type=str, + default=CFLAGS, + help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS)) + parser.add_argument( + '--cxxflags', + dest='cxxflags', + type=str, + default=CXXFLAGS, + help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS)) + parser.add_argument( + '--ldflags', + dest='ldflags', + type=str, + default=LDFLAGS, + help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS)) + parser.add_argument( + '--mflags', + dest='mflags', + type=str, + default=MFLAGS, + help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS)) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)) + ) + args = parser.parse_args(args) + args = parse_env_flags(args, ' '.join( + [args.cppflags, args.cflags, args.cxxflags, args.ldflags])) + + # Check option sanity + if args.msan and (args.asan or args.ubsan): + raise RuntimeError('MSAN may not be used with any other sanitizers') + if args.msan_track_origins and not args.msan: + raise RuntimeError('--enable-msan-track-origins requires MSAN') + if args.ubsan_pointer_overflow and not args.ubsan: + raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN') + if args.sanitize_recover and not args.sanitize: + raise RuntimeError('--enable-sanitize-recover but no sanitizers used') + + return args + + +def build(args): + try: + args = build_parser(args) + except Exception as e: + print(e) + return 1 + # The compilation flags we are setting + targets = args.TARGET + cc = args.cc + cxx = args.cxx + cppflags = shlex.split(args.cppflags) + cflags = shlex.split(args.cflags) + ldflags = shlex.split(args.ldflags) + cxxflags = shlex.split(args.cxxflags) + mflags = shlex.split(args.mflags) + # Flags to be added to both cflags and cxxflags + common_flags = [] + + cppflags += [ + '-DDEBUGLEVEL={}'.format(args.debug), + '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access), + '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size), + ] + + # Set flags for options + assert not (args.fuzzer and args.coverage) + if args.coverage: + common_flags += [ + '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp' + ] + if args.fuzzer: + common_flags += ['-fsanitize=fuzzer'] + args.lib_fuzzing_engine = '' + + mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)] + + if args.sanitize_recover: + recover_flags = ['-fsanitize-recover=all'] + else: + recover_flags = ['-fno-sanitize-recover=all'] + if args.sanitize: + common_flags += recover_flags + + if args.msan: + msan_flags = ['-fsanitize=memory'] + if args.msan_track_origins: + msan_flags += ['-fsanitize-memory-track-origins'] + common_flags += msan_flags + # Append extra MSAN flags (it might require special setup) + cppflags += [args.msan_extra_cppflags] + cflags += [args.msan_extra_cflags] + cxxflags += [args.msan_extra_cxxflags] + ldflags += [args.msan_extra_ldflags] + + if args.asan: + common_flags += ['-fsanitize=address'] + + if args.ubsan: + ubsan_flags = ['-fsanitize=undefined'] + if not args.ubsan_pointer_overflow: + ubsan_flags += overflow_ubsan_flags(cc, cxx) + common_flags += ubsan_flags + + if args.stateful_fuzzing: + cppflags += ['-DSTATEFUL_FUZZING'] + + if args.fuzzing_mode: + cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION'] + + if args.lib_fuzzing_engine == 'libregression.a': + targets = ['libregression.a'] + targets + + # Append the common flags + cflags += common_flags + cxxflags += common_flags + + # Prepare the flags for Make + cc_str = "CC={}".format(cc) + cxx_str = "CXX={}".format(cxx) + cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags)) + cflags_str = "CFLAGS={}".format(' '.join(cflags)) + cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags)) + ldflags_str = "LDFLAGS={}".format(' '.join(ldflags)) + + # Print the flags + print('MFLAGS={}'.format(' '.join(mflags))) + print(cc_str) + print(cxx_str) + print(cppflags_str) + print(cflags_str) + print(cxxflags_str) + print(ldflags_str) + + # Clean and build + clean_cmd = ['make', 'clean'] + mflags + print(' '.join(clean_cmd)) + subprocess.check_call(clean_cmd) + build_cmd = [ + 'make', + cc_str, + cxx_str, + cppflags_str, + cflags_str, + cxxflags_str, + ldflags_str, + ] + mflags + targets + print(' '.join(build_cmd)) + subprocess.check_call(build_cmd) + return 0 + + +def libfuzzer_parser(args): + description = """ + Runs a libfuzzer binary. + Passes all extra arguments to libfuzzer. + The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to + libFuzzer.a. + Generates output in the CORPORA directory, puts crashes in the ARTIFACT + directory, and takes extra input from the SEED directory. + To merge AFL's output pass the SEED as AFL's output directory and pass + '-merge=1'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--artifact', + type=str, + help='Override the default artifact dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-crash'))) + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + return args + + +def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None): + if corpora is None: + corpora = abs_join(CORPORA_DIR, target) + if artifact is None: + artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + if seed is None: + seed = abs_join(CORPORA_DIR, '{}-seed'.format(target)) + if extra_args is None: + extra_args = [] + + target = abs_join(FUZZ_DIR, target) + + corpora = [create(corpora)] + artifact = create(artifact) + seed = check(seed) + + corpora += [artifact] + if seed is not None: + corpora += [seed] + + cmd = [target, '-artifact_prefix={}/'.format(artifact)] + cmd += corpora + extra_args + print(' '.join(cmd)) + subprocess.check_call(cmd) + + +def libfuzzer_cmd(args): + try: + args = libfuzzer_parser(args) + except Exception as e: + print(e) + return 1 + libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra) + return 0 + + +def afl_parser(args): + description = """ + Runs an afl-fuzz job. + Passes all extra arguments to afl-fuzz. + The fuzzer should have been built with CC/CXX set to the AFL compilers, + and with LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA and writes output to OUTPUT. + Uses AFL_FUZZ as the binary (set from flag or environment variable). + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--output', + type=str, + help='Override the default AFL output dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-afl'))) + parser.add_argument( + '--afl-fuzz', + type=str, + default=AFL_FUZZ, + help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.corpora: + args.corpora = abs_join(CORPORA_DIR, args.TARGET) + if not args.output: + args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET)) + + return args + + +def afl(args): + try: + args = afl_parser(args) + except Exception as e: + print(e) + return 1 + target = abs_join(FUZZ_DIR, args.TARGET) + + corpora = create(args.corpora) + output = create(args.output) + + cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra + cmd += [target, '@@'] + print(' '.join(cmd)) + subprocess.call(cmd) + return 0 + + +def regression(args): + try: + description = """ + Runs one or more regression tests. + The fuzzer should have been built with with + LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + for target in args.TARGET: + corpora = create(abs_join(CORPORA_DIR, target)) + target = abs_join(FUZZ_DIR, target) + cmd = [target, corpora] + print(' '.join(cmd)) + subprocess.check_call(cmd) + return 0 + + +def gen_parser(args): + description = """ + Generate a seed corpus appropriate for TARGET with data generated with + decodecorpus. + The fuzz inputs are prepended with a seed before the zstd data, so the + output of decodecorpus shouldn't be used directly. + Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and + puts the output in SEED. + DECODECORPUS is the decodecorpus binary, and must already be built. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--number', + '-n', + type=int, + default=100, + help='Number of samples to generate') + parser.add_argument( + '--max-size-log', + type=int, + default=18, + help='Maximum sample size to generate') + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + '--decodecorpus', + type=str, + default=DECODECORPUS, + help="decodecorpus binary (default: $DECODECORPUS='{}')".format( + DECODECORPUS)) + parser.add_argument( + '--zstd', + type=str, + default=ZSTD, + help="zstd binary (default: $ZSTD='{}')".format(ZSTD)) + parser.add_argument( + '--fuzz-rng-seed-size', + type=int, + default=4, + help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)" + ) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.seed: + args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET)) + + if not os.path.isfile(args.decodecorpus): + raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'". + format(args.decodecorpus, abs_join(FUZZ_DIR, '..'))) + + return args + + +def gen(args): + try: + args = gen_parser(args) + except Exception as e: + print(e) + return 1 + + seed = create(args.seed) + with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict: + info = TARGET_INFO[args.TARGET] + + if info.input_type == InputType.DICTIONARY_DATA: + number = max(args.number, 1000) + else: + number = args.number + cmd = [ + args.decodecorpus, + '-n{}'.format(args.number), + '-p{}/'.format(compressed), + '-o{}'.format(decompressed), + ] + + if info.frame_type == FrameType.BLOCK: + cmd += [ + '--gen-blocks', + '--max-block-size-log={}'.format(min(args.max_size_log, 17)) + ] + else: + cmd += ['--max-content-size-log={}'.format(args.max_size_log)] + + print(' '.join(cmd)) + subprocess.check_call(cmd) + + if info.input_type == InputType.RAW_DATA: + print('using decompressed data in {}'.format(decompressed)) + samples = decompressed + elif info.input_type == InputType.COMPRESSED_DATA: + print('using compressed data in {}'.format(compressed)) + samples = compressed + else: + assert info.input_type == InputType.DICTIONARY_DATA + print('making dictionary data from {}'.format(decompressed)) + samples = dict + min_dict_size_log = 9 + max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log) + for dict_size_log in range(min_dict_size_log, max_dict_size_log): + dict_size = 1 << dict_size_log + cmd = [ + args.zstd, + '--train', + '-r', decompressed, + '--maxdict={}'.format(dict_size), + '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size)) + ] + print(' '.join(cmd)) + subprocess.check_call(cmd) + + # Copy the samples over and prepend the RNG seeds + for name in os.listdir(samples): + samplename = abs_join(samples, name) + outname = abs_join(seed, name) + with open(samplename, 'rb') as sample: + with open(outname, 'wb') as out: + CHUNK_SIZE = 131072 + chunk = sample.read(CHUNK_SIZE) + while len(chunk) > 0: + out.write(chunk) + chunk = sample.read(CHUNK_SIZE) + return 0 + + +def minimize(args): + try: + description = """ + Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in + TARGET_seed_corpus. All extra args are passed to libfuzzer. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Merge the corpus + anything else into the seed_corpus + corpus = abs_join(CORPORA_DIR, target) + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + extra_args = [corpus, "-merge=1"] + args.extra + libfuzzer(target, corpora=seed_corpus, extra_args=extra_args) + seeds = set(os.listdir(seed_corpus)) + # Copy all crashes directly into the seed_corpus if not already present + crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + for crash in os.listdir(crashes): + if crash not in seeds: + shutil.copy(abs_join(crashes, crash), seed_corpus) + seeds.add(crash) + + +def zip_cmd(args): + try: + description = """ + Zips up the seed corpus. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Zip the seed_corpus + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + zip_file = "{}.zip".format(seed_corpus) + cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."] + print(' '.join(cmd)) + subprocess.check_call(cmd, cwd=seed_corpus) + + +def list_cmd(args): + print("\n".join(TARGETS)) + + +def short_help(args): + name = args[0] + print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name)) + + +def help(args): + short_help(args) + print("\tfuzzing helpers (select a command and pass -h for help)\n") + print("Options:") + print("\t-h, --help\tPrint this message") + print("") + print("Commands:") + print("\tbuild\t\tBuild a fuzzer") + print("\tlibfuzzer\tRun a libFuzzer fuzzer") + print("\tafl\t\tRun an AFL fuzzer") + print("\tregression\tRun a regression test") + print("\tgen\t\tGenerate a seed corpus for a fuzzer") + print("\tminimize\tMinimize the test corpora") + print("\tzip\t\tZip the minimized corpora up") + print("\tlist\t\tList the available targets") + + +def main(): + args = sys.argv + if len(args) < 2: + help(args) + return 1 + if args[1] == '-h' or args[1] == '--help' or args[1] == '-H': + help(args) + return 1 + command = args.pop(1) + args[0] = "{} {}".format(args[0], command) + if command == "build": + return build(args) + if command == "libfuzzer": + return libfuzzer_cmd(args) + if command == "regression": + return regression(args) + if command == "afl": + return afl(args) + if command == "gen": + return gen(args) + if command == "minimize": + return minimize(args) + if command == "zip": + return zip_cmd(args) + if command == "list": + return list_cmd(args) + short_help(args) + print("Error: No such command {} (pass -h for help)".format(command)) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/zstd/tests/fuzz/fuzz_data_producer.c b/src/zstd/tests/fuzz/fuzz_data_producer.c new file mode 100644 index 000000000..6518af309 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_data_producer.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "fuzz_data_producer.h" + +struct FUZZ_dataProducer_s{ + const uint8_t *data; + size_t size; +}; + +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) { + FUZZ_dataProducer_t *producer = FUZZ_malloc(sizeof(FUZZ_dataProducer_t)); + + producer->data = data; + producer->size = size; + return producer; +} + +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); } + +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max) { + FUZZ_ASSERT(min <= max); + + uint32_t range = max - min; + uint32_t rolling = range; + uint32_t result = 0; + + while (rolling > 0 && producer->size > 0) { + uint8_t next = *(producer->data + producer->size - 1); + producer->size -= 1; + result = (result << 8) | next; + rolling >>= 8; + } + + if (range == 0xffffffff) { + return result; + } + + return min + result % (range + 1); +} + +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) { + return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff); +} + +int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, + int32_t min, int32_t max) +{ + FUZZ_ASSERT(min <= max); + + if (min < 0) + return (int)FUZZ_dataProducer_uint32Range(producer, 0, max - min) + min; + + return FUZZ_dataProducer_uint32Range(producer, min, max); +} + +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){ + return producer->size; +} + +size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize) +{ + newSize = newSize > producer->size ? producer->size : newSize; + + size_t remaining = producer->size - newSize; + producer->data = producer->data + remaining; + producer->size = newSize; + return remaining; +} + +size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer) +{ + size_t producerSliceSize = FUZZ_dataProducer_uint32Range( + producer, 0, producer->size); + return FUZZ_dataProducer_contract(producer, producerSliceSize); +} diff --git a/src/zstd/tests/fuzz/fuzz_data_producer.h b/src/zstd/tests/fuzz/fuzz_data_producer.h new file mode 100644 index 000000000..41e0b52d5 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_data_producer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * Helper APIs for generating random data from input data stream. + The producer reads bytes from the end of the input and appends them together + to generate a random number in the requested range. If it runs out of input + data, it will keep returning the same value (min) over and over again. + + */ + +#ifndef FUZZ_DATA_PRODUCER_H +#define FUZZ_DATA_PRODUCER_H + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "fuzz_helpers.h" + +/* Struct used for maintaining the state of the data */ +typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t; + +/* Returns a data producer state struct. Use for producer initialization. */ +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size); + +/* Frees the data producer */ +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer); + +/* Returns value between [min, max] */ +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max); + +/* Returns a uint32 value */ +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer); + +/* Returns a signed value between [min, max] */ +int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, + int32_t min, int32_t max); + +/* Returns the size of the remaining bytes of data in the producer */ +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer); + +/* Restricts the producer to only the last newSize bytes of data. +If newSize > current data size, nothing happens. Returns the number of bytes +the producer won't use anymore, after contracting. */ +size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize); + +/* Restricts the producer to use only the last X bytes of data, where X is + a random number in the interval [0, data_size]. Returns the size of the + remaining data the producer won't use anymore (the prefix). */ +size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer); +#endif // FUZZ_DATA_PRODUCER_H diff --git a/src/zstd/tests/fuzz/fuzz_helpers.c b/src/zstd/tests/fuzz/fuzz_helpers.c new file mode 100644 index 000000000..b80dc7571 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_helpers.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +#include "fuzz_helpers.h" + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +void* FUZZ_malloc(size_t size) +{ + if (size > 0) { + void* const mem = malloc(size); + FUZZ_ASSERT(mem); + return mem; + } + return NULL; +} + +int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size) +{ + if (size == 0) { + return 0; + } + return memcmp(lhs, rhs, size); +}
\ No newline at end of file diff --git a/src/zstd/tests/fuzz/fuzz_helpers.h b/src/zstd/tests/fuzz/fuzz_helpers.h new file mode 100644 index 000000000..cde2c4ea7 --- /dev/null +++ b/src/zstd/tests/fuzz/fuzz_helpers.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * Helper functions for fuzzing. + */ + +#ifndef FUZZ_HELPERS_H +#define FUZZ_HELPERS_H + +#include "debug.h" +#include "fuzz.h" +#include "xxhash.h" +#include "zstd.h" +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define FUZZ_QUOTE_IMPL(str) #str +#define FUZZ_QUOTE(str) FUZZ_QUOTE_IMPL(str) + +/** + * Asserts for fuzzing that are always enabled. + */ +#define FUZZ_ASSERT_MSG(cond, msg) \ + ((cond) ? (void)0 \ + : (fprintf(stderr, "%s: %u: Assertion: `%s' failed. %s\n", __FILE__, \ + __LINE__, FUZZ_QUOTE(cond), (msg)), \ + abort())) +#define FUZZ_ASSERT(cond) FUZZ_ASSERT_MSG((cond), ""); +#define FUZZ_ZASSERT(code) \ + FUZZ_ASSERT_MSG(!ZSTD_isError(code), ZSTD_getErrorName(code)) + +#if defined(__GNUC__) +#define FUZZ_STATIC static __inline __attribute__((unused)) +#elif defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#define FUZZ_STATIC static inline +#elif defined(_MSC_VER) +#define FUZZ_STATIC static __inline +#else +#define FUZZ_STATIC static +#endif + +/** + * malloc except return NULL for zero sized data and FUZZ_ASSERT + * that malloc doesn't fail. + */ +void* FUZZ_malloc(size_t size); + +/** + * memcmp but accepts NULL. + */ +int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/zstd/tests/fuzz/raw_dictionary_round_trip.c b/src/zstd/tests/fuzz/raw_dictionary_round_trip.c new file mode 100644 index 000000000..08e5fd9ed --- /dev/null +++ b/src/zstd/tests/fuzz/raw_dictionary_round_trip.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress) with + * a raw content dictionary, compares the result with the original, and calls + * abort() on corruption. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + const void *dict, size_t dictSize, + FUZZ_dataProducer_t *producer) +{ + ZSTD_dictContentType_e const dictContentType = ZSTD_dct_rawContent; + int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0; + size_t cSize; + + FUZZ_setRandomParameters(cctx, srcSize, producer); + /* Disable checksum so we can use sizes smaller than compress bound. */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + if (refPrefix) + FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced( + cctx, dict, dictSize, + ZSTD_dct_rawContent)); + else + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + ZSTD_dct_rawContent)); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(cSize); + + if (refPrefix) + FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced( + dctx, dict, dictSize, + dictContentType)); + else + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict, dictSize, + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + dictContentType)); + { + size_t const ret = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, cSize); + return ret; + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + uint8_t const* const srcBuf = src; + size_t const srcSize = FUZZ_dataProducer_uint32Range(producer, 0, size); + uint8_t const* const dictBuf = srcBuf + srcSize; + size_t const dictSize = size - srcSize; + size_t const decompSize = srcSize; + void* const decompBuf = FUZZ_malloc(decompSize); + size_t compSize = ZSTD_compressBound(srcSize); + void* compBuf; + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we force the checksum to be disabled, + * giving us 4 bytes of overhead. + */ + compSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + compBuf = FUZZ_malloc(compSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(decompBuf, decompSize, compBuf, compSize, srcBuf, srcSize, dictBuf, dictSize, producer); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == srcSize, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, decompBuf, srcSize), "Corruption!"); + } + free(decompBuf); + free(compBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/regression_driver.c b/src/zstd/tests/fuzz/regression_driver.c new file mode 100644 index 000000000..8180ca822 --- /dev/null +++ b/src/zstd/tests/fuzz/regression_driver.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "fuzz.h" +#include "fuzz_helpers.h" +#include "util.h" +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char const **argv) { + size_t const kMaxFileSize = (size_t)1 << 27; + int const kFollowLinks = 1; + FileNamesTable* files; + const char** const fnTable = argv + 1; + uint8_t *buffer = NULL; + size_t bufferSize = 0; + unsigned i; + unsigned numFilesTested = 0; + int ret = 0; + + { + unsigned const numFiles = (unsigned)(argc - 1); +#ifdef UTIL_HAS_CREATEFILELIST + files = UTIL_createExpandedFNT(fnTable, numFiles, kFollowLinks); +#else + files = UTIL_createFNT_fromROTable(fnTable, numFiles); + assert(numFiles == files->tableSize); +#endif + } + if (!files) { + fprintf(stderr, "ERROR: Failed to create file names table\n"); + return 1; + } + if (files->tableSize == 0) + fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); + for (i = 0; i < files->tableSize; ++i) { + char const *fileName = files->fileNames[i]; + DEBUGLOG(3, "Running %s", fileName); + size_t const fileSize = UTIL_getFileSize(fileName); + size_t readSize; + FILE *file; + + /* Check that it is a regular file, and that the fileSize is valid. + * If it is not a regular file, then it may have been deleted since we + * constructed the list, so just skip it, but return an error exit code. + */ + if (!UTIL_isRegularFile(fileName)) { + ret = 1; + continue; + } + FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName); + /* Ensure we have a large enough buffer allocated */ + if (fileSize > bufferSize) { + free(buffer); + buffer = (uint8_t *)malloc(fileSize); + FUZZ_ASSERT_MSG(buffer, fileName); + bufferSize = fileSize; + } + /* Open the file */ + file = fopen(fileName, "rb"); + FUZZ_ASSERT_MSG(file, fileName); + /* Read the file */ + readSize = fread(buffer, 1, fileSize, file); + FUZZ_ASSERT_MSG(readSize == fileSize, fileName); + /* Close the file */ + fclose(file); + /* Run the fuzz target */ + LLVMFuzzerTestOneInput(buffer, fileSize); + ++numFilesTested; + } + fprintf(stderr, "Tested %u files: ", numFilesTested); + if (ret == 0) { + fprintf(stderr, "Success!\n"); + } else { + fprintf(stderr, "Failure!\n"); + } + free(buffer); + UTIL_freeFileNamesTable(files); + return ret; +} diff --git a/src/zstd/tests/fuzz/simple_compress.c b/src/zstd/tests/fuzz/simple_compress.c new file mode 100644 index 000000000..b64f373eb --- /dev/null +++ b/src/zstd/tests/fuzz/simple_compress.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to comprss the fuzzed data with the simple + * compression function with an output buffer that may be too small to + * ensure that the compressor never crashes. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + size_t const maxSize = ZSTD_compressBound(size); + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, maxSize); + + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + + void *rBuf = FUZZ_malloc(bufSize); + ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel); + free(rBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/simple_decompress.c b/src/zstd/tests/fuzz/simple_decompress.c new file mode 100644 index 000000000..c3903ce8b --- /dev/null +++ b/src/zstd/tests/fuzz/simple_decompress.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "fuzz_data_producer.h" + +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void *rBuf = FUZZ_malloc(bufSize); + + ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + free(rBuf); + + FUZZ_dataProducer_free(producer); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/simple_round_trip.c b/src/zstd/tests/fuzz/simple_round_trip.c new file mode 100644 index 000000000..2f008d06a --- /dev/null +++ b/src/zstd/tests/fuzz/simple_round_trip.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + FUZZ_dataProducer_t *producer) +{ + size_t cSize; + size_t dSize; + int targetCBlockSize = 0; + if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) { + FUZZ_setRandomParameters(cctx, srcSize, producer); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize)); + } else { + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + + cSize = ZSTD_compressCCtx( + cctx, compressed, compressedCapacity, src, srcSize, cLevel); + } + FUZZ_ZASSERT(cSize); + dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); + FUZZ_ZASSERT(dSize); + /* When superblock is enabled make sure we don't expand the block more than expected. */ + if (targetCBlockSize != 0) { + size_t normalCSize; + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0)); + normalCSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + FUZZ_ZASSERT(normalCSize); + { + size_t const bytesPerBlock = 3 /* block header */ + + 5 /* Literal header */ + + 6 /* Huffman jump table */ + + 3 /* number of sequences */ + + 1 /* symbol compression modes */; + size_t const expectedExpansion = bytesPerBlock * (1 + (normalCSize / MAX(1, targetCBlockSize))); + size_t const allowedExpansion = (srcSize >> 3) + 5 * expectedExpansion + 10; + FUZZ_ASSERT(cSize <= normalCSize + allowedExpansion); + } + } + return dSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const rBufSize = size; + void* rBuf = FUZZ_malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void* cBuf; + + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we don't use a dictionary, so the dictID + * field is empty, giving us 4 bytes of overhead. + */ + cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + + cBuf = FUZZ_malloc(cBufSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + free(rBuf); + free(cBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/stream_decompress.c b/src/zstd/tests/fuzz/stream_decompress.c new file mode 100644 index 000000000..25901b1eb --- /dev/null +++ b/src/zstd/tests/fuzz/stream_decompress.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd.h" +#include "fuzz_data_producer.h" + +static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX; + +static ZSTD_DStream *dstream = NULL; +static void* buf = NULL; +uint32_t seed; + +static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer, uint32_t min) +{ + ZSTD_outBuffer buffer = { buf, 0, 0 }; + + buffer.size = (FUZZ_dataProducer_uint32Range(producer, min, kBufSize)); + FUZZ_ASSERT(buffer.size <= kBufSize); + + if (buffer.size == 0) { + buffer.dst = NULL; + } + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer, + uint32_t min) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, min, *size)); + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + if (buffer.size == 0) { + buffer.src = NULL; + } + + return buffer; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + /* Guarantee forward progress by refusing to generate 2 zero sized + * buffers in a row. */ + int prevInWasZero = 0; + int prevOutWasZero = 0; + int stableOutBuffer; + ZSTD_outBuffer out; + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + /* Allocate all buffers and contexts if not already allocated */ + if (!buf) { + buf = FUZZ_malloc(kBufSize); + } + + if (!dstream) { + dstream = ZSTD_createDStream(); + FUZZ_ASSERT(dstream); + } else { + FUZZ_ZASSERT(ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only)); + } + + stableOutBuffer = FUZZ_dataProducer_uint32Range(producer, 0, 10) == 5; + if (stableOutBuffer) { + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dstream, ZSTD_d_stableOutBuffer, 1)); + out.dst = buf; + out.size = kBufSize; + out.pos = 0; + } + + while (size > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &size, producer, prevInWasZero ? 1 : 0); + prevInWasZero = in.size == 0; + while (in.pos != in.size) { + if (!stableOutBuffer || prevOutWasZero || FUZZ_dataProducer_uint32Range(producer, 0, 100) == 55) { + out = makeOutBuffer(producer, prevOutWasZero ? 1 : 0); + } + prevOutWasZero = out.size == 0; + size_t const rc = ZSTD_decompressStream(dstream, &out, &in); + if (ZSTD_isError(rc)) goto error; + } + } + +error: +#ifndef STATEFUL_FUZZING + ZSTD_freeDStream(dstream); dstream = NULL; +#endif + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/src/zstd/tests/fuzz/stream_round_trip.c b/src/zstd/tests/fuzz/stream_round_trip.c new file mode 100644 index 000000000..286d3871b --- /dev/null +++ b/src/zstd/tests/fuzz/stream_round_trip.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static uint8_t* cBuf = NULL; +static uint8_t* rBuf = NULL; +static size_t bufSize = 0; + +static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity, + FUZZ_dataProducer_t *producer) +{ + ZSTD_outBuffer buffer = { dst, 0, 0 }; + + FUZZ_ASSERT(capacity > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity)); + FUZZ_ASSERT(buffer.size <= capacity); + + return buffer; +} + +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer) +{ + ZSTD_inBuffer buffer = { *src, 0, 0 }; + + FUZZ_ASSERT(*size > 0); + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size)); + FUZZ_ASSERT(buffer.size <= *size); + *src += buffer.size; + *size -= buffer.size; + + return buffer; +} + +static size_t compress(uint8_t *dst, size_t capacity, + const uint8_t *src, size_t srcSize, + FUZZ_dataProducer_t *producer) +{ + size_t dstSize = 0; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + FUZZ_setRandomParameters(cctx, srcSize, producer); + + while (srcSize > 0) { + ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer); + /* Mode controls the action. If mode == -1 we pick a new mode */ + int mode = -1; + while (in.pos < in.size || mode != -1) { + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + /* Previous action finished, pick a new mode. */ + if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9); + switch (mode) { + case 0: /* fall-through */ + case 1: /* fall-through */ + case 2: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush); + FUZZ_ZASSERT(ret); + if (ret == 0) + mode = -1; + break; + } + case 3: { + size_t ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + /* Reset the compressor when the frame is finished */ + if (ret == 0) { + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) { + size_t const remaining = in.size - in.pos; + FUZZ_setRandomParameters(cctx, remaining, producer); + } + mode = -1; + } + break; + } + case 4: { + ZSTD_inBuffer nullIn = { NULL, 0, 0 }; + ZSTD_outBuffer nullOut = { NULL, 0, 0 }; + size_t const ret = ZSTD_compressStream2(cctx, &nullOut, &nullIn, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + } + /* fall-through */ + default: { + size_t const ret = + ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue); + FUZZ_ZASSERT(ret); + mode = -1; + } + } + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + } + } + for (;;) { + ZSTD_inBuffer in = {NULL, 0, 0}; + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); + size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); + FUZZ_ZASSERT(ret); + + dst += out.pos; + dstSize += out.pos; + capacity -= out.pos; + if (ret == 0) + break; + } + return dstSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + neededBufSize = ZSTD_compressBound(size) * 15; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + rBuf = (uint8_t*)FUZZ_malloc(neededBufSize); + bufSize = neededBufSize; + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const cSize = compress(cBuf, neededBufSize, src, size, producer); + size_t const rSize = + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); + FUZZ_ZASSERT(rSize); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } + + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/src/zstd/tests/fuzz/zstd_frame_info.c b/src/zstd/tests/fuzz/zstd_frame_info.c new file mode 100644 index 000000000..876a74e9a --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_frame_info.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target fuzzes all of the helper functions that consume compressed + * input. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + ZSTD_frameHeader zfh; + if (size == 0) { + src = NULL; + } + /* You can fuzz any helper functions here that are fast, and take zstd + * compressed data as input. E.g. don't expect the input to be a dictionary, + * so don't fuzz ZSTD_getDictID_fromDict(). + */ + ZSTD_getFrameContentSize(src, size); + ZSTD_getDecompressedSize(src, size); + ZSTD_findFrameCompressedSize(src, size); + ZSTD_getDictID_fromFrame(src, size); + ZSTD_findDecompressedSize(src, size); + ZSTD_decompressBound(src, size); + ZSTD_frameHeaderSize(src, size); + ZSTD_isFrame(src, size); + ZSTD_getFrameHeader(&zfh, src, size); + ZSTD_getFrameHeader_advanced(&zfh, src, size, ZSTD_f_zstd1); + return 0; +} diff --git a/src/zstd/tests/fuzz/zstd_helpers.c b/src/zstd/tests/fuzz/zstd_helpers.c new file mode 100644 index 000000000..5680bd628 --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_helpers.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#define ZSTD_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY + +#include <string.h> + +#include "zstd_helpers.h" +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zdict.h" + +const int kMinClevel = -3; +const int kMaxClevel = 19; + +static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) +{ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); +} + +static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, + unsigned max, FUZZ_dataProducer_t *producer) { + unsigned const value = FUZZ_dataProducer_uint32Range(producer, min, max); + set(cctx, param, value); +} + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) +{ + /* Select compression parameters */ + ZSTD_compressionParameters cParams; + cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); + cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); + cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); + cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); + cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, + ZSTD_MINMATCH_MAX); + cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); + cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); + return ZSTD_adjustCParams(cParams, srcSize, 0); +} + +ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) +{ + /* Select frame parameters */ + ZSTD_frameParameters fParams; + fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + return fParams; +} + +ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) +{ + ZSTD_parameters params; + params.cParams = FUZZ_randomCParams(srcSize, producer); + params.fParams = FUZZ_randomFParams(producer); + return params; +} + +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) +{ + ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); + set(cctx, ZSTD_c_windowLog, cParams.windowLog); + set(cctx, ZSTD_c_hashLog, cParams.hashLog); + set(cctx, ZSTD_c_chainLog, cParams.chainLog); + set(cctx, ZSTD_c_searchLog, cParams.searchLog); + set(cctx, ZSTD_c_minMatch, cParams.minMatch); + set(cctx, ZSTD_c_targetLength, cParams.targetLength); + set(cctx, ZSTD_c_strategy, cParams.strategy); + /* Select frame parameters */ + setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); + setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); + setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); + /* Select long distance matching parameters */ + setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, producer); + setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); + setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, + ZSTD_LDM_MINMATCH_MAX, producer); + setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, + producer); + setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, + ZSTD_LDM_HASHRATELOG_MAX, producer); + /* Set misc parameters */ + setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); + setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); + setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); + setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); + setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); + } + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer); + } +} + +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer) +{ + size_t const dictSize = MAX(srcSize / 8, 1024); + size_t const totalSampleSize = dictSize * 11; + FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize }; + char* const samples = (char*)FUZZ_malloc(totalSampleSize); + unsigned nbSamples = 100; + size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples); + size_t pos = 0; + size_t sample = 0; + ZDICT_fastCover_params_t params; + + for (sample = 0; sample < nbSamples; ++sample) { + size_t const remaining = totalSampleSize - pos; + size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); + size_t const limit = MIN(srcSize - offset, remaining); + size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); + memcpy(samples + pos, src + offset, toCopy); + pos += toCopy; + samplesSizes[sample] = toCopy; + } + memset(samples + pos, 0, totalSampleSize - pos); + + memset(¶ms, 0, sizeof(params)); + params.accel = 5; + params.k = 40; + params.d = 8; + params.f = 14; + params.zParams.compressionLevel = 1; + dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, + samples, samplesSizes, nbSamples, params); + if (ZSTD_isError(dict.size)) { + free(dict.buff); + memset(&dict, 0, sizeof(dict)); + } + + free(samplesSizes); + free(samples); + + return dict; +} diff --git a/src/zstd/tests/fuzz/zstd_helpers.h b/src/zstd/tests/fuzz/zstd_helpers.h new file mode 100644 index 000000000..6a4e340d3 --- /dev/null +++ b/src/zstd/tests/fuzz/zstd_helpers.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/** + * Helper functions for fuzzing. + */ + +#ifndef ZSTD_HELPERS_H +#define ZSTD_HELPERS_H + +#define ZSTD_STATIC_LINKING_ONLY + +#include "zstd.h" +#include "fuzz_data_producer.h" +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern const int kMinClevel; +extern const int kMaxClevel; + +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer); + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer); +ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer); +ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer); + +typedef struct { + void* buff; + size_t size; +} FUZZ_dict_t; + +/* Quickly train a dictionary from a source for fuzzing. + * NOTE: Don't use this to train production dictionaries, it is only optimized + * for speed, and doesn't care about dictionary quality. + */ +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer); + +#ifdef __cplusplus +} +#endif + +#endif /* ZSTD_HELPERS_H */ |