diff options
Diffstat (limited to 'src/zstd/tests/regression')
-rw-r--r-- | src/zstd/tests/regression/.gitignore | 3 | ||||
-rw-r--r-- | src/zstd/tests/regression/Makefile | 59 | ||||
-rw-r--r-- | src/zstd/tests/regression/config.c | 278 | ||||
-rw-r--r-- | src/zstd/tests/regression/config.h | 86 | ||||
-rw-r--r-- | src/zstd/tests/regression/data.c | 613 | ||||
-rw-r--r-- | src/zstd/tests/regression/data.h | 121 | ||||
-rw-r--r-- | src/zstd/tests/regression/levels.h | 44 | ||||
-rw-r--r-- | src/zstd/tests/regression/method.c | 688 | ||||
-rw-r--r-- | src/zstd/tests/regression/method.h | 65 | ||||
-rw-r--r-- | src/zstd/tests/regression/result.c | 28 | ||||
-rw-r--r-- | src/zstd/tests/regression/result.h | 103 | ||||
-rw-r--r-- | src/zstd/tests/regression/results.csv | 636 | ||||
-rw-r--r-- | src/zstd/tests/regression/test.c | 362 |
13 files changed, 3086 insertions, 0 deletions
diff --git a/src/zstd/tests/regression/.gitignore b/src/zstd/tests/regression/.gitignore new file mode 100644 index 000000000..1b2618f41 --- /dev/null +++ b/src/zstd/tests/regression/.gitignore @@ -0,0 +1,3 @@ +# regression test artifacts +data-cache +test diff --git a/src/zstd/tests/regression/Makefile b/src/zstd/tests/regression/Makefile new file mode 100644 index 000000000..87c1c2b96 --- /dev/null +++ b/src/zstd/tests/regression/Makefile @@ -0,0 +1,59 @@ +# ################################################################ +# Copyright (c) 2015-2020, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +CFLAGS ?= -O3 + +CURL_CFLAGS := $(shell curl-config --cflags) +CURL_LDFLAGS := $(shell curl-config --libs) -pthread + +PROGDIR := ../../programs +LIBDIR := ../../lib +ZSTD_CPPFLAGS := -I$(PROGDIR) -I$(LIBDIR) -I$(LIBDIR)/common + +REGRESSION_CFLAGS = $(CFLAGS) $(CURL_CFLAGS) +REGRESSION_CPPFLAGS = $(CPPFLAGS) $(ZSTD_CPPFLAGS) +REGRESSION_LDFLAGS = $(LDFLAGS) $(CURL_LDFLAGS) + +all: test + +xxhash.o: $(LIBDIR)/common/xxhash.c $(LIBDIR)/common/xxhash.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +util.o: $(PROGDIR)/util.c $(PROGDIR)/util.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +data.o: data.c data.h $(PROGDIR)/util.h $(LIBDIR)/common/xxhash.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +config.o: config.c config.h levels.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +method.h: data.h config.h result.h + +method.o: method.c method.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +result.o: result.c result.h + 
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +test.o: test.c data.h config.h method.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +libzstd.a: + $(MAKE) -C $(LIBDIR) libzstd.a-mt + cp $(LIBDIR)/libzstd.a . + +test: test.o data.o config.o util.o method.o result.o xxhash.o libzstd.a + $(CC) $^ $(REGRESSION_LDFLAGS) -o $@ + +.PHONY: clean +clean: + $(MAKE) -C $(LIBDIR) clean + $(RM) *.o *.a test diff --git a/src/zstd/tests/regression/config.c b/src/zstd/tests/regression/config.c new file mode 100644 index 000000000..ed6b69235 --- /dev/null +++ b/src/zstd/tests/regression/config.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "config.h" + +/* Define a config for each fast level we want to test with. */ +#define FAST_LEVEL(x) \ + param_value_t const level_fast##x##_param_values[] = { \ + {.param = ZSTD_c_compressionLevel, .value = -x}, \ + }; \ + config_t const level_fast##x = { \ + .name = "level -" #x, \ + .cli_args = "--fast=" #x, \ + .param_values = PARAM_VALUES(level_fast##x##_param_values), \ + }; \ + config_t const level_fast##x##_dict = { \ + .name = "level -" #x " with dict", \ + .cli_args = "--fast=" #x, \ + .param_values = PARAM_VALUES(level_fast##x##_param_values), \ + .use_dictionary = 1, \ + }; + +/* Define a config for each level we want to test with. 
*/ +#define LEVEL(x) \ + param_value_t const level_##x##_param_values[] = { \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + }; \ + config_t const level_##x = { \ + .name = "level " #x, \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(level_##x##_param_values), \ + }; \ + config_t const level_##x##_dict = { \ + .name = "level " #x " with dict", \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(level_##x##_param_values), \ + .use_dictionary = 1, \ + }; + +#define PARAM_VALUES(pv) \ + { .data = pv, .size = sizeof(pv) / sizeof((pv)[0]) } + +#include "levels.h" + +#undef LEVEL +#undef FAST_LEVEL + +static config_t no_pledged_src_size = { + .name = "no source size", + .cli_args = "", + .param_values = PARAM_VALUES(level_0_param_values), + .no_pledged_src_size = 1, +}; + +static param_value_t const ldm_param_values[] = { + {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, +}; + +static config_t ldm = { + .name = "long distance mode", + .cli_args = "--long", + .param_values = PARAM_VALUES(ldm_param_values), +}; + +static param_value_t const mt_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, +}; + +static config_t mt = { + .name = "multithreaded", + .cli_args = "-T2", + .param_values = PARAM_VALUES(mt_param_values), +}; + +static param_value_t const mt_ldm_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, + {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, +}; + +static config_t mt_ldm = { + .name = "multithreaded long distance mode", + .cli_args = "-T2 --long", + .param_values = PARAM_VALUES(mt_ldm_param_values), +}; + +static param_value_t mt_advanced_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t mt_advanced = { + .name = "multithreaded with advanced params", + .cli_args = "-T2 --no-compress-literals", + .param_values = PARAM_VALUES(mt_advanced_param_values), +}; + +static param_value_t const 
small_wlog_param_values[] = { + {.param = ZSTD_c_windowLog, .value = 10}, +}; + +static config_t small_wlog = { + .name = "small window log", + .cli_args = "--zstd=wlog=10", + .param_values = PARAM_VALUES(small_wlog_param_values), +}; + +static param_value_t const small_hlog_param_values[] = { + {.param = ZSTD_c_hashLog, .value = 6}, + {.param = ZSTD_c_strategy, .value = (int)ZSTD_btopt}, +}; + +static config_t small_hlog = { + .name = "small hash log", + .cli_args = "--zstd=hlog=6,strat=7", + .param_values = PARAM_VALUES(small_hlog_param_values), +}; + +static param_value_t const small_clog_param_values[] = { + {.param = ZSTD_c_chainLog, .value = 6}, + {.param = ZSTD_c_strategy, .value = (int)ZSTD_btopt}, +}; + +static config_t small_clog = { + .name = "small chain log", + .cli_args = "--zstd=clog=6,strat=7", + .param_values = PARAM_VALUES(small_clog_param_values), +}; + +static param_value_t const uncompressed_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = 3}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals = { + .name = "uncompressed literals", + .cli_args = "-3 --no-compress-literals", + .param_values = PARAM_VALUES(uncompressed_literals_param_values), +}; + +static param_value_t const uncompressed_literals_opt_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = 19}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals_opt = { + .name = "uncompressed literals optimal", + .cli_args = "-19 --no-compress-literals", + .param_values = PARAM_VALUES(uncompressed_literals_opt_param_values), +}; + +static param_value_t const huffman_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = -1}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_huffman}, +}; + +static config_t huffman_literals = { + .name = "huffman literals", + .cli_args = "--fast=1 
--compress-literals", + .param_values = PARAM_VALUES(huffman_literals_param_values), +}; + +static param_value_t const explicit_params_param_values[] = { + {.param = ZSTD_c_checksumFlag, .value = 1}, + {.param = ZSTD_c_contentSizeFlag, .value = 0}, + {.param = ZSTD_c_dictIDFlag, .value = 0}, + {.param = ZSTD_c_strategy, .value = (int)ZSTD_greedy}, + {.param = ZSTD_c_windowLog, .value = 18}, + {.param = ZSTD_c_hashLog, .value = 21}, + {.param = ZSTD_c_chainLog, .value = 21}, + {.param = ZSTD_c_targetLength, .value = 100}, +}; + +static config_t explicit_params = { + .name = "explicit params", + .cli_args = "--no-check --no-dictID --zstd=strategy=3,wlog=18,hlog=21,clog=21,tlen=100", + .param_values = PARAM_VALUES(explicit_params_param_values), +}; + +static config_t const* g_configs[] = { + +#define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict, +#define LEVEL(x) &level_##x, &level_##x##_dict, +#include "levels.h" +#undef LEVEL +#undef FAST_LEVEL + + &no_pledged_src_size, + &ldm, + &mt, + &mt_ldm, + &small_wlog, + &small_hlog, + &small_clog, + &explicit_params, + &uncompressed_literals, + &uncompressed_literals_opt, + &huffman_literals, + &mt_advanced, + NULL, +}; + +config_t const* const* configs = g_configs; + +int config_skip_data(config_t const* config, data_t const* data) { + return config->use_dictionary && !data_has_dict(data); +} + +int config_get_level(config_t const* config) +{ + param_values_t const params = config->param_values; + size_t i; + for (i = 0; i < params.size; ++i) { + if (params.data[i].param == ZSTD_c_compressionLevel) + return (int)params.data[i].value; + } + return CONFIG_NO_LEVEL; +} + +ZSTD_parameters config_get_zstd_params( + config_t const* config, + uint64_t srcSize, + size_t dictSize) +{ + ZSTD_parameters zparams = {}; + param_values_t const params = config->param_values; + int level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + level = 3; + zparams = ZSTD_getParams( + level, + config->no_pledged_src_size ? 
ZSTD_CONTENTSIZE_UNKNOWN : srcSize, + dictSize); + for (size_t i = 0; i < params.size; ++i) { + unsigned const value = params.data[i].value; + switch (params.data[i].param) { + case ZSTD_c_contentSizeFlag: + zparams.fParams.contentSizeFlag = value; + break; + case ZSTD_c_checksumFlag: + zparams.fParams.checksumFlag = value; + break; + case ZSTD_c_dictIDFlag: + zparams.fParams.noDictIDFlag = !value; + break; + case ZSTD_c_windowLog: + zparams.cParams.windowLog = value; + break; + case ZSTD_c_chainLog: + zparams.cParams.chainLog = value; + break; + case ZSTD_c_hashLog: + zparams.cParams.hashLog = value; + break; + case ZSTD_c_searchLog: + zparams.cParams.searchLog = value; + break; + case ZSTD_c_minMatch: + zparams.cParams.minMatch = value; + break; + case ZSTD_c_targetLength: + zparams.cParams.targetLength = value; + break; + case ZSTD_c_strategy: + zparams.cParams.strategy = (ZSTD_strategy)value; + break; + default: + break; + } + } + return zparams; +} diff --git a/src/zstd/tests/regression/config.h b/src/zstd/tests/regression/config.h new file mode 100644 index 000000000..aa563b9e9 --- /dev/null +++ b/src/zstd/tests/regression/config.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include <stddef.h> + +#define ZSTD_STATIC_LINKING_ONLY +#include <zstd.h> + +#include "data.h" + +typedef struct { + ZSTD_cParameter param; + int value; +} param_value_t; + +typedef struct { + size_t size; + param_value_t const* data; +} param_values_t; + +/** + * The config tells the compression method what options to use. 
+ */ +typedef struct { + const char* name; /**< Identifies the config in the results table */ + /** + * Optional arguments to pass to the CLI. If not set, CLI-based methods + * will skip this config. + */ + char const* cli_args; + /** + * Parameters to pass to the advanced API. If the advanced API isn't used, + * the parameters will be derived from these. + */ + param_values_t param_values; + /** + * Boolean parameter that says if we should use a dictionary. If the data + * doesn't have a dictionary, this config is skipped. Defaults to no. + */ + int use_dictionary; + /** + * Boolean parameter that says if we should pass the pledged source size + * when the method allows it. Defaults to yes. + */ + int no_pledged_src_size; +} config_t; + +/** + * Returns true if the config should skip this data. + * For instance, if the config requires a dictionary but the data doesn't have + * one. + */ +int config_skip_data(config_t const* config, data_t const* data); + +#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1) +/** + * Returns the compression level specified by the config, or CONFIG_NO_LEVEL if + * no level is specified. Note that 0 is a valid compression level, meaning + * default. + */ +int config_get_level(config_t const* config); + +/** + * Returns the compression parameters specified by the config. + */ +ZSTD_parameters config_get_zstd_params( + config_t const* config, + uint64_t srcSize, + size_t dictSize); + +/** + * The NULL-terminated list of configs. + */ +extern config_t const* const* configs; + +#endif diff --git a/src/zstd/tests/regression/data.c b/src/zstd/tests/regression/data.c new file mode 100644 index 000000000..b75ac1192 --- /dev/null +++ b/src/zstd/tests/regression/data.c @@ -0,0 +1,613 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. 
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "data.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <sys/stat.h>

#include <curl/curl.h>

#include "mem.h"
#include "util.h"
/* Defined before including xxhash.h. NOTE(review): presumably this is what
 * exposes the full XXH64_state_t definition so that states can be declared by
 * value in this file -- confirm against xxhash.h. */
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"

/**
 * Data objects
 *
 * Each data_t describes one downloadable resource. Only the name, type, url
 * and xxhash64 members are set here; the path members start out NULL and are
 * filled in at runtime by data_init().
 */

/* Builds a URL by prefixing the file name `x` (a string literal) with the
 * address of the regression-data release. */
#define REGRESSION_RELEASE(x) \
    "https://github.com/facebook/zstd/releases/download/regression-data/" x

/* silesia.tar.zst, handled as a directory (data_type_dir): the download is
 * decompressed and untarred into the cache directory. No dictionary. */
data_t silesia = {
    .name = "silesia",
    .type = data_type_dir,
    .data =
        {
            .url = REGRESSION_RELEASE("silesia.tar.zst"),
            .xxhash64 = 0x48a199f92f93e977LL,
        },
};

/* The same archive (same url and hash as `silesia`), handled as a single file
 * (data_type_file): the download is only decompressed, not untarred. */
data_t silesia_tar = {
    .name = "silesia.tar",
    .type = data_type_file,
    .data =
        {
            .url = REGRESSION_RELEASE("silesia.tar.zst"),
            .xxhash64 = 0x48a199f92f93e977LL,
        },
};

/* github.tar.zst, handled as a directory, plus a dictionary resource. This is
 * the only data object for which data_has_dict() is true. */
data_t github = {
    .name = "github",
    .type = data_type_dir,
    .data =
        {
            .url = REGRESSION_RELEASE("github.tar.zst"),
            .xxhash64 = 0xa9b1b44b020df292LL,
        },
    .dict =
        {
            .url = REGRESSION_RELEASE("github.dict.zst"),
            .xxhash64 = 0x1eddc6f737d3cb53LL,

        },
};

/* NULL-terminated list of every data object. The entries are non-const
 * because data_init() writes the path members through this table. */
static data_t* g_data[] = {
    &silesia,
    &silesia_tar,
    &github,
    NULL,
};

/* Read-only view of g_data; this is the list declared in data.h. */
data_t const* const* data = (data_t const* const*)g_data;

/**
 * data helpers.
 */

/* Returns non-zero when the data object has a dictionary resource, i.e. when
 * its dict.url is set. */
int data_has_dict(data_t const* data) {
    return data->dict.url != NULL;
}

/**
 * data buffer helper functions (documented in header).
+ */ + +data_buffer_t data_buffer_create(size_t const capacity) { + data_buffer_t buffer = {}; + + buffer.data = (uint8_t*)malloc(capacity); + if (buffer.data == NULL) + return buffer; + buffer.capacity = capacity; + return buffer; +} + +data_buffer_t data_buffer_read(char const* filename) { + data_buffer_t buffer = {}; + + uint64_t const size = UTIL_getFileSize(filename); + if (size == UTIL_FILESIZE_UNKNOWN) { + fprintf(stderr, "unknown size for %s\n", filename); + return buffer; + } + + buffer.data = (uint8_t*)malloc(size); + if (buffer.data == NULL) { + fprintf(stderr, "malloc failed\n"); + return buffer; + } + buffer.capacity = size; + + FILE* file = fopen(filename, "rb"); + if (file == NULL) { + fprintf(stderr, "file null\n"); + goto err; + } + buffer.size = fread(buffer.data, 1, buffer.capacity, file); + fclose(file); + if (buffer.size != buffer.capacity) { + fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity); + goto err; + } + + return buffer; +err: + free(buffer.data); + memset(&buffer, 0, sizeof(buffer)); + return buffer; +} + +data_buffer_t data_buffer_get_data(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (data->type != data_type_file) + return kEmptyBuffer; + + return data_buffer_read(data->data.path); +} + +data_buffer_t data_buffer_get_dict(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (!data_has_dict(data)) + return kEmptyBuffer; + + return data_buffer_read(data->dict.path); +} + +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) { + size_t const size = + buffer1.size < buffer2.size ? buffer1.size : buffer2.size; + int const cmp = memcmp(buffer1.data, buffer2.data, size); + if (cmp != 0) + return cmp; + if (buffer1.size < buffer2.size) + return -1; + if (buffer1.size == buffer2.size) + return 0; + assert(buffer1.size > buffer2.size); + return 1; +} + +void data_buffer_free(data_buffer_t buffer) { + free(buffer.data); +} + +/** + * data filenames helpers. 
+ */ + +FileNamesTable* data_filenames_get(data_t const* data) +{ + char const* const path = data->data.path; + return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ ); +} + +/** + * data buffers helpers. + */ + +data_buffers_t data_buffers_get(data_t const* data) { + data_buffers_t buffers = {.size = 0}; + FileNamesTable* const filenames = data_filenames_get(data); + if (filenames == NULL) return buffers; + if (filenames->tableSize == 0) { + UTIL_freeFileNamesTable(filenames); + return buffers; + } + + data_buffer_t* buffersPtr = + (data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr)); + if (buffersPtr == NULL) { + UTIL_freeFileNamesTable(filenames); + return buffers; + } + buffers.buffers = (data_buffer_t const*)buffersPtr; + buffers.size = filenames->tableSize; + + for (size_t i = 0; i < filenames->tableSize; ++i) { + buffersPtr[i] = data_buffer_read(filenames->fileNames[i]); + if (buffersPtr[i].data == NULL) { + data_buffers_t const kEmptyBuffer = {}; + data_buffers_free(buffers); + UTIL_freeFileNamesTable(filenames); + return kEmptyBuffer; + } + } + + UTIL_freeFileNamesTable(filenames); + return buffers; +} + +/** + * Frees the data buffers. + */ +void data_buffers_free(data_buffers_t buffers) { + free((data_buffer_t*)buffers.buffers); +} + +/** + * Initialization and download functions. + */ + +static char* g_data_dir = NULL; + +/* mkdir -p */ +static int ensure_directory_exists(char const* indir) { + char* const dir = strdup(indir); + char* end = dir; + int ret = 0; + if (dir == NULL) { + ret = EINVAL; + goto out; + } + do { + /* Find the next directory level. */ + for (++end; *end != '\0' && *end != '/'; ++end) + ; + /* End the string there, make the directory, and restore the string. */ + char const save = *end; + *end = '\0'; + int const isdir = UTIL_isDirectory(dir); + ret = mkdir(dir, S_IRWXU); + *end = save; + /* Its okay if the directory already exists. 
*/ + if (ret == 0 || (errno == EEXIST && isdir)) + continue; + ret = errno; + fprintf(stderr, "mkdir() failed\n"); + goto out; + } while (*end != '\0'); + + ret = 0; +out: + free(dir); + return ret; +} + +/** Concatenate 3 strings into a new buffer. */ +static char* cat3(char const* str1, char const* str2, char const* str3) { + size_t const size1 = strlen(str1); + size_t const size2 = strlen(str2); + size_t const size3 = str3 == NULL ? 0 : strlen(str3); + size_t const size = size1 + size2 + size3 + 1; + char* const dst = (char*)malloc(size); + if (dst == NULL) + return NULL; + strcpy(dst, str1); + strcpy(dst + size1, str2); + if (str3 != NULL) + strcpy(dst + size1 + size2, str3); + assert(strlen(dst) == size1 + size2 + size3); + return dst; +} + +static char* cat2(char const* str1, char const* str2) { + return cat3(str1, str2, NULL); +} + +/** + * State needed by the curl callback. + * It takes data from curl, hashes it, and writes it to the file. + */ +typedef struct { + FILE* file; + XXH64_state_t xxhash64; + int error; +} curl_data_t; + +/** Create the curl state. */ +static curl_data_t curl_data_create( + data_resource_t const* resource, + data_type_t type) { + curl_data_t cdata = {}; + + XXH64_reset(&cdata.xxhash64, 0); + + assert(UTIL_isDirectory(g_data_dir)); + + if (type == data_type_file) { + /* Decompress the resource and store to the path. */ + char* cmd = cat3("zstd -dqfo '", resource->path, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } else { + /* Decompress and extract the resource to the cache directory. */ + char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } + if (cdata.file == NULL) { + cdata.error = errno; + } + + return cdata; +} + +/** Free the curl state. 
*/ +static int curl_data_free(curl_data_t cdata) { + return pclose(cdata.file); +} + +/** curl callback. Updates the hash, and writes to the file. */ +static size_t curl_write(void* data, size_t size, size_t count, void* ptr) { + curl_data_t* cdata = (curl_data_t*)ptr; + size_t const written = fwrite(data, size, count, cdata->file); + XXH64_update(&cdata->xxhash64, data, written * size); + return written; +} + +static int curl_download_resource( + CURL* curl, + data_resource_t const* resource, + data_type_t type) { + curl_data_t cdata; + /* Download the data. */ + if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0) + return EINVAL; + if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0) + return EINVAL; + cdata = curl_data_create(resource, type); + if (cdata.error != 0) + return cdata.error; + int const curl_err = curl_easy_perform(curl); + int const close_err = curl_data_free(cdata); + if (curl_err) { + fprintf( + stderr, + "downloading '%s' for '%s' failed\n", + resource->url, + resource->path); + return EIO; + } + if (close_err) { + fprintf(stderr, "writing data to '%s' failed\n", resource->path); + return EIO; + } + /* check that the file exists. */ + if (type == data_type_file && !UTIL_isRegularFile(resource->path)) { + fprintf(stderr, "output file '%s' does not exist\n", resource->path); + return EIO; + } + if (type == data_type_dir && !UTIL_isDirectory(resource->path)) { + fprintf( + stderr, "output directory '%s' does not exist\n", resource->path); + return EIO; + } + /* Check that the hash matches. */ + if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) { + fprintf( + stderr, + "checksum does not match: 0x%llxLL != 0x%llxLL\n", + (unsigned long long)XXH64_digest(&cdata.xxhash64), + (unsigned long long)resource->xxhash64); + return EINVAL; + } + + return 0; +} + +/** Download a single data object. 
*/ +static int curl_download_datum(CURL* curl, data_t const* data) { + int ret; + ret = curl_download_resource(curl, &data->data, data->type); + if (ret != 0) + return ret; + if (data_has_dict(data)) { + ret = curl_download_resource(curl, &data->dict, data_type_file); + if (ret != 0) + return ret; + } + return ret; +} + +/** Download all the data. */ +static int curl_download_data(data_t const* const* data) { + if (curl_global_init(CURL_GLOBAL_ALL) != 0) + return EFAULT; + + curl_data_t cdata = {}; + CURL* curl = curl_easy_init(); + int err = EFAULT; + + if (curl == NULL) + return EFAULT; + + if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0) + goto out; + + assert(data != NULL); + for (; *data != NULL; ++data) { + if (curl_download_datum(curl, *data) != 0) + goto out; + } + + err = 0; +out: + curl_easy_cleanup(curl); + curl_global_cleanup(); + return err; +} + +/** Fill the path member variable of the data objects. */ +static int data_create_paths(data_t* const* data, char const* dir) { + size_t const dirlen = strlen(dir); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* const datum = *data; + datum->data.path = cat3(dir, "/", datum->name); + if (datum->data.path == NULL) + return ENOMEM; + if (data_has_dict(datum)) { + datum->dict.path = cat2(datum->data.path, ".dict"); + if (datum->dict.path == NULL) + return ENOMEM; + } + } + return 0; +} + +/** Free the path member variable of the data objects. 
*/ +static void data_free_paths(data_t* const* data) { + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* datum = *data; + free((void*)datum->data.path); + free((void*)datum->dict.path); + datum->data.path = NULL; + datum->dict.path = NULL; + } +} + +static char const kStampName[] = "STAMP"; + +static void xxh_update_le(XXH64_state_t* state, uint64_t data) { + if (!MEM_isLittleEndian()) + data = MEM_swap64(data); + XXH64_update(state, &data, sizeof(data)); +} + +/** Hash the data to create the stamp. */ +static uint64_t stamp_hash(data_t const* const* data) { + XXH64_state_t state; + + XXH64_reset(&state, 0); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t const* datum = *data; + /* We don't care about the URL that we fetch from. */ + /* The path is derived from the name. */ + XXH64_update(&state, datum->name, strlen(datum->name)); + xxh_update_le(&state, datum->data.xxhash64); + xxh_update_le(&state, datum->dict.xxhash64); + xxh_update_le(&state, datum->type); + } + return XXH64_digest(&state); +} + +/** Check if the stamp matches the stamp in the cache directory. 
*/ +static int stamp_check(char const* dir, data_t const* const* data) { + char* stamp = cat3(dir, "/", kStampName); + uint64_t const expected = stamp_hash(data); + XXH64_canonical_t actual; + FILE* stampfile = NULL; + int matches = 0; + + if (stamp == NULL) + goto out; + if (!UTIL_isRegularFile(stamp)) { + fprintf(stderr, "stamp does not exist: recreating the data cache\n"); + goto out; + } + + stampfile = fopen(stamp, "rb"); + if (stampfile == NULL) { + fprintf(stderr, "could not open stamp: recreating the data cache\n"); + goto out; + } + + size_t b; + if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) { + fprintf(stderr, "invalid stamp: recreating the data cache\n"); + goto out; + } + + matches = (expected == XXH64_hashFromCanonical(&actual)); + if (matches) + fprintf(stderr, "stamp matches: reusing the cached data\n"); + else + fprintf(stderr, "stamp does not match: recreating the data cache\n"); + +out: + free(stamp); + if (stampfile != NULL) + fclose(stampfile); + return matches; +} + +/** On success write a new stamp, on failure delete the old stamp. */ +static int +stamp_write(char const* dir, data_t const* const* data, int const data_err) { + char* stamp = cat3(dir, "/", kStampName); + FILE* stampfile = NULL; + int err = EIO; + + if (stamp == NULL) + return ENOMEM; + + if (data_err != 0) { + err = data_err; + goto out; + } + XXH64_canonical_t hash; + + XXH64_canonicalFromHash(&hash, stamp_hash(data)); + + stampfile = fopen(stamp, "wb"); + if (stampfile == NULL) + goto out; + if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1) + goto out; + err = 0; + fprintf(stderr, "stamped new data cache\n"); +out: + if (err != 0) + /* Ignore errors. */ + unlink(stamp); + free(stamp); + if (stampfile != NULL) + fclose(stampfile); + return err; +} + +int data_init(char const* dir) { + int err; + + if (dir == NULL) + return EINVAL; + + /* This must be first to simplify logic. 
*/ + err = ensure_directory_exists(dir); + if (err != 0) + return err; + + /* Save the cache directory. */ + g_data_dir = strdup(dir); + if (g_data_dir == NULL) + return ENOMEM; + + err = data_create_paths(g_data, dir); + if (err != 0) + return err; + + /* If the stamp matches then we are good to go. + * This must be called before any modifications to the data cache. + * After this point, we MUST call stamp_write() to update the STAMP, + * since we've updated the data cache. + */ + if (stamp_check(dir, data)) + return 0; + + err = curl_download_data(data); + if (err != 0) + goto out; + +out: + /* This must be last, since it must know if data_init() succeeded. */ + stamp_write(dir, data, err); + return err; +} + +void data_finish(void) { + data_free_paths(g_data); + free(g_data_dir); + g_data_dir = NULL; +} diff --git a/src/zstd/tests/regression/data.h b/src/zstd/tests/regression/data.h new file mode 100644 index 000000000..90ed22f19 --- /dev/null +++ b/src/zstd/tests/regression/data.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DATA_H +#define DATA_H + +#include <stddef.h> +#include <stdint.h> + +typedef enum { + data_type_file = 1, /**< This data is a file. *.zst */ + data_type_dir = 2, /**< This data is a directory. *.tar.zst */ +} data_type_t; + +typedef struct { + char const* url; /**< Where to get this resource. */ + uint64_t xxhash64; /**< Hash of the url contents. */ + char const* path; /**< The path of the unpacked resource (derived). */ +} data_resource_t; + +typedef struct { + data_resource_t data; + data_resource_t dict; + data_type_t type; /**< The type of the data. 
*/ + char const* name; /**< The logical name of the data (no extension). */ +} data_t; + +/** + * The NULL-terminated list of data objects. + */ +extern data_t const* const* data; + + +int data_has_dict(data_t const* data); + +/** + * Initializes the data module and downloads the data necessary. + * Caches the downloads in dir. We add a stamp file in the directory after + * a successful download. If a stamp file already exists, and matches our + * current data stamp, we will use the cached data without downloading. + * + * @param dir The directory to cache the downloaded data into. + * + * @returns 0 on success. + */ +int data_init(char const* dir); + +/** + * Must be called at exit to free resources allocated by data_init(). + */ +void data_finish(void); + +typedef struct { + uint8_t* data; + size_t size; + size_t capacity; +} data_buffer_t; + +/** + * Read the file that data points to into a buffer. + * NOTE: data must be a file, not a directory. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_get_data(data_t const* data); + +/** + * Read the dictionary that the data points to into a buffer. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_get_dict(data_t const* data); + +/** + * Read the contents of filename into a buffer. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_read(char const* filename); + +/** + * Create a buffer with the specified capacity. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_create(size_t capacity); + +/** + * Calls memcmp() on the contents [0, size) of both buffers. + */ +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2); + +/** + * Frees an allocated buffer. + */ +void data_buffer_free(data_buffer_t buffer); + + +typedef struct { + data_buffer_t const* buffers; + size_t size; +} data_buffers_t; + +/** + * @returns a list of buffers for every file in data. 
It is zero sized on error. + */ +data_buffers_t data_buffers_get(data_t const* data); + +/** + * Frees the data buffers. + */ +void data_buffers_free(data_buffers_t buffers); + +#endif diff --git a/src/zstd/tests/regression/levels.h b/src/zstd/tests/regression/levels.h new file mode 100644 index 000000000..5e7d40a7d --- /dev/null +++ b/src/zstd/tests/regression/levels.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef LEVEL +# error LEVEL(x) must be defined +#endif +#ifndef FAST_LEVEL +# error FAST_LEVEL(x) must be defined +#endif + +/** + * The levels are chosen to trigger every strategy in every source size, + * as well as some fast levels and the default level. + * If you change the compression levels, you should probably update these. + */ + +FAST_LEVEL(5) + +FAST_LEVEL(3) + +FAST_LEVEL(1) +LEVEL(0) +LEVEL(1) + +LEVEL(3) +LEVEL(4) +LEVEL(5) +LEVEL(6) +LEVEL(7) + +LEVEL(9) + +LEVEL(13) + +LEVEL(16) + +LEVEL(19) diff --git a/src/zstd/tests/regression/method.c b/src/zstd/tests/regression/method.c new file mode 100644 index 000000000..3c949a278 --- /dev/null +++ b/src/zstd/tests/regression/method.c @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "method.h" + +#include <stdio.h> +#include <stdlib.h> + +#define ZSTD_STATIC_LINKING_ONLY +#include <zstd.h> + +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +static char const* g_zstdcli = NULL; + +void method_set_zstdcli(char const* zstdcli) { + g_zstdcli = zstdcli; +} + +/** + * Macro to get a pointer of type, given ptr, which is a member variable with + * the given name, member. + * + * method_state_t* base = ...; + * buffer_state_t* state = container_of(base, buffer_state_t, base); + */ +#define container_of(ptr, type, member) \ + ((type*)(ptr == NULL ? NULL : (char*)(ptr)-offsetof(type, member))) + +/** State to reuse the same buffers between compression calls. */ +typedef struct { + method_state_t base; + data_buffers_t inputs; /**< The input buffer for each file. */ + data_buffer_t dictionary; /**< The dictionary. */ + data_buffer_t compressed; /**< The compressed data buffer. */ + data_buffer_t decompressed; /**< The decompressed data buffer. */ +} buffer_state_t; + +static size_t buffers_max_size(data_buffers_t buffers) { + size_t max = 0; + for (size_t i = 0; i < buffers.size; ++i) { + if (buffers.buffers[i].size > max) + max = buffers.buffers[i].size; + } + return max; +} + +static method_state_t* buffer_state_create(data_t const* data) { + buffer_state_t* state = (buffer_state_t*)calloc(1, sizeof(buffer_state_t)); + if (state == NULL) + return NULL; + state->base.data = data; + state->inputs = data_buffers_get(data); + state->dictionary = data_buffer_get_dict(data); + size_t const max_size = buffers_max_size(state->inputs); + state->compressed = data_buffer_create(ZSTD_compressBound(max_size)); + state->decompressed = data_buffer_create(max_size); + return &state->base; +} + +static void buffer_state_destroy(method_state_t* base) { + if (base == NULL) + return; + buffer_state_t* state = container_of(base, buffer_state_t, base); + free(state); +} + +static int buffer_state_bad( + buffer_state_t const* state, + config_t const* config) { + 
if (state == NULL) { + fprintf(stderr, "buffer_state_t is NULL\n"); + return 1; + } + if (state->inputs.size == 0 || state->compressed.data == NULL || + state->decompressed.data == NULL) { + fprintf(stderr, "buffer state allocation failure\n"); + return 1; + } + if (config->use_dictionary && state->dictionary.data == NULL) { + fprintf(stderr, "dictionary loading failed\n"); + return 1; + } + return 0; +} + +static result_t simple_compress(method_state_t* base, config_t const* config) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + /* Keep the tests short by skipping directories, since behavior shouldn't + * change. + */ + if (base->data->type != data_type_file) + return result_error(result_error_skip); + + if (config->use_dictionary || config->no_pledged_src_size) + return result_error(result_error_skip); + + /* If the config doesn't specify a level, skip. */ + int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return result_error(result_error_skip); + + data_buffer_t const input = state->inputs.buffers[0]; + + /* Compress, decompress, and check the result. 
*/ + state->compressed.size = ZSTD_compress( + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + level); + if (ZSTD_isError(state->compressed.size)) + return result_error(result_error_compression_error); + + state->decompressed.size = ZSTD_decompress( + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size); + if (ZSTD_isError(state->decompressed.size)) + return result_error(result_error_decompression_error); + if (data_buffer_compare(input, state->decompressed)) + return result_error(result_error_round_trip_error); + + result_data_t data; + data.total_size = state->compressed.size; + return result_data(data); +} + +static result_t compress_cctx_compress( + method_state_t* base, + config_t const* config) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + if (config->no_pledged_src_size) + return result_error(result_error_skip); + + if (base->data->type != data_type_dir) + return result_error(result_error_skip); + + int const level = config_get_level(config); + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (cctx == NULL || dctx == NULL) { + fprintf(stderr, "context creation failed\n"); + return result_error(result_error_system_error); + } + + result_t result; + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t const input = state->inputs.buffers[i]; + ZSTD_parameters const params = + config_get_zstd_params(config, input.size, state->dictionary.size); + + if (level == CONFIG_NO_LEVEL) + state->compressed.size = ZSTD_compress_advanced( + cctx, + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + config->use_dictionary ? state->dictionary.data : NULL, + config->use_dictionary ? 
state->dictionary.size : 0, + params); + else if (config->use_dictionary) + state->compressed.size = ZSTD_compress_usingDict( + cctx, + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + state->dictionary.data, + state->dictionary.size, + level); + else + state->compressed.size = ZSTD_compressCCtx( + cctx, + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + level); + + if (ZSTD_isError(state->compressed.size)) { + result = result_error(result_error_compression_error); + goto out; + } + + if (config->use_dictionary) + state->decompressed.size = ZSTD_decompress_usingDict( + dctx, + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size, + state->dictionary.data, + state->dictionary.size); + else + state->decompressed.size = ZSTD_decompressDCtx( + dctx, + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size); + if (ZSTD_isError(state->decompressed.size)) { + result = result_error(result_error_decompression_error); + goto out; + } + if (data_buffer_compare(input, state->decompressed)) { + result = result_error(result_error_round_trip_error); + goto out; + } + + data.total_size += state->compressed.size; + } + + result = result_data(data); +out: + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + return result; +} + +/** Generic state creation function. */ +static method_state_t* method_state_create(data_t const* data) { + method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t)); + if (state == NULL) + return NULL; + state->data = data; + return state; +} + +static void method_state_destroy(method_state_t* state) { + free(state); +} + +static result_t cli_compress(method_state_t* state, config_t const* config) { + if (config->cli_args == NULL) + return result_error(result_error_skip); + + /* We don't support no pledged source size with directories. Too slow. 
*/ + if (state->data->type == data_type_dir && config->no_pledged_src_size) + return result_error(result_error_skip); + + if (g_zstdcli == NULL) + return result_error(result_error_system_error); + + /* '<zstd>' -cqr <args> [-D '<dict>'] '<file/dir>' */ + char cmd[1024]; + size_t const cmd_size = snprintf( + cmd, + sizeof(cmd), + "'%s' -cqr %s %s%s%s %s '%s'", + g_zstdcli, + config->cli_args, + config->use_dictionary ? "-D '" : "", + config->use_dictionary ? state->data->dict.path : "", + config->use_dictionary ? "'" : "", + config->no_pledged_src_size ? "<" : "", + state->data->data.path); + if (cmd_size >= sizeof(cmd)) { + fprintf(stderr, "command too large: %s\n", cmd); + return result_error(result_error_system_error); + } + FILE* zstd = popen(cmd, "r"); + if (zstd == NULL) { + fprintf(stderr, "failed to popen command: %s\n", cmd); + return result_error(result_error_system_error); + } + + char out[4096]; + size_t total_size = 0; + while (1) { + size_t const size = fread(out, 1, sizeof(out), zstd); + total_size += size; + if (size != sizeof(out)) + break; + } + if (ferror(zstd) || pclose(zstd) != 0) { + fprintf(stderr, "zstd failed with command: %s\n", cmd); + return result_error(result_error_compression_error); + } + + result_data_t const data = {.total_size = total_size}; + return result_data(data); +} + +static int advanced_config( + ZSTD_CCtx* cctx, + buffer_state_t* state, + config_t const* config) { + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + for (size_t p = 0; p < config->param_values.size; ++p) { + param_value_t const pv = config->param_values.data[p]; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, pv.param, pv.value))) { + return 1; + } + } + if (config->use_dictionary) { + if (ZSTD_isError(ZSTD_CCtx_loadDictionary( + cctx, state->dictionary.data, state->dictionary.size))) { + return 1; + } + } + return 0; +} + +static result_t advanced_one_pass_compress_output_adjustment( + method_state_t* base, + config_t const* config, + size_t const 
/**
 * One-pass compression where the destination capacity handed to
 * ZSTD_compress2() is exactly ZSTD_compressBound(input.size)
 * (a subtract of 0).
 */
static result_t advanced_one_pass_compress(
    method_state_t* base,
    config_t const* config) {
    return advanced_one_pass_compress_output_adjustment(base, config, 0);
}

/**
 * One-pass compression where the destination capacity handed to
 * ZSTD_compress2() is one byte short of ZSTD_compressBound(input.size)
 * (a subtract of 1).
 */
static result_t advanced_one_pass_compress_small_output(
    method_state_t* base,
    config_t const* config) {
    return advanced_one_pass_compress_output_adjustment(base, config, 1);
}
state->inputs.buffers[i]; + + if (!config->no_pledged_src_size) { + if (ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(cctx, input.size))) { + result = result_error(result_error_compression_error); + goto out; + } + } + + while (input.size > 0) { + ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)}; + input.data += in.size; + input.size -= in.size; + ZSTD_EndDirective const op = + input.size > 0 ? ZSTD_e_continue : ZSTD_e_end; + size_t ret = 0; + while (in.pos < in.size || (op == ZSTD_e_end && ret != 0)) { + ZSTD_outBuffer out = {state->compressed.data, + MIN(state->compressed.capacity, 1024)}; + ret = ZSTD_compressStream2(cctx, &out, &in, op); + if (ZSTD_isError(ret)) { + result = result_error(result_error_compression_error); + goto out; + } + data.total_size += out.pos; + } + } + } + + result = result_data(data); +out: + ZSTD_freeCCtx(cctx); + return result; +} + +static int init_cstream( + buffer_state_t* state, + ZSTD_CStream* zcs, + config_t const* config, + int const advanced, + ZSTD_CDict** cdict) +{ + size_t zret; + if (advanced) { + ZSTD_parameters const params = config_get_zstd_params(config, 0, 0); + ZSTD_CDict* dict = NULL; + if (cdict) { + if (!config->use_dictionary) + return 1; + *cdict = ZSTD_createCDict_advanced( + state->dictionary.data, + state->dictionary.size, + ZSTD_dlm_byRef, + ZSTD_dct_auto, + params.cParams, + ZSTD_defaultCMem); + if (!*cdict) { + return 1; + } + zret = ZSTD_initCStream_usingCDict_advanced( + zcs, *cdict, params.fParams, ZSTD_CONTENTSIZE_UNKNOWN); + } else { + zret = ZSTD_initCStream_advanced( + zcs, + config->use_dictionary ? state->dictionary.data : NULL, + config->use_dictionary ? 
state->dictionary.size : 0, + params, + ZSTD_CONTENTSIZE_UNKNOWN); + } + } else { + int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return 1; + if (cdict) { + if (!config->use_dictionary) + return 1; + *cdict = ZSTD_createCDict( + state->dictionary.data, + state->dictionary.size, + level); + if (!*cdict) { + return 1; + } + zret = ZSTD_initCStream_usingCDict(zcs, *cdict); + } else if (config->use_dictionary) { + zret = ZSTD_initCStream_usingDict( + zcs, + state->dictionary.data, + state->dictionary.size, + level); + } else { + zret = ZSTD_initCStream(zcs, level); + } + } + if (ZSTD_isError(zret)) { + return 1; + } + return 0; +} + +static result_t old_streaming_compress_internal( + method_state_t* base, + config_t const* config, + int const advanced, + int const cdict) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + + ZSTD_CStream* zcs = ZSTD_createCStream(); + ZSTD_CDict* cd = NULL; + result_t result; + if (zcs == NULL) { + result = result_error(result_error_compression_error); + goto out; + } + if (!advanced && config_get_level(config) == CONFIG_NO_LEVEL) { + result = result_error(result_error_skip); + goto out; + } + if (cdict && !config->use_dictionary) { + result = result_error(result_error_skip); + goto out; + } + if (init_cstream(state, zcs, config, advanced, cdict ? &cd : NULL)) { + result = result_error(result_error_compression_error); + goto out; + } + + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t input = state->inputs.buffers[i]; + size_t zret = ZSTD_resetCStream( + zcs, + config->no_pledged_src_size ? 
/**
 * Legacy streaming, initialized from the config's compression level.
 * Skipped by the shared implementation when the config has no level.
 */
static result_t old_streaming_compress(
    method_state_t* base,
    config_t const* config)
{
    return old_streaming_compress_internal(
        base, config, /* advanced */ 0, /* cdict */ 0);
}

/**
 * Legacy streaming, initialized from the config's full parameter set
 * instead of a level.
 */
static result_t old_streaming_compress_advanced(
    method_state_t* base,
    config_t const* config)
{
    return old_streaming_compress_internal(
        base, config, /* advanced */ 1, /* cdict */ 0);
}

/**
 * Legacy streaming through a ZSTD_CDict created at the config's level.
 * Skipped by the shared implementation when the config has no level or does
 * not use a dictionary.
 */
static result_t old_streaming_compress_cdict(
    method_state_t* base,
    config_t const* config)
{
    return old_streaming_compress_internal(
        base, config, /* advanced */ 0, /* cdict */ 1);
}

/**
 * Legacy streaming through a ZSTD_CDict created from the config's full
 * parameter set. Skipped by the shared implementation when the config does
 * not use a dictionary.
 */
static result_t old_streaming_compress_cdict_advanced(
    method_state_t* base,
    config_t const* config)
{
    return old_streaming_compress_internal(
        base, config, /* advanced */ 1, /* cdict */ 1);
}
+ .destroy = buffer_state_destroy, +}; + +method_t const advanced_one_pass = { + .name = "advanced one pass", + .create = buffer_state_create, + .compress = advanced_one_pass_compress, + .destroy = buffer_state_destroy, +}; + +method_t const advanced_one_pass_small_out = { + .name = "advanced one pass small out", + .create = buffer_state_create, + .compress = advanced_one_pass_compress, + .destroy = buffer_state_destroy, +}; + +method_t const advanced_streaming = { + .name = "advanced streaming", + .create = buffer_state_create, + .compress = advanced_streaming_compress, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming = { + .name = "old streaming", + .create = buffer_state_create, + .compress = old_streaming_compress, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming_advanced = { + .name = "old streaming advanced", + .create = buffer_state_create, + .compress = old_streaming_compress_advanced, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming_cdict = { + .name = "old streaming cdcit", + .create = buffer_state_create, + .compress = old_streaming_compress_cdict, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming_advanced_cdict = { + .name = "old streaming advanced cdict", + .create = buffer_state_create, + .compress = old_streaming_compress_cdict_advanced, + .destroy = buffer_state_destroy, +}; + +method_t const cli = { + .name = "zstdcli", + .create = method_state_create, + .compress = cli_compress, + .destroy = method_state_destroy, +}; + +static method_t const* g_methods[] = { + &simple, + &compress_cctx, + &cli, + &advanced_one_pass, + &advanced_one_pass_small_out, + &advanced_streaming, + &old_streaming, + &old_streaming_advanced, + &old_streaming_cdict, + &old_streaming_advanced_cdict, + NULL, +}; + +method_t const* const* methods = g_methods; diff --git a/src/zstd/tests/regression/method.h b/src/zstd/tests/regression/method.h new file mode 100644 index 000000000..6884e5418 --- 
/**
 * A method that compresses the data using config.
 * Instances are collected in the NULL-terminated `methods` list.
 */
typedef struct {
    char const* name; /**< The identifier for this method in the results. */
    /**
     * Creates a state that must contain a member variable of method_state_t,
     * and returns a pointer to that member variable.
     *
     * This method can be used to do expensive work that only depends on the
     * data, like loading the data file into a buffer.
     *
     * @param data The data the state is created for.
     *
     * @returns The state, or NULL if it could not be allocated.
     */
    method_state_t* (*create)(data_t const* data);
    /**
     * Compresses the data in the state using the given config.
     *
     * @param state A pointer to the state returned by create().
     * @param config The configuration to compress with.
     *
     * @returns The total compressed size on success, or an error code.
     *          A method that does not apply to the data/config combination
     *          reports result_error_skip.
     */
    result_t (*compress)(method_state_t* state, config_t const* config);
    /**
     * Frees the state.
     *
     * @param state A pointer to the state returned by create().
     */
    void (*destroy)(method_state_t* state);
} method_t;
+ */ +extern method_t const* const* methods; + +#endif diff --git a/src/zstd/tests/regression/result.c b/src/zstd/tests/regression/result.c new file mode 100644 index 000000000..2911722cd --- /dev/null +++ b/src/zstd/tests/regression/result.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "result.h" + +char const* result_get_error_string(result_t result) { + switch (result_get_error(result)) { + case result_error_ok: + return "okay"; + case result_error_skip: + return "skip"; + case result_error_system_error: + return "system error"; + case result_error_compression_error: + return "compression error"; + case result_error_decompression_error: + return "decompression error"; + case result_error_round_trip_error: + return "round trip error"; + } +} diff --git a/src/zstd/tests/regression/result.h b/src/zstd/tests/regression/result.h new file mode 100644 index 000000000..0085c2adf --- /dev/null +++ b/src/zstd/tests/regression/result.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef RESULT_H +#define RESULT_H + +#include <stddef.h> + +/** + * The error type enum. + */ +typedef enum { + result_error_ok, /**< No error. */ + result_error_skip, /**< This method was skipped. */ + result_error_system_error, /**< Some internal error happened. 
/**
 * The result type.
 * Do not access the member variables directly, use the helper functions.
 */
typedef struct {
    result_error_t internal_error; /**< result_error_ok for a success result. */
    result_data_t internal_data;   /**< Only meaningful for a success result. */
} result_t;
/* Builds an error (or skip) result; internal_data is left zero-initialized. */
static result_t result_error(result_error_t error) {
    result_t result = {
        .internal_error = error,
    };
    return result;
}

/* Builds a success result that carries data. */
static result_t result_data(result_data_t data) {
    result_t result = {
        .internal_error = result_error_ok,
        .internal_data = data,
    };
    return result;
}

/* Non-zero for everything except result_error_ok, so a skip counts too. */
static int result_is_error(result_t result) {
    return result_get_error(result) != result_error_ok;
}

/* Non-zero only for result_error_skip. */
static int result_is_skip(result_t result) {
    return result_get_error(result) == result_error_skip;
}

/* Returns the stored error code; result_error_ok for a success result. */
static result_error_t result_get_error(result_t result) {
    return result.internal_error;
}

/* Defined in result.c. */
char const* result_get_error_string(result_t result);

/* Returns the stored data without checking that the result is a success. */
static result_data_t result_get_data(result_t result) {
    return result.internal_data;
}
+silesia, level -3, compress cctx, 6444663 +silesia, level -1, compress cctx, 6178442 +silesia, level 0, compress cctx, 4849491 +silesia, level 1, compress cctx, 5313144 +silesia, level 3, compress cctx, 4849491 +silesia, level 4, compress cctx, 4786913 +silesia, level 5, compress cctx, 4710178 +silesia, level 6, compress cctx, 4659996 +silesia, level 7, compress cctx, 4596234 +silesia, level 9, compress cctx, 4543862 +silesia, level 13, compress cctx, 4482073 +silesia, level 16, compress cctx, 4377389 +silesia, level 19, compress cctx, 4293262 +silesia, long distance mode, compress cctx, 4849491 +silesia, multithreaded, compress cctx, 4849491 +silesia, multithreaded long distance mode, compress cctx, 4849491 +silesia, small window log, compress cctx, 7078156 +silesia, small hash log, compress cctx, 6554898 +silesia, small chain log, compress cctx, 4931093 +silesia, explicit params, compress cctx, 4794609 +silesia, uncompressed literals, compress cctx, 4849491 +silesia, uncompressed literals optimal, compress cctx, 4293262 +silesia, huffman literals, compress cctx, 6178442 +silesia, multithreaded with advanced params, compress cctx, 4849491 +github, level -5, compress cctx, 205285 +github, level -5 with dict, compress cctx, 47294 +github, level -3, compress cctx, 190643 +github, level -3 with dict, compress cctx, 48047 +github, level -1, compress cctx, 175568 +github, level -1 with dict, compress cctx, 43527 +github, level 0, compress cctx, 136311 +github, level 0 with dict, compress cctx, 41534 +github, level 1, compress cctx, 142450 +github, level 1 with dict, compress cctx, 42157 +github, level 3, compress cctx, 136311 +github, level 3 with dict, compress cctx, 41534 +github, level 4, compress cctx, 136144 +github, level 4 with dict, compress cctx, 41725 +github, level 5, compress cctx, 135106 +github, level 5 with dict, compress cctx, 38934 +github, level 6, compress cctx, 135108 +github, level 6 with dict, compress cctx, 38628 +github, level 7, compress cctx, 
135108 +github, level 7 with dict, compress cctx, 38741 +github, level 9, compress cctx, 135108 +github, level 9 with dict, compress cctx, 39335 +github, level 13, compress cctx, 133717 +github, level 13 with dict, compress cctx, 39923 +github, level 16, compress cctx, 133717 +github, level 16 with dict, compress cctx, 37568 +github, level 19, compress cctx, 133717 +github, level 19 with dict, compress cctx, 37567 +github, long distance mode, compress cctx, 141101 +github, multithreaded, compress cctx, 141101 +github, multithreaded long distance mode, compress cctx, 141101 +github, small window log, compress cctx, 141101 +github, small hash log, compress cctx, 138943 +github, small chain log, compress cctx, 139239 +github, explicit params, compress cctx, 140924 +github, uncompressed literals, compress cctx, 136311 +github, uncompressed literals optimal, compress cctx, 133717 +github, huffman literals, compress cctx, 175568 +github, multithreaded with advanced params, compress cctx, 141101 +silesia, level -5, zstdcli, 6882514 +silesia, level -3, zstdcli, 6568406 +silesia, level -1, zstdcli, 6183433 +silesia, level 0, zstdcli, 4849539 +silesia, level 1, zstdcli, 5314157 +silesia, level 3, zstdcli, 4849539 +silesia, level 4, zstdcli, 4786961 +silesia, level 5, zstdcli, 4710226 +silesia, level 6, zstdcli, 4660044 +silesia, level 7, zstdcli, 4596282 +silesia, level 9, zstdcli, 4543910 +silesia, level 13, zstdcli, 4482121 +silesia, level 16, zstdcli, 4377437 +silesia, level 19, zstdcli, 4293310 +silesia, long distance mode, zstdcli, 4839698 +silesia, multithreaded, zstdcli, 4849539 +silesia, multithreaded long distance mode, zstdcli, 4839698 +silesia, small window log, zstdcli, 7104616 +silesia, small hash log, zstdcli, 6554946 +silesia, small chain log, zstdcli, 4931141 +silesia, explicit params, zstdcli, 4797048 +silesia, uncompressed literals, zstdcli, 5128008 +silesia, uncompressed literals optimal, zstdcli, 4325482 +silesia, huffman literals, zstdcli, 5331158 
+silesia, multithreaded with advanced params, zstdcli, 5128008 +silesia.tar, level -5, zstdcli, 6738906 +silesia.tar, level -3, zstdcli, 6448409 +silesia.tar, level -1, zstdcli, 6186908 +silesia.tar, level 0, zstdcli, 4861462 +silesia.tar, level 1, zstdcli, 5336255 +silesia.tar, level 3, zstdcli, 4861462 +silesia.tar, level 4, zstdcli, 4800482 +silesia.tar, level 5, zstdcli, 4723312 +silesia.tar, level 6, zstdcli, 4673616 +silesia.tar, level 7, zstdcli, 4608346 +silesia.tar, level 9, zstdcli, 4554702 +silesia.tar, level 13, zstdcli, 4491710 +silesia.tar, level 16, zstdcli, 4381269 +silesia.tar, level 19, zstdcli, 4281555 +silesia.tar, no source size, zstdcli, 4861458 +silesia.tar, long distance mode, zstdcli, 4853140 +silesia.tar, multithreaded, zstdcli, 4861462 +silesia.tar, multithreaded long distance mode, zstdcli, 4853140 +silesia.tar, small window log, zstdcli, 7095284 +silesia.tar, small hash log, zstdcli, 6587841 +silesia.tar, small chain log, zstdcli, 4943269 +silesia.tar, explicit params, zstdcli, 4822318 +silesia.tar, uncompressed literals, zstdcli, 5129548 +silesia.tar, uncompressed literals optimal, zstdcli, 4320914 +silesia.tar, huffman literals, zstdcli, 5347560 +silesia.tar, multithreaded with advanced params, zstdcli, 5129548 +github, level -5, zstdcli, 207285 +github, level -5 with dict, zstdcli, 48718 +github, level -3, zstdcli, 192643 +github, level -3 with dict, zstdcli, 47395 +github, level -1, zstdcli, 177568 +github, level -1 with dict, zstdcli, 45170 +github, level 0, zstdcli, 138311 +github, level 0 with dict, zstdcli, 43148 +github, level 1, zstdcli, 144450 +github, level 1 with dict, zstdcli, 43682 +github, level 3, zstdcli, 138311 +github, level 3 with dict, zstdcli, 43148 +github, level 4, zstdcli, 138144 +github, level 4 with dict, zstdcli, 43251 +github, level 5, zstdcli, 137106 +github, level 5 with dict, zstdcli, 40938 +github, level 6, zstdcli, 137108 +github, level 6 with dict, zstdcli, 40632 +github, level 7, zstdcli, 137108 
+github, level 7 with dict, zstdcli, 40766 +github, level 9, zstdcli, 137108 +github, level 9 with dict, zstdcli, 41326 +github, level 13, zstdcli, 135717 +github, level 13 with dict, zstdcli, 41716 +github, level 16, zstdcli, 135717 +github, level 16 with dict, zstdcli, 39577 +github, level 19, zstdcli, 135717 +github, level 19 with dict, zstdcli, 39576 +github, long distance mode, zstdcli, 138311 +github, multithreaded, zstdcli, 138311 +github, multithreaded long distance mode, zstdcli, 138311 +github, small window log, zstdcli, 138311 +github, small hash log, zstdcli, 137467 +github, small chain log, zstdcli, 138314 +github, explicit params, zstdcli, 136140 +github, uncompressed literals, zstdcli, 167915 +github, uncompressed literals optimal, zstdcli, 158824 +github, huffman literals, zstdcli, 144450 +github, multithreaded with advanced params, zstdcli, 167915 +silesia, level -5, advanced one pass, 6737567 +silesia, level -3, advanced one pass, 6444663 +silesia, level -1, advanced one pass, 6178442 +silesia, level 0, advanced one pass, 4849491 +silesia, level 1, advanced one pass, 5313144 +silesia, level 3, advanced one pass, 4849491 +silesia, level 4, advanced one pass, 4786913 +silesia, level 5, advanced one pass, 4710178 +silesia, level 6, advanced one pass, 4659996 +silesia, level 7, advanced one pass, 4596234 +silesia, level 9, advanced one pass, 4543862 +silesia, level 13, advanced one pass, 4482073 +silesia, level 16, advanced one pass, 4377389 +silesia, level 19, advanced one pass, 4293262 +silesia, no source size, advanced one pass, 4849491 +silesia, long distance mode, advanced one pass, 4839650 +silesia, multithreaded, advanced one pass, 4849491 +silesia, multithreaded long distance mode, advanced one pass, 4839650 +silesia, small window log, advanced one pass, 7089646 +silesia, small hash log, advanced one pass, 6554898 +silesia, small chain log, advanced one pass, 4931093 +silesia, explicit params, advanced one pass, 4797035 +silesia, uncompressed 
literals, advanced one pass, 5127960 +silesia, uncompressed literals optimal, advanced one pass, 4325434 +silesia, huffman literals, advanced one pass, 5326210 +silesia, multithreaded with advanced params, advanced one pass, 5127960 +silesia.tar, level -5, advanced one pass, 6738558 +silesia.tar, level -3, advanced one pass, 6446362 +silesia.tar, level -1, advanced one pass, 6186038 +silesia.tar, level 0, advanced one pass, 4861374 +silesia.tar, level 1, advanced one pass, 5334825 +silesia.tar, level 3, advanced one pass, 4861374 +silesia.tar, level 4, advanced one pass, 4799583 +silesia.tar, level 5, advanced one pass, 4722271 +silesia.tar, level 6, advanced one pass, 4672231 +silesia.tar, level 7, advanced one pass, 4606657 +silesia.tar, level 9, advanced one pass, 4554099 +silesia.tar, level 13, advanced one pass, 4491706 +silesia.tar, level 16, advanced one pass, 4381265 +silesia.tar, level 19, advanced one pass, 4281551 +silesia.tar, no source size, advanced one pass, 4861374 +silesia.tar, long distance mode, advanced one pass, 4848046 +silesia.tar, multithreaded, advanced one pass, 4860726 +silesia.tar, multithreaded long distance mode, advanced one pass, 4847343 +silesia.tar, small window log, advanced one pass, 7095237 +silesia.tar, small hash log, advanced one pass, 6587833 +silesia.tar, small chain log, advanced one pass, 4943266 +silesia.tar, explicit params, advanced one pass, 4808543 +silesia.tar, uncompressed literals, advanced one pass, 5129447 +silesia.tar, uncompressed literals optimal, advanced one pass, 4320910 +silesia.tar, huffman literals, advanced one pass, 5347283 +silesia.tar, multithreaded with advanced params, advanced one pass, 5129766 +github, level -5, advanced one pass, 205285 +github, level -5 with dict, advanced one pass, 46718 +github, level -3, advanced one pass, 190643 +github, level -3 with dict, advanced one pass, 45395 +github, level -1, advanced one pass, 175568 +github, level -1 with dict, advanced one pass, 43170 +github, 
level 0, advanced one pass, 136311 +github, level 0 with dict, advanced one pass, 41148 +github, level 1, advanced one pass, 142450 +github, level 1 with dict, advanced one pass, 41682 +github, level 3, advanced one pass, 136311 +github, level 3 with dict, advanced one pass, 41148 +github, level 4, advanced one pass, 136144 +github, level 4 with dict, advanced one pass, 41251 +github, level 5, advanced one pass, 135106 +github, level 5 with dict, advanced one pass, 38938 +github, level 6, advanced one pass, 135108 +github, level 6 with dict, advanced one pass, 38632 +github, level 7, advanced one pass, 135108 +github, level 7 with dict, advanced one pass, 38766 +github, level 9, advanced one pass, 135108 +github, level 9 with dict, advanced one pass, 39326 +github, level 13, advanced one pass, 133717 +github, level 13 with dict, advanced one pass, 39716 +github, level 16, advanced one pass, 133717 +github, level 16 with dict, advanced one pass, 37577 +github, level 19, advanced one pass, 133717 +github, level 19 with dict, advanced one pass, 37576 +github, no source size, advanced one pass, 136311 +github, long distance mode, advanced one pass, 136311 +github, multithreaded, advanced one pass, 136311 +github, multithreaded long distance mode, advanced one pass, 136311 +github, small window log, advanced one pass, 136311 +github, small hash log, advanced one pass, 135467 +github, small chain log, advanced one pass, 136314 +github, explicit params, advanced one pass, 137670 +github, uncompressed literals, advanced one pass, 165915 +github, uncompressed literals optimal, advanced one pass, 156824 +github, huffman literals, advanced one pass, 142450 +github, multithreaded with advanced params, advanced one pass, 165915 +silesia, level -5, advanced one pass small out, 6737567 +silesia, level -3, advanced one pass small out, 6444663 +silesia, level -1, advanced one pass small out, 6178442 +silesia, level 0, advanced one pass small out, 4849491 +silesia, level 1, advanced 
one pass small out, 5313144 +silesia, level 3, advanced one pass small out, 4849491 +silesia, level 4, advanced one pass small out, 4786913 +silesia, level 5, advanced one pass small out, 4710178 +silesia, level 6, advanced one pass small out, 4659996 +silesia, level 7, advanced one pass small out, 4596234 +silesia, level 9, advanced one pass small out, 4543862 +silesia, level 13, advanced one pass small out, 4482073 +silesia, level 16, advanced one pass small out, 4377389 +silesia, level 19, advanced one pass small out, 4293262 +silesia, no source size, advanced one pass small out, 4849491 +silesia, long distance mode, advanced one pass small out, 4839650 +silesia, multithreaded, advanced one pass small out, 4849491 +silesia, multithreaded long distance mode, advanced one pass small out, 4839650 +silesia, small window log, advanced one pass small out, 7089646 +silesia, small hash log, advanced one pass small out, 6554898 +silesia, small chain log, advanced one pass small out, 4931093 +silesia, explicit params, advanced one pass small out, 4797035 +silesia, uncompressed literals, advanced one pass small out, 5127960 +silesia, uncompressed literals optimal, advanced one pass small out, 4325434 +silesia, huffman literals, advanced one pass small out, 5326210 +silesia, multithreaded with advanced params, advanced one pass small out, 5127960 +silesia.tar, level -5, advanced one pass small out, 6738558 +silesia.tar, level -3, advanced one pass small out, 6446362 +silesia.tar, level -1, advanced one pass small out, 6186038 +silesia.tar, level 0, advanced one pass small out, 4861374 +silesia.tar, level 1, advanced one pass small out, 5334825 +silesia.tar, level 3, advanced one pass small out, 4861374 +silesia.tar, level 4, advanced one pass small out, 4799583 +silesia.tar, level 5, advanced one pass small out, 4722271 +silesia.tar, level 6, advanced one pass small out, 4672231 +silesia.tar, level 7, advanced one pass small out, 4606657 +silesia.tar, level 9, advanced one 
pass small out, 4554099 +silesia.tar, level 13, advanced one pass small out, 4491706 +silesia.tar, level 16, advanced one pass small out, 4381265 +silesia.tar, level 19, advanced one pass small out, 4281551 +silesia.tar, no source size, advanced one pass small out, 4861374 +silesia.tar, long distance mode, advanced one pass small out, 4848046 +silesia.tar, multithreaded, advanced one pass small out, 4860726 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4847343 +silesia.tar, small window log, advanced one pass small out, 7095237 +silesia.tar, small hash log, advanced one pass small out, 6587833 +silesia.tar, small chain log, advanced one pass small out, 4943266 +silesia.tar, explicit params, advanced one pass small out, 4808543 +silesia.tar, uncompressed literals, advanced one pass small out, 5129447 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4320910 +silesia.tar, huffman literals, advanced one pass small out, 5347283 +silesia.tar, multithreaded with advanced params, advanced one pass small out, 5129766 +github, level -5, advanced one pass small out, 205285 +github, level -5 with dict, advanced one pass small out, 46718 +github, level -3, advanced one pass small out, 190643 +github, level -3 with dict, advanced one pass small out, 45395 +github, level -1, advanced one pass small out, 175568 +github, level -1 with dict, advanced one pass small out, 43170 +github, level 0, advanced one pass small out, 136311 +github, level 0 with dict, advanced one pass small out, 41148 +github, level 1, advanced one pass small out, 142450 +github, level 1 with dict, advanced one pass small out, 41682 +github, level 3, advanced one pass small out, 136311 +github, level 3 with dict, advanced one pass small out, 41148 +github, level 4, advanced one pass small out, 136144 +github, level 4 with dict, advanced one pass small out, 41251 +github, level 5, advanced one pass small out, 135106 +github, level 5 with dict, advanced one 
pass small out, 38938 +github, level 6, advanced one pass small out, 135108 +github, level 6 with dict, advanced one pass small out, 38632 +github, level 7, advanced one pass small out, 135108 +github, level 7 with dict, advanced one pass small out, 38766 +github, level 9, advanced one pass small out, 135108 +github, level 9 with dict, advanced one pass small out, 39326 +github, level 13, advanced one pass small out, 133717 +github, level 13 with dict, advanced one pass small out, 39716 +github, level 16, advanced one pass small out, 133717 +github, level 16 with dict, advanced one pass small out, 37577 +github, level 19, advanced one pass small out, 133717 +github, level 19 with dict, advanced one pass small out, 37576 +github, no source size, advanced one pass small out, 136311 +github, long distance mode, advanced one pass small out, 136311 +github, multithreaded, advanced one pass small out, 136311 +github, multithreaded long distance mode, advanced one pass small out, 136311 +github, small window log, advanced one pass small out, 136311 +github, small hash log, advanced one pass small out, 135467 +github, small chain log, advanced one pass small out, 136314 +github, explicit params, advanced one pass small out, 137670 +github, uncompressed literals, advanced one pass small out, 165915 +github, uncompressed literals optimal, advanced one pass small out, 156824 +github, huffman literals, advanced one pass small out, 142450 +github, multithreaded with advanced params, advanced one pass small out, 165915 +silesia, level -5, advanced streaming, 6882466 +silesia, level -3, advanced streaming, 6568358 +silesia, level -1, advanced streaming, 6183385 +silesia, level 0, advanced streaming, 4849491 +silesia, level 1, advanced streaming, 5314109 +silesia, level 3, advanced streaming, 4849491 +silesia, level 4, advanced streaming, 4786913 +silesia, level 5, advanced streaming, 4710178 +silesia, level 6, advanced streaming, 4659996 +silesia, level 7, advanced streaming, 
4596234 +silesia, level 9, advanced streaming, 4543862 +silesia, level 13, advanced streaming, 4482073 +silesia, level 16, advanced streaming, 4377389 +silesia, level 19, advanced streaming, 4293262 +silesia, no source size, advanced streaming, 4849455 +silesia, long distance mode, advanced streaming, 4839650 +silesia, multithreaded, advanced streaming, 4849491 +silesia, multithreaded long distance mode, advanced streaming, 4839650 +silesia, small window log, advanced streaming, 7105714 +silesia, small hash log, advanced streaming, 6554898 +silesia, small chain log, advanced streaming, 4931093 +silesia, explicit params, advanced streaming, 4797048 +silesia, uncompressed literals, advanced streaming, 5127960 +silesia, uncompressed literals optimal, advanced streaming, 4325434 +silesia, huffman literals, advanced streaming, 5331110 +silesia, multithreaded with advanced params, advanced streaming, 5127960 +silesia.tar, level -5, advanced streaming, 6982738 +silesia.tar, level -3, advanced streaming, 6641264 +silesia.tar, level -1, advanced streaming, 6190789 +silesia.tar, level 0, advanced streaming, 4861376 +silesia.tar, level 1, advanced streaming, 5336879 +silesia.tar, level 3, advanced streaming, 4861376 +silesia.tar, level 4, advanced streaming, 4799583 +silesia.tar, level 5, advanced streaming, 4722276 +silesia.tar, level 6, advanced streaming, 4672240 +silesia.tar, level 7, advanced streaming, 4606657 +silesia.tar, level 9, advanced streaming, 4554106 +silesia.tar, level 13, advanced streaming, 4491707 +silesia.tar, level 16, advanced streaming, 4381284 +silesia.tar, level 19, advanced streaming, 4281511 +silesia.tar, no source size, advanced streaming, 4861372 +silesia.tar, long distance mode, advanced streaming, 4848046 +silesia.tar, multithreaded, advanced streaming, 4861458 +silesia.tar, multithreaded long distance mode, advanced streaming, 4853136 +silesia.tar, small window log, advanced streaming, 7112148 +silesia.tar, small hash log, advanced streaming, 
6587834 +silesia.tar, small chain log, advanced streaming, 4943271 +silesia.tar, explicit params, advanced streaming, 4808570 +silesia.tar, uncompressed literals, advanced streaming, 5129450 +silesia.tar, uncompressed literals optimal, advanced streaming, 4320841 +silesia.tar, huffman literals, advanced streaming, 5352306 +silesia.tar, multithreaded with advanced params, advanced streaming, 5129544 +github, level -5, advanced streaming, 205285 +github, level -5 with dict, advanced streaming, 46718 +github, level -3, advanced streaming, 190643 +github, level -3 with dict, advanced streaming, 45395 +github, level -1, advanced streaming, 175568 +github, level -1 with dict, advanced streaming, 43170 +github, level 0, advanced streaming, 136311 +github, level 0 with dict, advanced streaming, 41148 +github, level 1, advanced streaming, 142450 +github, level 1 with dict, advanced streaming, 41682 +github, level 3, advanced streaming, 136311 +github, level 3 with dict, advanced streaming, 41148 +github, level 4, advanced streaming, 136144 +github, level 4 with dict, advanced streaming, 41251 +github, level 5, advanced streaming, 135106 +github, level 5 with dict, advanced streaming, 38938 +github, level 6, advanced streaming, 135108 +github, level 6 with dict, advanced streaming, 38632 +github, level 7, advanced streaming, 135108 +github, level 7 with dict, advanced streaming, 38766 +github, level 9, advanced streaming, 135108 +github, level 9 with dict, advanced streaming, 39326 +github, level 13, advanced streaming, 133717 +github, level 13 with dict, advanced streaming, 39716 +github, level 16, advanced streaming, 133717 +github, level 16 with dict, advanced streaming, 37577 +github, level 19, advanced streaming, 133717 +github, level 19 with dict, advanced streaming, 37576 +github, no source size, advanced streaming, 136311 +github, long distance mode, advanced streaming, 136311 +github, multithreaded, advanced streaming, 136311 +github, multithreaded long distance 
mode, advanced streaming, 136311 +github, small window log, advanced streaming, 136311 +github, small hash log, advanced streaming, 135467 +github, small chain log, advanced streaming, 136314 +github, explicit params, advanced streaming, 137670 +github, uncompressed literals, advanced streaming, 165915 +github, uncompressed literals optimal, advanced streaming, 156824 +github, huffman literals, advanced streaming, 142450 +github, multithreaded with advanced params, advanced streaming, 165915 +silesia, level -5, old streaming, 6882466 +silesia, level -3, old streaming, 6568358 +silesia, level -1, old streaming, 6183385 +silesia, level 0, old streaming, 4849491 +silesia, level 1, old streaming, 5314109 +silesia, level 3, old streaming, 4849491 +silesia, level 4, old streaming, 4786913 +silesia, level 5, old streaming, 4710178 +silesia, level 6, old streaming, 4659996 +silesia, level 7, old streaming, 4596234 +silesia, level 9, old streaming, 4543862 +silesia, level 13, old streaming, 4482073 +silesia, level 16, old streaming, 4377389 +silesia, level 19, old streaming, 4293262 +silesia, no source size, old streaming, 4849455 +silesia, uncompressed literals, old streaming, 4849491 +silesia, uncompressed literals optimal, old streaming, 4293262 +silesia, huffman literals, old streaming, 6183385 +silesia.tar, level -5, old streaming, 6982738 +silesia.tar, level -3, old streaming, 6641264 +silesia.tar, level -1, old streaming, 6190789 +silesia.tar, level 0, old streaming, 4861376 +silesia.tar, level 1, old streaming, 5336879 +silesia.tar, level 3, old streaming, 4861376 +silesia.tar, level 4, old streaming, 4799583 +silesia.tar, level 5, old streaming, 4722276 +silesia.tar, level 6, old streaming, 4672240 +silesia.tar, level 7, old streaming, 4606657 +silesia.tar, level 9, old streaming, 4554106 +silesia.tar, level 13, old streaming, 4491707 +silesia.tar, level 16, old streaming, 4381284 +silesia.tar, level 19, old streaming, 4281511 +silesia.tar, no source size, old 
streaming, 4861372 +silesia.tar, uncompressed literals, old streaming, 4861376 +silesia.tar, uncompressed literals optimal, old streaming, 4281511 +silesia.tar, huffman literals, old streaming, 6190789 +github, level -5, old streaming, 205285 +github, level -5 with dict, old streaming, 46718 +github, level -3, old streaming, 190643 +github, level -3 with dict, old streaming, 45395 +github, level -1, old streaming, 175568 +github, level -1 with dict, old streaming, 43170 +github, level 0, old streaming, 136311 +github, level 0 with dict, old streaming, 41148 +github, level 1, old streaming, 142450 +github, level 1 with dict, old streaming, 41682 +github, level 3, old streaming, 136311 +github, level 3 with dict, old streaming, 41148 +github, level 4, old streaming, 136144 +github, level 4 with dict, old streaming, 41251 +github, level 5, old streaming, 135106 +github, level 5 with dict, old streaming, 38938 +github, level 6, old streaming, 135108 +github, level 6 with dict, old streaming, 38632 +github, level 7, old streaming, 135108 +github, level 7 with dict, old streaming, 38766 +github, level 9, old streaming, 135108 +github, level 9 with dict, old streaming, 39326 +github, level 13, old streaming, 133717 +github, level 13 with dict, old streaming, 39716 +github, level 16, old streaming, 133717 +github, level 16 with dict, old streaming, 37577 +github, level 19, old streaming, 133717 +github, level 19 with dict, old streaming, 37576 +github, no source size, old streaming, 140631 +github, uncompressed literals, old streaming, 136311 +github, uncompressed literals optimal, old streaming, 133717 +github, huffman literals, old streaming, 175568 +silesia, level -5, old streaming advanced, 6882466 +silesia, level -3, old streaming advanced, 6568358 +silesia, level -1, old streaming advanced, 6183385 +silesia, level 0, old streaming advanced, 4849491 +silesia, level 1, old streaming advanced, 5314109 +silesia, level 3, old streaming advanced, 4849491 +silesia, level 4, 
old streaming advanced, 4786913 +silesia, level 5, old streaming advanced, 4710178 +silesia, level 6, old streaming advanced, 4659996 +silesia, level 7, old streaming advanced, 4596234 +silesia, level 9, old streaming advanced, 4543862 +silesia, level 13, old streaming advanced, 4482073 +silesia, level 16, old streaming advanced, 4377389 +silesia, level 19, old streaming advanced, 4293262 +silesia, no source size, old streaming advanced, 4849455 +silesia, long distance mode, old streaming advanced, 4849491 +silesia, multithreaded, old streaming advanced, 4849491 +silesia, multithreaded long distance mode, old streaming advanced, 4849491 +silesia, small window log, old streaming advanced, 7105714 +silesia, small hash log, old streaming advanced, 6554898 +silesia, small chain log, old streaming advanced, 4931093 +silesia, explicit params, old streaming advanced, 4797048 +silesia, uncompressed literals, old streaming advanced, 4849491 +silesia, uncompressed literals optimal, old streaming advanced, 4293262 +silesia, huffman literals, old streaming advanced, 6183385 +silesia, multithreaded with advanced params, old streaming advanced, 4849491 +silesia.tar, level -5, old streaming advanced, 6982738 +silesia.tar, level -3, old streaming advanced, 6641264 +silesia.tar, level -1, old streaming advanced, 6190789 +silesia.tar, level 0, old streaming advanced, 4861376 +silesia.tar, level 1, old streaming advanced, 5336879 +silesia.tar, level 3, old streaming advanced, 4861376 +silesia.tar, level 4, old streaming advanced, 4799583 +silesia.tar, level 5, old streaming advanced, 4722276 +silesia.tar, level 6, old streaming advanced, 4672240 +silesia.tar, level 7, old streaming advanced, 4606657 +silesia.tar, level 9, old streaming advanced, 4554106 +silesia.tar, level 13, old streaming advanced, 4491707 +silesia.tar, level 16, old streaming advanced, 4381284 +silesia.tar, level 19, old streaming advanced, 4281511 +silesia.tar, no source size, old streaming advanced, 4861372 
+silesia.tar, long distance mode, old streaming advanced, 4861376 +silesia.tar, multithreaded, old streaming advanced, 4861376 +silesia.tar, multithreaded long distance mode, old streaming advanced, 4861376 +silesia.tar, small window log, old streaming advanced, 7112151 +silesia.tar, small hash log, old streaming advanced, 6587834 +silesia.tar, small chain log, old streaming advanced, 4943271 +silesia.tar, explicit params, old streaming advanced, 4808570 +silesia.tar, uncompressed literals, old streaming advanced, 4861376 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4281511 +silesia.tar, huffman literals, old streaming advanced, 6190789 +silesia.tar, multithreaded with advanced params, old streaming advanced, 4861376 +github, level -5, old streaming advanced, 216734 +github, level -5 with dict, old streaming advanced, 49562 +github, level -3, old streaming advanced, 192160 +github, level -3 with dict, old streaming advanced, 44956 +github, level -1, old streaming advanced, 181108 +github, level -1 with dict, old streaming advanced, 42383 +github, level 0, old streaming advanced, 141090 +github, level 0 with dict, old streaming advanced, 41113 +github, level 1, old streaming advanced, 143682 +github, level 1 with dict, old streaming advanced, 42430 +github, level 3, old streaming advanced, 141090 +github, level 3 with dict, old streaming advanced, 41113 +github, level 4, old streaming advanced, 141090 +github, level 4 with dict, old streaming advanced, 41084 +github, level 5, old streaming advanced, 139391 +github, level 5 with dict, old streaming advanced, 39159 +github, level 6, old streaming advanced, 139394 +github, level 6 with dict, old streaming advanced, 38749 +github, level 7, old streaming advanced, 138675 +github, level 7 with dict, old streaming advanced, 38746 +github, level 9, old streaming advanced, 138675 +github, level 9 with dict, old streaming advanced, 38987 +github, level 13, old streaming advanced, 138675 +github, level 
13 with dict, old streaming advanced, 39724 +github, level 16, old streaming advanced, 138675 +github, level 16 with dict, old streaming advanced, 40771 +github, level 19, old streaming advanced, 133717 +github, level 19 with dict, old streaming advanced, 37576 +github, no source size, old streaming advanced, 140631 +github, long distance mode, old streaming advanced, 141090 +github, multithreaded, old streaming advanced, 141090 +github, multithreaded long distance mode, old streaming advanced, 141090 +github, small window log, old streaming advanced, 141090 +github, small hash log, old streaming advanced, 141578 +github, small chain log, old streaming advanced, 139258 +github, explicit params, old streaming advanced, 140930 +github, uncompressed literals, old streaming advanced, 141090 +github, uncompressed literals optimal, old streaming advanced, 133717 +github, huffman literals, old streaming advanced, 181108 +github, multithreaded with advanced params, old streaming advanced, 141090 +github, level -5 with dict, old streaming cdcit, 46718 +github, level -3 with dict, old streaming cdcit, 45395 +github, level -1 with dict, old streaming cdcit, 43170 +github, level 0 with dict, old streaming cdcit, 41148 +github, level 1 with dict, old streaming cdcit, 41682 +github, level 3 with dict, old streaming cdcit, 41148 +github, level 4 with dict, old streaming cdcit, 41251 +github, level 5 with dict, old streaming cdcit, 38938 +github, level 6 with dict, old streaming cdcit, 38632 +github, level 7 with dict, old streaming cdcit, 38766 +github, level 9 with dict, old streaming cdcit, 39326 +github, level 13 with dict, old streaming cdcit, 39716 +github, level 16 with dict, old streaming cdcit, 37577 +github, level 19 with dict, old streaming cdcit, 37576 +github, level -5 with dict, old streaming advanced cdict, 49562 +github, level -3 with dict, old streaming advanced cdict, 44956 +github, level -1 with dict, old streaming advanced cdict, 42383 +github, level 0 with 
dict, old streaming advanced cdict, 41113 +github, level 1 with dict, old streaming advanced cdict, 42430 +github, level 3 with dict, old streaming advanced cdict, 41113 +github, level 4 with dict, old streaming advanced cdict, 41084 +github, level 5 with dict, old streaming advanced cdict, 39158 +github, level 6 with dict, old streaming advanced cdict, 38748 +github, level 7 with dict, old streaming advanced cdict, 38744 +github, level 9 with dict, old streaming advanced cdict, 38986 +github, level 13 with dict, old streaming advanced cdict, 39724 +github, level 16 with dict, old streaming advanced cdict, 40771 +github, level 19 with dict, old streaming advanced cdict, 37576 diff --git a/src/zstd/tests/regression/test.c b/src/zstd/tests/regression/test.c new file mode 100644 index 000000000..ff2cdba30 --- /dev/null +++ b/src/zstd/tests/regression/test.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <assert.h> +#include <getopt.h> +#include <stdio.h> +#include <string.h> + +#include "config.h" +#include "data.h" +#include "method.h" + +static int g_max_name_len = 0; + +/** Check if a name contains a comma or is too long. */ +static int is_name_bad(char const* name) { + if (name == NULL) + return 1; + int const len = strlen(name); + if (len > g_max_name_len) + g_max_name_len = len; + for (; *name != '\0'; ++name) + if (*name == ',') + return 1; + return 0; +} + +/** Check if any of the names contain a comma. 
*/ +static int are_names_bad() { + for (size_t method = 0; methods[method] != NULL; ++method) + if (is_name_bad(methods[method]->name)) { + fprintf(stderr, "method name %s is bad\n", methods[method]->name); + return 1; + } + for (size_t datum = 0; data[datum] != NULL; ++datum) + if (is_name_bad(data[datum]->name)) { + fprintf(stderr, "data name %s is bad\n", data[datum]->name); + return 1; + } + for (size_t config = 0; configs[config] != NULL; ++config) + if (is_name_bad(configs[config]->name)) { + fprintf(stderr, "config name %s is bad\n", configs[config]->name); + return 1; + } + return 0; +} + +/** + * Option parsing using getopt. + * When you add a new option update: long_options, long_extras, and + * short_options. + */ + +/** Option variables filled by parse_args. */ +static char const* g_output = NULL; +static char const* g_diff = NULL; +static char const* g_cache = NULL; +static char const* g_zstdcli = NULL; +static char const* g_config = NULL; +static char const* g_data = NULL; +static char const* g_method = NULL; + +typedef enum { + required_option, + optional_option, + help_option, +} option_type; + +/** + * Extra state that we need to keep per-option that we can't store in getopt. + */ +struct option_extra { + int id; /**< The short option name, used as an id. */ + char const* help; /**< The help message. */ + option_type opt_type; /**< The option type: required, optional, or help. */ + char const** value; /**< The value to set or NULL if no_argument. */ +}; + +/** The options. 
*/ +static struct option long_options[] = { + {"cache", required_argument, NULL, 'c'}, + {"output", required_argument, NULL, 'o'}, + {"zstd", required_argument, NULL, 'z'}, + {"config", required_argument, NULL, 128}, + {"data", required_argument, NULL, 129}, + {"method", required_argument, NULL, 130}, + {"diff", required_argument, NULL, 'd'}, + {"help", no_argument, NULL, 'h'}, +}; + +static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + +/** The extra info for the options. Must be in the same order as the options. */ +static struct option_extra long_extras[] = { + {'c', "the cache directory", required_option, &g_cache}, + {'o', "write the results here", required_option, &g_output}, + {'z', "zstd cli tool", required_option, &g_zstdcli}, + {128, "use this config", optional_option, &g_config}, + {129, "use this data", optional_option, &g_data}, + {130, "use this method", optional_option, &g_method}, + {'d', "compare the results to this file", optional_option, &g_diff}, + {'h', "display this message", help_option, NULL}, +}; + +/** The short options. Must correspond to the options. */ +static char const short_options[] = "c:d:ho:z:"; + +/** Return the help string for the option type. */ +static char const* required_message(option_type opt_type) { + switch (opt_type) { + case required_option: + return "[required]"; + case optional_option: + return "[optional]"; + case help_option: + return ""; + default: + assert(0); + return NULL; + } +} + +/** Print the help for the program. 
*/ +static void print_help(void) { + fprintf(stderr, "regression test runner\n"); + size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + for (size_t i = 0; i < nargs; ++i) { + if (long_options[i].val < 128) { + /* Long / short - help [option type] */ + fprintf( + stderr, + "--%s / -%c \t- %s %s\n", + long_options[i].name, + long_options[i].val, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } else { + /* Short / long - help [option type] */ + fprintf( + stderr, + "--%s \t- %s %s\n", + long_options[i].name, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } + } +} + +/** Parse the arguments. Return 0 on success. Print help on failure. */ +static int parse_args(int argc, char** argv) { + int option_index = 0; + int c; + + while (1) { + c = getopt_long(argc, argv, short_options, long_options, &option_index); + if (c == -1) + break; + + int found = 0; + for (size_t i = 0; i < nargs; ++i) { + if (c == long_extras[i].id && long_extras[i].value != NULL) { + *long_extras[i].value = optarg; + found = 1; + break; + } + } + if (found) + continue; + + switch (c) { + case 'h': + case '?': + default: + print_help(); + return 1; + } + } + + int bad = 0; + for (size_t i = 0; i < nargs; ++i) { + if (long_extras[i].opt_type != required_option) + continue; + if (long_extras[i].value == NULL) + continue; + if (*long_extras[i].value != NULL) + continue; + fprintf( + stderr, + "--%s is a required argument but is not set\n", + long_options[i].name); + bad = 1; + } + if (bad) { + fprintf(stderr, "\n"); + print_help(); + return 1; + } + + return 0; +} + +/** Helper macro to print to stderr and a file. */ +#define tprintf(file, ...) \ + do { \ + fprintf(file, __VA_ARGS__); \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +/** Helper macro to flush stderr and a file. 
*/ +#define tflush(file) \ + do { \ + fflush(file); \ + fflush(stderr); \ + } while (0) + +void tprint_names( + FILE* results, + char const* data_name, + char const* config_name, + char const* method_name) { + int const data_padding = g_max_name_len - strlen(data_name); + int const config_padding = g_max_name_len - strlen(config_name); + int const method_padding = g_max_name_len - strlen(method_name); + + tprintf( + results, + "%s, %*s%s, %*s%s, %*s", + data_name, + data_padding, + "", + config_name, + config_padding, + "", + method_name, + method_padding, + ""); +} + +/** + * Run all the regression tests and record the results table to results and + * stderr progressively. + */ +static int run_all(FILE* results) { + tprint_names(results, "Data", "Config", "Method"); + tprintf(results, "Total compressed size\n"); + for (size_t method = 0; methods[method] != NULL; ++method) { + if (g_method != NULL && strcmp(methods[method]->name, g_method)) + continue; + for (size_t datum = 0; data[datum] != NULL; ++datum) { + if (g_data != NULL && strcmp(data[datum]->name, g_data)) + continue; + /* Create the state common to all configs */ + method_state_t* state = methods[method]->create(data[datum]); + for (size_t config = 0; configs[config] != NULL; ++config) { + if (g_config != NULL && strcmp(configs[config]->name, g_config)) + continue; + if (config_skip_data(configs[config], data[datum])) + continue; + /* Print the result for the (method, data, config) tuple. 
*/ + result_t const result = + methods[method]->compress(state, configs[config]); + if (result_is_skip(result)) + continue; + tprint_names( + results, + data[datum]->name, + configs[config]->name, + methods[method]->name); + if (result_is_error(result)) { + tprintf(results, "%s\n", result_get_error_string(result)); + } else { + tprintf( + results, + "%llu\n", + (unsigned long long)result_get_data(result).total_size); + } + tflush(results); + } + methods[method]->destroy(state); + } + } + return 0; +} + +/** memcmp() the old results file and the new results file. */ +static int diff_results(char const* actual_file, char const* expected_file) { + data_buffer_t const actual = data_buffer_read(actual_file); + data_buffer_t const expected = data_buffer_read(expected_file); + int ret = 1; + + if (actual.data == NULL) { + fprintf(stderr, "failed to open results '%s' for diff\n", actual_file); + goto out; + } + if (expected.data == NULL) { + fprintf( + stderr, + "failed to open previous results '%s' for diff\n", + expected_file); + goto out; + } + + ret = data_buffer_compare(actual, expected); + if (ret != 0) { + fprintf( + stderr, + "actual results '%s' does not match expected results '%s'\n", + actual_file, + expected_file); + } else { + fprintf(stderr, "actual results match expected results\n"); + } +out: + data_buffer_free(actual); + data_buffer_free(expected); + return ret; +} + +int main(int argc, char** argv) { + /* Parse args and validate modules. */ + int ret = parse_args(argc, argv); + if (ret != 0) + return ret; + + if (are_names_bad()) + return 1; + + /* Initialize modules. */ + method_set_zstdcli(g_zstdcli); + ret = data_init(g_cache); + if (ret != 0) { + fprintf(stderr, "data_init() failed with error=%s\n", strerror(ret)); + return 1; + } + + /* Run the regression tests. 
*/ + ret = 1; + FILE* results = fopen(g_output, "w"); + if (results == NULL) { + fprintf(stderr, "Failed to open the output file\n"); + goto out; + } + ret = run_all(results); + fclose(results); + + if (ret != 0) + goto out; + + if (g_diff) + /* Diff the new results with the previous results. */ + ret = diff_results(g_output, g_diff); + +out: + data_finish(); + return ret; +} |