diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/zstd/programs | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
24 files changed, 6868 insertions, 0 deletions
diff --git a/src/zstd/programs/.gitignore b/src/zstd/programs/.gitignore new file mode 100644 index 00000000..701830c7 --- /dev/null +++ b/src/zstd/programs/.gitignore @@ -0,0 +1,36 @@ +# local binary (Makefile) +zstd +zstd32 +zstd4 +zstd-compress +zstd-decompress +zstd-frugal +zstd-small +zstd-nolegacy + +# Object files +*.o +*.ko +default.profraw +have_zlib + +# Executables +*.exe +*.out +*.app + +# Default result files +dictionary +grillResults.txt +_* +tmp* +*.zst +result +out + +# fuzzer +afl + +# Misc files +*.bat +dirTest* diff --git a/src/zstd/programs/BUCK b/src/zstd/programs/BUCK new file mode 100644 index 00000000..06940304 --- /dev/null +++ b/src/zstd/programs/BUCK @@ -0,0 +1,63 @@ +cxx_binary( + name='zstd', + headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), + srcs=glob(['*.c'], excludes=['datagen.c']), + deps=[ + ':datagen', + ':util', + '//lib:zstd', + '//lib:zdict', + '//lib:mem', + '//lib:xxhash', + ], +) + +cxx_binary( + name='zstdmt', + headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), + srcs=glob(['*.c'], excludes=['datagen.c']), + deps=[ + ':datagen', + ':util', + '//lib:zstd', + '//lib:zdict', + '//lib:mem', + '//lib:xxhash', + ], + preprocessor_flags=['-DZSTD_MULTITHREAD'], + linker_flags=['-lpthread'], +) + +cxx_binary( + name='gzstd', + headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), + srcs=glob(['*.c'], excludes=['datagen.c']), + deps=[ + ':datagen', + ':util', + '//lib:zstd', + '//lib:zdict', + '//lib:mem', + '//lib:xxhash', + ], + preprocessor_flags=['-DZSTD_GZDECOMPRESS'], + linker_flags=['-lz'], +) + +cxx_library( + name='datagen', + visibility=['PUBLIC'], + header_namespace='', + exported_headers=['datagen.h'], + srcs=['datagen.c'], + deps=['//lib:mem'], +) + + +cxx_library( + name='util', + visibility=['PUBLIC'], + header_namespace='', + exported_headers=['util.h', 'platform.h'], + deps=['//lib:mem'], +) diff --git a/src/zstd/programs/Makefile b/src/zstd/programs/Makefile new file mode 100644 index 00000000..cbb67e9f --- /dev/null +++ b/src/zstd/programs/Makefile @@ -0,0 +1,310 @@ +# ################################################################ +# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ########################################################################## +# zstd : Command Line Utility, supporting gzip-like arguments +# zstd32 : Same as zstd, but forced to compile in 32-bits mode +# zstd_nolegacy : zstd without support of decompression of legacy versions +# zstd-small : minimal zstd without dictionary builder and benchmark +# zstd-compress : compressor-only version of zstd +# zstd-decompress : decompressor-only version of zstd +# ########################################################################## + +ZSTDDIR = ../lib + +# Version numbers +LIBVER_SRC := $(ZSTDDIR)/zstd.h +LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` +LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` +LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` +LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT) +LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) +LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) +LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) +LIBVER := $(shell echo $(LIBVER_SCRIPT)) + +ZSTD_VERSION=$(LIBVER) + +ifeq ($(shell $(CC) -v 2>&1 | grep -c "gcc version "), 1) +ALIGN_LOOP = -falign-loops=32 +else +ALIGN_LOOP = +endif + +CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ + -I$(ZSTDDIR)/dictBuilder \ + -DZSTD_NEWAPI \ + -DXXH_NAMESPACE=ZSTD_ # because xxhash.o already compiled with this macro from library +CFLAGS ?= -O3 +DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) + + +ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c +ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c +ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c +ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) +ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c +ZSTDDECOMP_O = $(ZSTDDIR)/decompress/zstd_decompress.o + +ZSTD_LEGACY_SUPPORT ?= 4 +ZSTDLEGACY_FILES := +ifneq ($(ZSTD_LEGACY_SUPPORT), 0) +ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) + ZSTDLEGACY_FILES += $(shell ls $(ZSTDDIR)/legacy/*.c | grep 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') +endif + CPPFLAGS += -I$(ZSTDDIR)/legacy +else +endif + +ZSTDLIB_FILES := $(sort $(wildcard $(ZSTD_FILES)) $(wildcard $(ZSTDLEGACY_FILES)) $(wildcard $(ZDICT_FILES))) + +# Define *.exe as extension for Windows systems +ifneq (,$(filter Windows%,$(OS))) +EXT =.exe +RES64_FILE = windres/zstd64.res +RES32_FILE = windres/zstd32.res +ifneq (,$(filter x86_64%,$(shell $(CC) -dumpmachine))) + RES_FILE = $(RES64_FILE) +else + RES_FILE = $(RES32_FILE) +endif +else +EXT = +endif + +VOID = /dev/null + +# thread detection +NO_THREAD_MSG := ==> no threads, building without multithreading support +HAVE_PTHREAD := $(shell printf '\#include <pthread.h>\nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_pthread$(EXT) -x c - -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0) +HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0) +ifeq ($(HAVE_THREAD), 1) +THREAD_MSG := ==> building with threading support +THREAD_CPP := -DZSTD_MULTITHREAD +THREAD_LD := -pthread +else +THREAD_MSG := $(NO_THREAD_MSG) +endif + +# zlib detection +NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support +HAVE_ZLIB := $(shell printf '\#include <zlib.h>\nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_zlib$(EXT) -x c - -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0) +ifeq ($(HAVE_ZLIB), 1) +ZLIB_MSG := ==> building zstd with .gz compression support +ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS +ZLIBLD = -lz +else +ZLIB_MSG := $(NO_ZLIB_MSG) +endif + +# lzma detection +NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support +HAVE_LZMA := $(shell printf '\#include <lzma.h>\nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lzma$(EXT) -x c - -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0) +ifeq ($(HAVE_LZMA), 1) +LZMA_MSG := ==> building zstd with .xz/.lzma compression support +LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS +LZMALD = -llzma +else +LZMA_MSG := $(NO_LZMA_MSG) +endif + +# lz4 detection +NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support +HAVE_LZ4 := $(shell printf '\#include <lz4frame.h>\n\#include <lz4.h>\nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lz4$(EXT) -x c - -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0) +ifeq ($(HAVE_LZ4), 1) +LZ4_MSG := ==> building zstd with .lz4 compression support +LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS +LZ4LD = -llz4 +else +LZ4_MSG := $(NO_LZ4_MSG) +endif + +.PHONY: default +default: zstd-release + +.PHONY: all +all: zstd + +.PHONY: allVariants +allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy + +$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) + +zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) +zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) +zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) +zstd : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o + @echo "$(THREAD_MSG)" + @echo "$(ZLIB_MSG)" + @echo "$(LZMA_MSG)" + @echo "$(LZ4_MSG)" +ifneq (,$(filter Windows%,$(OS))) + windres/generate_res.bat +endif + $(CC) $(FLAGS) $^ $(RES_FILE) -o $@$(EXT) $(LDFLAGS) + +.PHONY: zstd-release +zstd-release: DEBUGFLAGS := +zstd-release: zstd + +zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) +zstd32 : $(ZSTDLIB_FILES) zstdcli.c fileio.c bench.c datagen.c dibio.c +ifneq (,$(filter Windows%,$(OS))) + windres/generate_res.bat +endif + $(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT) + +zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o fileio.c bench.o datagen.o dibio.o + $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) + +zstd-nomt : THREAD_CPP := +zstd-nomt : THREAD_LD := +zstd-nomt : THREAD_MSG := - multi-threading disabled +zstd-nomt : zstd + +zstd-nogz : ZLIBCPP := +zstd-nogz : ZLIBLD := +zstd-nogz : ZLIB_MSG := - gzip support is disabled +zstd-nogz : zstd + +zstd-noxz : LZMACPP := +zstd-noxz : LZMALD := +zstd-noxz : LZMA_MSG := - xz/lzma support is disabled +zstd-noxz : zstd + + +zstd-pgo : MOREFLAGS = -fprofile-generate +zstd-pgo : clean zstd + ./zstd -b19i1 $(PROFILE_WITH) + ./zstd -b16i1 $(PROFILE_WITH) + ./zstd -b9i2 $(PROFILE_WITH) + ./zstd -b $(PROFILE_WITH) + ./zstd -b7i2 $(PROFILE_WITH) + ./zstd -b5 $(PROFILE_WITH) + $(RM) zstd + $(RM) $(ZSTDDECOMP_O) + $(MAKE) zstd MOREFLAGS=-fprofile-use + +# minimal target, with only zstd compression and decompression. no bench. no legacy. +zstd-small: CFLAGS = -Os -s +zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c fileio.c + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT) + +zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT) + +zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c fileio.c + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT) + +zstdmt: zstd + ln -sf zstd zstdmt + +.PHONY: generate_res +generate_res: + windres/generate_res.bat + +.PHONY: clean +clean: + $(MAKE) -C $(ZSTDDIR) clean + @$(RM) $(ZSTDDIR)/decompress/*.o $(ZSTDDIR)/decompress/zstd_decompress.gcda + @$(RM) core *.o tmp* result* *.gcda dictionary *.zst \ + zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \ + zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \ + *.gcda default.profraw have_zlib$(EXT) + @echo Cleaning completed + +MD2ROFF = ronn +MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)" + +zstd.1: zstd.1.md + cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ + +.PHONY: man +man: zstd.1 + +.PHONY: clean-man +clean-man: + rm zstd.1 + +.PHONY: preview-man +preview-man: clean-man man + man ./zstd.1 + +#----------------------------------------------------------------------------- +# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets +#----------------------------------------------------------------------------- +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) + +.PHONY: list +list: + @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + +DESTDIR ?= +# directory variables : GNU conventions prefer lowercase +# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html +# support both lower and uppercase (BSD), use uppercase in script +prefix ?= /usr/local +PREFIX ?= $(prefix) +exec_prefix ?= $(PREFIX) +bindir ?= $(exec_prefix)/bin +BINDIR ?= $(bindir) +datarootdir ?= $(PREFIX)/share +mandir ?= $(datarootdir)/man +man1dir ?= $(mandir)/man1 + +ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS)) +MANDIR ?= $(PREFIX)/man/man1 +else +MANDIR ?= $(man1dir) +endif + +ifneq (,$(filter $(shell uname),SunOS)) +INSTALL ?= ginstall +else +INSTALL ?= install +endif + +INSTALL_PROGRAM ?= $(INSTALL) +INSTALL_SCRIPT ?= $(INSTALL_PROGRAM) +INSTALL_DATA ?= $(INSTALL) -m 644 +INSTALL_MAN ?= $(INSTALL_DATA) + +.PHONY: install +install: zstd + @echo Installing binaries + @$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ + @$(INSTALL_PROGRAM) zstd $(DESTDIR)$(BINDIR)/zstd + @ln -sf zstd $(DESTDIR)$(BINDIR)/zstdcat + @ln -sf zstd $(DESTDIR)$(BINDIR)/unzstd + @ln -sf zstd $(DESTDIR)$(BINDIR)/zstdmt + @$(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless + @$(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep + @echo Installing man pages + @$(INSTALL_MAN) zstd.1 $(DESTDIR)$(MANDIR)/zstd.1 + @ln -sf zstd.1 $(DESTDIR)$(MANDIR)/zstdcat.1 + @ln -sf zstd.1 $(DESTDIR)$(MANDIR)/unzstd.1 + @echo zstd installation completed + +.PHONY: uninstall +uninstall: + @$(RM) $(DESTDIR)$(BINDIR)/zstdgrep + @$(RM) $(DESTDIR)$(BINDIR)/zstdless + @$(RM) $(DESTDIR)$(BINDIR)/zstdcat + @$(RM) $(DESTDIR)$(BINDIR)/unzstd + @$(RM) $(DESTDIR)$(BINDIR)/zstd + @$(RM) $(DESTDIR)$(MANDIR)/zstdcat.1 + @$(RM) $(DESTDIR)$(MANDIR)/unzstd.1 + @$(RM) $(DESTDIR)$(MANDIR)/zstd.1 + @echo zstd programs successfully uninstalled + +endif diff --git a/src/zstd/programs/README.md b/src/zstd/programs/README.md new file mode 100644 index 00000000..2da9a6db --- /dev/null +++ b/src/zstd/programs/README.md @@ -0,0 +1,209 @@ +Command Line Interface for Zstandard library +============================================ + +Command Line Interface (CLI) can be created using the `make` command without any additional parameters. +There are however other Makefile targets that create different variations of CLI: +- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and support for decompression of legacy zstd formats +- `zstd_nolegacy` : Same as `zstd` but without support for legacy zstd formats +- `zstd-small` : CLI optimized for minimal size; no dictionary builder, no benchmark, and no support for legacy zstd formats +- `zstd-compress` : version of CLI which can only compress into zstd format +- `zstd-decompress` : version of CLI which can only decompress zstd format + + +#### Compilation variables +`zstd` scope can be altered by modifying the following compilation variables : + +- __HAVE_THREAD__ : multithreading is automatically enabled when `pthread` is detected. + It's possible to disable multithread support, by setting HAVE_THREAD=0 . + Example : make zstd HAVE_THREAD=0 + It's also possible to force compilation with multithread support, using HAVE_THREAD=1. + In which case, linking stage will fail if `pthread` library cannot be found. + This might be useful to prevent silent feature disabling. + +- __HAVE_ZLIB__ : `zstd` can compress and decompress files in `.gz` format. + This is ordered through command `--format=gzip`. + Alternatively, symlinks named `gzip` or `gunzip` will mimic intended behavior. + `.gz` support is automatically enabled when `zlib` library is detected at build time. + It's possible to disable `.gz` support, by setting HAVE_ZLIB=0. + Example : make zstd HAVE_ZLIB=0 + It's also possible to force compilation with zlib support, using HAVE_ZLIB=1. + In which case, linking stage will fail if `zlib` library cannot be found. + This might be useful to prevent silent feature disabling. + +- __HAVE_LZMA__ : `zstd` can compress and decompress files in `.xz` and `.lzma` formats. + This is ordered through commands `--format=xz` and `--format=lzma` respectively. + Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior. + `.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time. + It's possible to disable `.xz` and `.lzma` support, by setting HAVE_LZMA=0 . + Example : make zstd HAVE_LZMA=0 + It's also possible to force compilation with lzma support, using HAVE_LZMA=1. + In which case, linking stage will fail if `lzma` library cannot be found. + This might be useful to prevent silent feature disabling. + +- __HAVE_LZ4__ : `zstd` can compress and decompress files in `.lz4` formats. + This is ordered through commands `--format=lz4`. + Alternatively, symlinks named `lz4`, or `unlz4` will mimic intended behavior. + `.lz4` support is automatically enabled when `lz4` library is detected at build time. + It's possible to disable `.lz4` support, by setting HAVE_LZ4=0 . + Example : make zstd HAVE_LZ4=0 + It's also possible to force compilation with lz4 support, using HAVE_LZ4=1. + In which case, linking stage will fail if `lz4` library cannot be found. + This might be useful to prevent silent feature disabling. + +- __ZSTD_LEGACY_SUPPORT__ : `zstd` can decompress files compressed by older versions of `zstd`. + Starting v0.8.0, all versions of `zstd` produce frames compliant with the [specification](../doc/zstd_compression_format.md), and are therefore compatible. + But older versions (< v0.8.0) produced different, incompatible, frames. + By default, `zstd` supports decoding legacy formats >= v0.4.0 (`ZSTD_LEGACY_SUPPORT=4`). + This can be altered by modifying this compilation variable. + `ZSTD_LEGACY_SUPPORT=1` means "support all formats >= v0.1.0". + `ZSTD_LEGACY_SUPPORT=2` means "support all formats >= v0.2.0", and so on. + `ZSTD_LEGACY_SUPPORT=0` means _DO NOT_ support any legacy format. + if `ZSTD_LEGACY_SUPPORT >= 8`, it's the same as `0`, since there is no legacy format after `7`. + Note : `zstd` only supports decoding older formats, and cannot generate any legacy format. + + +#### Aggregation of parameters +CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`. + + +#### Dictionary builder in Command Line Interface +Zstd offers a training mode, which can be used to tune the algorithm for a selected +type of data, by providing it with a few samples. The result of the training is stored +in a file selected with the `-o` option (default name is `dictionary`), +which can be loaded before compression and decompression. + +Using a dictionary, the compression ratio achievable on small data improves dramatically. +These compression gains are achieved while simultaneously providing faster compression and decompression speeds. +Dictionary work if there is some correlation in a family of small data (there is no universal dictionary). +Hence, deploying one dictionary per type of data will provide the greater benefits. +Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm +will rely more and more on previously decoded content to compress the rest of the file. + +Usage of the dictionary builder and created dictionaries with CLI: + +1. Create the dictionary : `zstd --train PathToTrainingSet/* -o dictionaryName` +2. Compress with the dictionary: `zstd FILE -D dictionaryName` +3. Decompress with the dictionary: `zstd --decompress FILE.zst -D dictionaryName` + + +#### Benchmark in Command Line Interface +CLI includes in-memory compression benchmark module for zstd. +The benchmark is conducted using given filenames. The files are read into memory and joined together. +It makes benchmark more precise as it eliminates I/O overhead. +Multiple filenames can be supplied, as multiple parameters, with wildcards, +or names of directories can be used as parameters with `-r` option. + +The benchmark measures ratio, compressed size, compression and decompression speed. +One can select compression levels starting from `-b` and ending with `-e`. +The `-i` parameter selects minimal time used for each of tested levels. + + +#### Usage of Command Line Interface +The full list of options can be obtained with `-h` or `-H` parameter: +``` +Usage : + zstd [args] [FILE(s)] [-o file] + +FILE : a filename + with no FILE, or when FILE is - , read standard input +Arguments : + -# : # compression level (1-19, default:3) + -d : decompression + -D file: use `file` as Dictionary + -o file: result stored into `file` (only if 1 input file) + -f : overwrite output without prompting and (de)compress links +--rm : remove source file(s) after successful de/compression + -k : preserve source file(s) (default) + -h/-H : display help/long help and exit + +Advanced arguments : + -V : display Version number and exit + -v : verbose mode; specify multiple times to increase verbosity + -q : suppress warnings; specify twice to suppress errors too + -c : force write to standard output, even if it is the console + -l : print information about zstd compressed files +--ultra : enable levels beyond 19, up to 22 (requires more memory) +--long : enable long distance matching (requires more memory) +--no-dictID : don't write dictID into header (dictionary compression) +--[no-]check : integrity check (default:enabled) + -r : operate recursively on directories +--format=gzip : compress files to the .gz format +--format=xz : compress files to the .xz format +--format=lzma : compress files to the .lzma format +--test : test compressed file integrity +--[no-]sparse : sparse mode (default:disabled) + -M# : Set a memory usage limit for decompression +-- : All arguments after "--" are treated as files + +Dictionary builder : +--train ## : create a dictionary from a training set of files +--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args +--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9) + -o file : `file` is dictionary name (default: dictionary) +--maxdict=# : limit dictionary to specified size (default : 112640) +--dictID=# : force dictionary ID to specified value (default: random) + +Benchmark arguments : + -b# : benchmark file(s), using # compression level (default : 1) + -e# : test all compression levels from -bX to # (default: 1) + -i# : minimum evaluation time in seconds (default : 3s) + -B# : cut file into independent blocks of size # (default: no block) +--priority=rt : set process priority to real-time +``` + + +#### Long distance matching mode +The long distance matching mode, enabled with `--long`, is designed to improve +the compression ratio for files with long matches at a large distance (up to the +maximum window size, `128 MiB`) while still maintaining compression speed. + +Enabling this mode sets the window size to `128 MiB` and thus increases the memory +usage for both the compressor and decompressor. Performance in terms of speed is +dependent on long matches being found. Compression speed may degrade if few long +matches are found. Decompression speed usually improves when there are many long +distance matches. + +Below are graphs comparing the compression speed, compression ratio, and +decompression speed with and without long distance matching on an ideal use +case: a tar of four versions of clang (versions `3.4.1`, `3.4.2`, `3.5.0`, +`3.5.1`) with a total size of `244889600 B`. This is an ideal use case as there +are many long distance matches within the maximum window size of `128 MiB` (each +version is less than `128 MiB`). + +Compression Speed vs Ratio | Decompression Speed +---------------------------|--------------------- +![Compression Speed vs Ratio](../doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](../doc/images/ldmDspeed.png "Decompression Speed") + +| Method | Compression ratio | Compression speed | Decompression speed | +|:-------|------------------:|-------------------------:|---------------------------:| +| `zstd -1` | `5.065` | `284.8 MB/s` | `759.3 MB/s` | +| `zstd -5` | `5.826` | `124.9 MB/s` | `674.0 MB/s` | +| `zstd -10` | `6.504` | `29.5 MB/s` | `771.3 MB/s` | +| `zstd -1 --long` | `17.426` | `220.6 MB/s` | `1638.4 MB/s` | +| `zstd -5 --long` | `19.661` | `165.5 MB/s` | `1530.6 MB/s`| +| `zstd -10 --long`| `21.949` | `75.6 MB/s` | `1632.6 MB/s`| + +On this file, the compression ratio improves significantly with minimal impact +on compression speed, and the decompression speed doubles. + +On the other extreme, compressing a file with few long distance matches (such as +the [Silesia compression corpus]) will likely lead to a deterioration in +compression speed (for lower levels) with minimal change in compression ratio. + +The below table illustrates this on the [Silesia compression corpus]. + +[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia + +| Method | Compression ratio | Compression speed | Decompression speed | +|:-------|------------------:|-------------------------:|---------------------------:| +| `zstd -1` | `2.878` | `231.7 MB/s` | `594.4 MB/s` | +| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` | +| `zstd -5` | `3.274` | `77.1 MB/s` | `464.2 MB/s` | +| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` | +| `zstd -10` | `3.523` | `16.4 MB/s` | `489.2 MB/s` | +| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s` | + + + + + diff --git a/src/zstd/programs/bench.c b/src/zstd/programs/bench.c new file mode 100644 index 00000000..ec99c61c --- /dev/null +++ b/src/zstd/programs/bench.c @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************** +* Tuning parameters +****************************************/ +#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */ +#define BMK_TIMETEST_DEFAULT_S 3 +#endif + + +/* ************************************** +* Compiler Warnings +****************************************/ +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support */ +#include "util.h" /* UTIL_getFileSize, UTIL_sleep */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset */ +#include <stdio.h> /* fprintf, fopen */ +#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ + +#include "mem.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "datagen.h" /* RDG_genBuffer */ +#include "xxhash.h" +#include "zstdmt_compress.h" + + +/* ************************************* +* Constants +***************************************/ +#ifndef ZSTD_GIT_COMMIT +# define ZSTD_GIT_COMMIT_STRING "" +#else +# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT) +#endif + +#define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */ +#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */ +#define COOLPERIOD_SEC 10 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + +static U32 g_compressibilityDefault = 50; + + +/* ************************************* +* console display +***************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static int g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */ + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \ + { g_time = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } } +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + + +/* ************************************* +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } +#define EXM_THROW(error, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + exit(error); \ +} + + +/* ************************************* +* Benchmark Parameters +***************************************/ +static int g_additionalParam = 0; +static U32 g_decodeOnly = 0; + +void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } + +void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } + +static U32 g_nbSeconds = BMK_TIMETEST_DEFAULT_S; +void BMK_setNbSeconds(unsigned nbSeconds) +{ + g_nbSeconds = nbSeconds; + DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression - \n", g_nbSeconds); +} + +static size_t g_blockSize = 0; +void BMK_setBlockSize(size_t blockSize) +{ + g_blockSize = blockSize; + if (g_blockSize) DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); +} + +void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } + +static U32 g_nbThreads = 1; +void BMK_setNbThreads(unsigned nbThreads) { +#ifndef ZSTD_MULTITHREAD + if (nbThreads > 1) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); +#endif + g_nbThreads = nbThreads; +} +static U32 g_ldmFlag = 0; +void BMK_setLdmFlag(unsigned ldmFlag) { + g_ldmFlag = ldmFlag; +} + +static U32 g_ldmMinMatch = 0; +void BMK_setLdmMinMatch(unsigned ldmMinMatch) { + g_ldmMinMatch = ldmMinMatch; +} + +static U32 g_ldmHashLog = 0; +void BMK_setLdmHashLog(unsigned ldmHashLog) { + g_ldmHashLog = ldmHashLog; +} + +#define BMK_LDM_PARAM_NOTSET 9999 +static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET; +void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { + g_ldmBucketSizeLog = ldmBucketSizeLog; +} + +static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET; +void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) { + g_ldmHashEveryLog = ldmHashEveryLog; +} + + +/* ******************************************************** +* Bench functions +**********************************************************/ +typedef struct { + const void* srcPtr; + size_t srcSize; + void* cPtr; + size_t cRoom; + size_t cSize; + void* resPtr; + size_t resSize; +} blockParam_t; + + + +#undef MIN +#undef MAX +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +static int BMK_benchMem(const void* srcBuffer, size_t srcSize, + const char* displayName, int cLevel, + const size_t* fileSizes, U32 nbFiles, + const void* dictBuffer, size_t dictBufferSize, + const ZSTD_compressionParameters* comprParams) +{ + size_t const blockSize = ((g_blockSize>=32 && !g_decodeOnly) ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; + blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); + size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + void* const compressedBuffer = malloc(maxCompressedSize); + void* resultBuffer = malloc(srcSize); + ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(g_nbThreads); + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + size_t const loadedCompressedSize = srcSize; + size_t cSize = 0; + double ratio = 0.; + U32 nbBlocks; + + /* checks */ + if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx) + EXM_THROW(31, "allocation error : not enough memory"); + + /* init */ + if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ + + if (g_decodeOnly) { /* benchmark only decompression : source must be already compressed */ + const char* srcPtr = (const char*)srcBuffer; + U64 totalDSize64 = 0; + U32 fileNb; + for (fileNb=0; fileNb<nbFiles; fileNb++) { + U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]); + if (fSize64==0) EXM_THROW(32, "Impossible to determine original size "); + totalDSize64 += fSize64; + srcPtr += fileSizes[fileNb]; + } + { size_t const decodedSize = (size_t)totalDSize64; + if (totalDSize64 > decodedSize) EXM_THROW(32, "original size is too large"); /* size_t overflow */ + free(resultBuffer); + resultBuffer = malloc(decodedSize); + if (!resultBuffer) EXM_THROW(33, "not enough memory"); + cSize = srcSize; + srcSize = decodedSize; + ratio = (double)srcSize / (double)cSize; + } } + + /* Init blockTable data */ + { const char* srcPtr = (const char*)srcBuffer; + char* cPtr = (char*)compressedBuffer; + char* resPtr = (char*)resultBuffer; + U32 fileNb; + for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) { + size_t remaining = fileSizes[fileNb]; + U32 const nbBlocksforThisFile = g_decodeOnly ? 1 : (U32)((remaining + (blockSize-1)) / blockSize); + U32 const blockEnd = nbBlocks + nbBlocksforThisFile; + for ( ; nbBlocks<blockEnd; nbBlocks++) { + size_t const thisBlockSize = MIN(remaining, blockSize); + blockTable[nbBlocks].srcPtr = (const void*)srcPtr; + blockTable[nbBlocks].srcSize = thisBlockSize; + blockTable[nbBlocks].cPtr = (void*)cPtr; + blockTable[nbBlocks].cRoom = g_decodeOnly ? thisBlockSize : ZSTD_compressBound(thisBlockSize); + blockTable[nbBlocks].cSize = blockTable[nbBlocks].cRoom; + blockTable[nbBlocks].resPtr = (void*)resPtr; + blockTable[nbBlocks].resSize = g_decodeOnly ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; + srcPtr += thisBlockSize; + cPtr += blockTable[nbBlocks].cRoom; + resPtr += thisBlockSize; + remaining -= thisBlockSize; + } } } + + /* warmimg up memory */ + RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); + + /* Bench */ + { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); + U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0); + UTIL_time_t coolTime; + U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 1; + U64 totalCTime=0, totalDTime=0; + U32 cCompleted=g_decodeOnly, dCompleted=0; +# define NB_MARKS 4 + const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; + U32 markNb = 0; + + coolTime = UTIL_getTime(); + DISPLAYLEVEL(2, "\r%79s\r", ""); + while (!cCompleted || !dCompleted) { + + /* overheat protection */ + if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { + DISPLAYLEVEL(2, "\rcooling down ... \r"); + UTIL_sleep(COOLPERIOD_SEC); + coolTime = UTIL_getTime(); + } + + if (!g_decodeOnly) { + UTIL_time_t clockStart; + /* Compression */ + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); + if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ + + UTIL_sleepMilli(1); /* give processor time to other processes */ + UTIL_waitForNextTick(); + clockStart = UTIL_getTime(); + + if (!cCompleted) { /* still some time to do compression tests */ + U64 const clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1; + U32 nbLoops = 0; + ZSTD_CDict* cdict = NULL; +#ifdef ZSTD_NEWAPI + ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog); + if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog); + } + if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog); + } + ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy); + ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); +#else + size_t const avgSize = MIN(blockSize, (srcSize / nbFiles)); + ZSTD_parameters zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); + ZSTD_customMem const cmem = { NULL, NULL, NULL }; + if (comprParams->windowLog) zparams.cParams.windowLog = comprParams->windowLog; + if (comprParams->chainLog) zparams.cParams.chainLog = comprParams->chainLog; + if (comprParams->hashLog) zparams.cParams.hashLog = comprParams->hashLog; + if (comprParams->searchLog) zparams.cParams.searchLog = comprParams->searchLog; + if (comprParams->searchLength) zparams.cParams.searchLength = comprParams->searchLength; + if (comprParams->targetLength) zparams.cParams.targetLength = comprParams->targetLength; + if (comprParams->strategy) zparams.cParams.strategy = comprParams->strategy; + cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dm_auto, zparams.cParams, cmem); + if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure"); +#endif + do { + U32 blockNb; + for (blockNb=0; blockNb<nbBlocks; blockNb++) { + size_t rSize; +#ifdef ZSTD_NEWAPI + ZSTD_outBuffer out = { blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, 0 }; + ZSTD_inBuffer in = { blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize, 0 }; + size_t cError = 1; + while (cError) { + cError = ZSTD_compress_generic(ctx, + &out, &in, ZSTD_e_end); + if (ZSTD_isError(cError)) + EXM_THROW(1, "ZSTD_compress_generic() error : %s", + ZSTD_getErrorName(cError)); + } + rSize = out.pos; +#else /* ! ZSTD_NEWAPI */ + if (dictBufferSize) { + rSize = ZSTD_compress_usingCDict(ctx, + blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, + blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, + cdict); + } else { +# ifdef ZSTD_MULTITHREAD /* note : limitation : MT single-pass does not support compression with dictionary */ + rSize = ZSTDMT_compressCCtx(mtctx, + blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, + blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, + cLevel); +# else + rSize = ZSTD_compress_advanced (ctx, + blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, + blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, + NULL, 0, zparams); +# endif + } + if (ZSTD_isError(rSize)) + EXM_THROW(1, "ZSTD_compress_usingCDict() failed : %s", + ZSTD_getErrorName(rSize)); +#endif /* ZSTD_NEWAPI */ + blockTable[blockNb].cSize = rSize; + } + nbLoops++; + } while (UTIL_clockSpanMicro(clockStart) < clockLoop); + ZSTD_freeCDict(cdict); + { U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart); + if (clockSpanMicro < fastestC*nbLoops) fastestC = clockSpanMicro / nbLoops; + totalCTime += clockSpanMicro; + cCompleted = (totalCTime >= maxTime); + } } + + cSize = 0; + { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; } + ratio = (double)srcSize / (double)cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio, + (double)srcSize / fastestC ); + } else { /* g_decodeOnly */ + memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); + } + +#if 0 /* disable decompression test */ + dCompleted=1; + (void)totalDTime; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ +#else + /* Decompression */ + if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ + + UTIL_sleepMilli(1); /* give processor time to other processes */ + UTIL_waitForNextTick(); + + if (!dCompleted) { + U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1; + U32 nbLoops = 0; + ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); + UTIL_time_t const clockStart = UTIL_getTime(); + if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); + do { + U32 blockNb; + for (blockNb=0; blockNb<nbBlocks; blockNb++) { + size_t const regenSize = ZSTD_decompress_usingDDict(dctx, + blockTable[blockNb].resPtr, blockTable[blockNb].resSize, + blockTable[blockNb].cPtr, blockTable[blockNb].cSize, + ddict); + if (ZSTD_isError(regenSize)) { + EXM_THROW(2, "ZSTD_decompress_usingDDict() failed on block %u of size %u : %s \n", + blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize)); + } + blockTable[blockNb].resSize = regenSize; + } + nbLoops++; + } while (UTIL_clockSpanMicro(clockStart) < clockLoop); + ZSTD_freeDDict(ddict); + { U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart); + if (clockSpanMicro < fastestD*nbLoops) fastestD = clockSpanMicro / nbLoops; + totalDTime += clockSpanMicro; + dCompleted = (totalDTime >= maxTime); + } } + + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio, + (double)srcSize / fastestC, + (double)srcSize / fastestD ); + + /* CRC Checking */ + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if (!g_decodeOnly && (crcOrig!=crcCheck)) { + size_t u; + DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u<srcSize; u++) { + if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) { + U32 segNb, bNb, pos; + size_t bacc = 0; + DISPLAY("Decoding error at pos %u ", (U32)u); + for (segNb = 0; segNb < nbBlocks; segNb++) { + if (bacc + blockTable[segNb].srcSize > u) break; + bacc += blockTable[segNb].srcSize; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); + if (u>5) { + int n; + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" \n"); + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" \n"); + } + break; + } + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } } + break; + } } /* CRC Checking */ +#endif + } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */ + + if (g_displayLevel == 1) { + double cSpeed = (double)srcSize / fastestC; + double dSpeed = (double)srcSize / fastestD; + if (g_additionalParam) + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); + else + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } + DISPLAYLEVEL(2, "%2i#\n", cLevel); + } /* Bench */ + + /* clean up */ + free(blockTable); + free(compressedBuffer); + free(resultBuffer); + ZSTDMT_freeCCtx(mtctx); + ZSTD_freeCCtx(ctx); + ZSTD_freeDCtx(dctx); + return 0; +} + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + BYTE* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + requiredMem += step; + if (requiredMem > maxMemory) requiredMem = maxMemory; + + do { + testmem = (BYTE*)malloc((size_t)requiredMem); + requiredMem -= step; + } while (!testmem); + + free(testmem); + return (size_t)(requiredMem); +} + +static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, + const char* displayName, int cLevel, int cLevelLast, + const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_compressionParameters *compressionParams, int setRealTimePrio) +{ + int l; + + const char* pch = strrchr(displayName, '\\'); /* Windows */ + if (!pch) pch = strrchr(displayName, '/'); /* Linux */ + if (pch) displayName = pch+1; + + if (setRealTimePrio) { + DISPLAYLEVEL(2, "Note : switching to a real-time priority \n"); + SET_REALTIME_PRIORITY; + } + + if (g_displayLevel == 1 && !g_additionalParam) + DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); + + if (cLevelLast < cLevel) cLevelLast = cLevel; + + for (l=cLevel; l <= cLevelLast; l++) { + BMK_benchMem(srcBuffer, benchedSize, + displayName, l, + fileSizes, nbFiles, + dictBuffer, dictBufferSize, compressionParams); + } +} + + +/*! BMK_loadFiles() : + Loads `buffer` with content of files listed within `fileNamesTable`. + At most, fills `buffer` entirely */ +static void BMK_loadFiles(void* buffer, size_t bufferSize, + size_t* fileSizes, + const char** fileNamesTable, unsigned nbFiles) +{ + size_t pos = 0, totalSize = 0; + unsigned n; + for (n=0; n<nbFiles; n++) { + FILE* f; + U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); + if (UTIL_isDirectory(fileNamesTable[n])) { + DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]); + fileSizes[n] = 0; + continue; + } + f = fopen(fileNamesTable[n], "rb"); + if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); + DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]); + if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ + { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); + if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + pos += readSize; } + fileSizes[n] = (size_t)fileSize; + totalSize += (size_t)fileSize; + fclose(f); + } + + if (totalSize == 0) EXM_THROW(12, "no data to bench"); +} + +static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, int cLevel, + int cLevelLast, ZSTD_compressionParameters *compressionParams, int setRealTimePrio) +{ + void* srcBuffer; + size_t benchedSize; + void* dictBuffer = NULL; + size_t dictBufferSize = 0; + size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); + U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); + char mfName[20] = {0}; + + if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); + + /* Load dictionary */ + if (dictFileName != NULL) { + U64 dictFileSize = UTIL_getFileSize(dictFileName); + if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName); + dictBufferSize = (size_t)dictFileSize; + dictBuffer = malloc(dictBufferSize); + if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); + BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1); + } + + /* Memory allocation & restrictions */ + benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; + if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; + if (benchedSize < totalSizeToLoad) + DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); + srcBuffer = malloc(benchedSize); + if (!srcBuffer) EXM_THROW(12, "not enough memory"); + + /* Load input buffer */ + BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles); + + /* Bench */ + snprintf (mfName, sizeof(mfName), " %u files", nbFiles); + { const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; + BMK_benchCLevel(srcBuffer, benchedSize, + displayName, cLevel, cLevelLast, + fileSizes, nbFiles, + dictBuffer, dictBufferSize, compressionParams, setRealTimePrio); + } + + /* clean up */ + free(srcBuffer); + free(dictBuffer); + free(fileSizes); +} + + +static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, ZSTD_compressionParameters* compressionParams, int setRealTimePrio) +{ + char name[20] = {0}; + size_t benchedSize = 10000000; + void* const srcBuffer = malloc(benchedSize); + + /* Memory allocation */ + if (!srcBuffer) EXM_THROW(21, "not enough memory"); + + /* Fill input buffer */ + RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); + + /* Bench */ + snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, NULL, 0, compressionParams, setRealTimePrio); + + /* clean up */ + free(srcBuffer); +} + + +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, + int cLevel, int cLevelLast, ZSTD_compressionParameters* compressionParams, int setRealTimePrio) +{ + double const compressibility = (double)g_compressibilityDefault / 100; + + if (cLevel < 1) cLevel = 1; /* minimum compression level */ + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); + if (cLevelLast < cLevel) cLevelLast = cLevel; + if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + + if (nbFiles == 0) + BMK_syntheticTest(cLevel, cLevelLast, compressibility, compressionParams, setRealTimePrio); + else + BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, setRealTimePrio); + return 0; +} diff --git a/src/zstd/programs/bench.h b/src/zstd/programs/bench.h new file mode 100644 index 00000000..82bb3456 --- /dev/null +++ b/src/zstd/programs/bench.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef BENCH_H_121279284357 +#define BENCH_H_121279284357 + +#include <stddef.h> /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include "zstd.h" /* ZSTD_compressionParameters */ + +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,const char* dictFileName, + int cLevel, int cLevelLast, ZSTD_compressionParameters* compressionParams, int setRealTimePrio); + +/* Set Parameters */ +void BMK_setNbSeconds(unsigned nbLoops); +void BMK_setBlockSize(size_t blockSize); +void BMK_setNbThreads(unsigned nbThreads); +void BMK_setNotificationLevel(unsigned level); +void BMK_setAdditionalParam(int additionalParam); +void BMK_setDecodeOnlyMode(unsigned decodeFlag); +void BMK_setLdmFlag(unsigned ldmFlag); +void BMK_setLdmMinMatch(unsigned ldmMinMatch); +void BMK_setLdmHashLog(unsigned ldmHashLog); +void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); +void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog); + +#endif /* BENCH_H_121279284357 */ diff --git a/src/zstd/programs/datagen.c b/src/zstd/programs/datagen.c new file mode 100644 index 00000000..a489d6af --- /dev/null +++ b/src/zstd/programs/datagen.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/*-************************************ +* Dependencies +**************************************/ +#include "platform.h" /* SET_BINARY_MODE */ +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* FILE, fwrite, fprintf */ +#include <string.h> /* memcpy */ +#include "mem.h" /* U32 */ + + +/*-************************************ +* Macros +**************************************/ +#define KB *(1 <<10) +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define RDG_DEBUG 0 +#define TRACE(...) if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ ) + + +/*-************************************ +* Local constants +**************************************/ +#define LTLOG 13 +#define LTSIZE (1<<LTLOG) +#define LTMASK (LTSIZE-1) + + +/*-******************************************************* +* Local Functions +*********************************************************/ +#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static U32 RDG_rand(U32* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = RDG_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + + +static void RDG_fillLiteralDistrib(BYTE* ldt, double ld) +{ + BYTE const firstChar = (ld<=0.0) ? 0 : '('; + BYTE const lastChar = (ld<=0.0) ? 255 : '}'; + BYTE character = (ld<=0.0) ? 0 : '0'; + U32 u; + + if (ld<=0.0) ld = 0.0; + for (u=0; u<LTSIZE; ) { + U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1; + U32 const end = MIN ( u + weight , LTSIZE); + while (u < end) ldt[u++] = character; + character++; + if (character > lastChar) character = firstChar; + } +} + + +static BYTE RDG_genChar(U32* seed, const BYTE* ldt) +{ + U32 const id = RDG_rand(seed) & LTMASK; + return ldt[id]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */ +} + + +static U32 RDG_rand15Bits (unsigned* seedPtr) +{ + return RDG_rand(seedPtr) & 0x7FFF; +} + +static U32 RDG_randLength(unsigned* seedPtr) +{ + if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF); /* small length */ + return (RDG_rand(seedPtr) & 0x1FF) + 0xF; +} + +void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, unsigned* seedPtr) +{ + BYTE* const buffPtr = (BYTE*)buffer; + U32 const matchProba32 = (U32)(32768 * matchProba); + size_t pos = prefixSize; + U32 prevOffset = 1; + + /* special case : sparse content */ + while (matchProba >= 1.0) { + size_t size0 = RDG_rand(seedPtr) & 3; + size0 = (size_t)1 << (16 + size0 * 2); + size0 += RDG_rand(seedPtr) & (size0-1); /* because size0 is power of 2*/ + if (buffSize < pos + size0) { + memset(buffPtr+pos, 0, buffSize-pos); + return; + } + memset(buffPtr+pos, 0, size0); + pos += size0; + buffPtr[pos-1] = RDG_genChar(seedPtr, ldt); + continue; + } + + /* init */ + if (pos==0) buffPtr[0] = RDG_genChar(seedPtr, ldt), pos=1; + + /* Generate compressible data */ + while (pos < buffSize) { + /* Select : Literal (char) or Match (within 32K) */ + if (RDG_rand15Bits(seedPtr) < matchProba32) { + /* Copy (within 32K) */ + U32 const length = RDG_randLength(seedPtr) + 4; + U32 const d = (U32) MIN(pos + length , buffSize); + U32 const repeatOffset = (RDG_rand(seedPtr) & 15) == 2; + U32 const randOffset = RDG_rand15Bits(seedPtr) + 1; + U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos); + size_t match = pos - offset; + while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ + prevOffset = offset; + } else { + /* Literal (noise) */ + U32 const length = RDG_randLength(seedPtr); + U32 const d = (U32) MIN(pos + length, buffSize); + while (pos < d) buffPtr[pos++] = RDG_genChar(seedPtr, ldt); + } } +} + + +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) +{ + BYTE ldt[LTSIZE]; + memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ + if (litProba<=0.0) litProba = matchProba / 4.5; + RDG_fillLiteralDistrib(ldt, litProba); + RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed); +} + + +void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed) +{ + size_t const stdBlockSize = 128 KB; + size_t const stdDictSize = 32 KB; + BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize); + U64 total = 0; + BYTE ldt[LTSIZE]; /* literals distribution table */ + + /* init */ + if (buff==NULL) { perror("datagen"); exit(1); } + if (litProba<=0.0) litProba = matchProba / 4.5; + memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ + RDG_fillLiteralDistrib(ldt, litProba); + SET_BINARY_MODE(stdout); + + /* Generate initial dict */ + RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed); + + /* Generate compressible data */ + while (total < size) { + size_t const genBlockSize = (size_t) (MIN (stdBlockSize, size-total)); + RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed); + total += genBlockSize; + { size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; } + /* update dict */ + memcpy(buff, buff + stdBlockSize, stdDictSize); + } + + /* cleanup */ + free(buff); +} diff --git a/src/zstd/programs/datagen.h b/src/zstd/programs/datagen.h new file mode 100644 index 00000000..2fcc980e --- /dev/null +++ b/src/zstd/programs/datagen.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef DATAGEN_H +#define DATAGEN_H + +#include <stddef.h> /* size_t */ + +void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed); +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); +/*!RDG_genBuffer + Generate 'size' bytes of compressible data into 'buffer'. + Compressibility can be controlled using 'matchProba', which is floating point value between 0 and 1. + 'LitProba' is optional, it affect variability of individual bytes. If litProba==0.0, default value will be used. + Generated data pattern can be modified using different 'seed'. + For a triplet (matchProba, litProba, seed), the function always generate the same content. + + RDG_genStdout + Same as RDG_genBuffer, but generates data into stdout +*/ + +#endif diff --git a/src/zstd/programs/dibio.c b/src/zstd/programs/dibio.c new file mode 100644 index 00000000..2cb2a426 --- /dev/null +++ b/src/zstd/programs/dibio.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************** +* Compiler Warnings +****************************************/ +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/*-************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support */ +#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ +#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ +#include <errno.h> /* errno */ + +#include "mem.h" /* read */ +#include "error_private.h" +#include "dibio.h" + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SAMPLESIZE_MAX (128 KB) +#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */ +#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */ +static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t)); + +#define NOISELENGTH 32 + + +/*-************************************* +* Console display +***************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + +#define DISPLAYUPDATE(l, ...) if (displayLevel>=l) { \ + if ((DIB_clockSpan(g_time) > refreshRate) || (displayLevel>=4)) \ + { g_time = clock(); DISPLAY(__VA_ARGS__); \ + if (displayLevel>=4) fflush(stderr); } } +static const clock_t refreshRate = CLOCKS_PER_SEC * 2 / 10; +static clock_t g_time = 0; + +static clock_t DIB_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } + + +/*-************************************* +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAY("Error %i : ", error); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY("\n"); \ + exit(error); \ +} + + +/* ******************************************************** +* Helper functions +**********************************************************/ +unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + +#undef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + + +/* ******************************************************** +* File related operations +**********************************************************/ +/** DiB_loadFiles() : + * load samples from files listed in fileNamesTable into buffer. + * works even if buffer is too small to load all samples. + * Also provides the size of each sample into sampleSizes table + * which must be sized correctly, using DiB_fileStats(). + * @return : nb of samples effectively loaded into `buffer` + * *bufferSizePtr is modified, it provides the amount data loaded within buffer. + * sampleSizes is filled with the size of each sample. + */ +static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr, + size_t* sampleSizes, unsigned sstSize, + const char** fileNamesTable, unsigned nbFiles, size_t targetChunkSize, + unsigned displayLevel) +{ + char* const buff = (char*)buffer; + size_t pos = 0; + unsigned nbLoadedChunks = 0, fileIndex; + + for (fileIndex=0; fileIndex<nbFiles; fileIndex++) { + const char* const fileName = fileNamesTable[fileIndex]; + unsigned long long const fs64 = UTIL_getFileSize(fileName); + unsigned long long remainingToLoad = fs64; + U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1; + U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64; + size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX); + U32 cnb; + FILE* const f = fopen(fileName, "rb"); + if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno)); + DISPLAYUPDATE(2, "Loading %s... \r", fileName); + for (cnb=0; cnb<nbChunks; cnb++) { + size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad); + if (toLoad > *bufferSizePtr-pos) break; + { size_t const readSize = fread(buff+pos, 1, toLoad, f); + if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName); + pos += readSize; + sampleSizes[nbLoadedChunks++] = toLoad; + remainingToLoad -= targetChunkSize; + if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */ + fileIndex = nbFiles; /* stop there */ + break; + } + if (toLoad < targetChunkSize) { + fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR); + } } } + fclose(f); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + *bufferSizePtr = pos; + DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10)) + return nbLoadedChunks; +} + +#define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static U32 DiB_rand(U32* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = DiB_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +/* DiB_shuffle() : + * shuffle a table of file names in a semi-random way + * It improves dictionary quality by reducing "locality" impact, so if sample set is very large, + * it will load random elements from it, instead of just the first ones. */ +static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) { + U32 seed = 0xFD2FB528; + unsigned i; + for (i = nbFiles - 1; i > 0; --i) { + unsigned const j = DiB_rand(&seed) % (i + 1); + const char* const tmp = fileNamesTable[j]; + fileNamesTable[j] = fileNamesTable[i]; + fileNamesTable[i] = tmp; + } +} + + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static size_t DiB_findMaxMem(unsigned long long requiredMem) +{ + size_t const step = 8 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 23) + 1) << 23); + requiredMem += step; + if (requiredMem > g_maxMemory) requiredMem = g_maxMemory; + + while (!testmem) { + testmem = malloc((size_t)requiredMem); + requiredMem -= step; + } + + free(testmem); + return (size_t)requiredMem; +} + + +static void DiB_fillNoise(void* buffer, size_t length) +{ + unsigned const prime1 = 2654435761U; + unsigned const prime2 = 2246822519U; + unsigned acc = prime1; + size_t p=0;; + + for (p=0; p<length; p++) { + acc *= prime2; + ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21); + } +} + + +static void DiB_saveDict(const char* dictFileName, + const void* buff, size_t buffSize) +{ + FILE* const f = fopen(dictFileName, "wb"); + if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName); + + { size_t const n = fwrite(buff, 1, buffSize, f); + if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) } + + { size_t const n = (size_t)fclose(f); + if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) } +} + + +typedef struct { + U64 totalSizeToLoad; + unsigned oneSampleTooLarge; + unsigned nbSamples; +} fileStats; + +/*! DiB_fileStats() : + * Given a list of files, and a chunkSize (0 == no chunk, whole files) + * provides the amount of data to be loaded and the resulting nb of samples. + * This is useful primarily for allocation purpose => sample buffer, and sample sizes table. + */ +static fileStats DiB_fileStats(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, unsigned displayLevel) +{ + fileStats fs; + unsigned n; + memset(&fs, 0, sizeof(fs)); + for (n=0; n<nbFiles; n++) { + U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]); + U32 const nbSamples = (U32)(chunkSize ? (fileSize + (chunkSize-1)) / chunkSize : 1); + U64 const chunkToLoad = chunkSize ? MIN(chunkSize, fileSize) : fileSize; + size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX); + fs.totalSizeToLoad += cappedChunkSize * nbSamples; + fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX); + fs.nbSamples += nbSamples; + } + DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10)); + return fs; +} + + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : + Strictly Internal use only !! + Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`. + `samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads. + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + or an error code. +*/ +size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + + +int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, + const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, + ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams, + int optimizeCover) +{ + unsigned const displayLevel = params ? params->zParams.notificationLevel : + coverParams ? coverParams->zParams.notificationLevel : + 0; /* should never happen */ + void* const dictBuffer = malloc(maxDictSize); + fileStats const fs = DiB_fileStats(fileNamesTable, nbFiles, chunkSize, displayLevel); + size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t)); + size_t const memMult = params ? MEMMULT : COVER_MEMMULT; + size_t const maxMem = DiB_findMaxMem(fs.totalSizeToLoad * memMult) / memMult; + size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad); + void* const srcBuffer = malloc(loadedSize+NOISELENGTH); + int result = 0; + + /* Checks */ + if ((!sampleSizes) || (!srcBuffer) || (!dictBuffer)) + EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */ + if (fs.oneSampleTooLarge) { + DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n"); + DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n"); + DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX); + } + if (fs.nbSamples < 5) { + DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n"); + DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n"); + DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n"); + EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */ + } + if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) { + DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n"); + DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n"); + } + + /* init */ + if (loadedSize < fs.totalSizeToLoad) + DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20)); + + /* Load input buffer */ + DISPLAYLEVEL(3, "Shuffling input files\n"); + DiB_shuffle(fileNamesTable, nbFiles); + nbFiles = DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel); + + { size_t dictSize; + if (params) { + DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */ + dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize, + srcBuffer, sampleSizes, fs.nbSamples, + *params); + } else if (optimizeCover) { + dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, + srcBuffer, sampleSizes, fs.nbSamples, + coverParams); + if (!ZDICT_isError(dictSize)) { + DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps); + } + } else { + dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, + sampleSizes, fs.nbSamples, *coverParams); + } + if (ZDICT_isError(dictSize)) { + DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ + result = 1; + goto _cleanup; + } + /* save dict */ + DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName); + DiB_saveDict(dictFileName, dictBuffer, dictSize); + } + + /* clean up */ +_cleanup: + free(srcBuffer); + free(sampleSizes); + free(dictBuffer); + return result; +} diff --git a/src/zstd/programs/dibio.h b/src/zstd/programs/dibio.h new file mode 100644 index 00000000..499e3036 --- /dev/null +++ b/src/zstd/programs/dibio.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This library is designed for a single-threaded console application. +* It exit() and printf() into stderr when it encounters an error condition. */ + +#ifndef DIBIO_H_003 +#define DIBIO_H_003 + + +/*-************************************* +* Dependencies +***************************************/ +#define ZDICT_STATIC_LINKING_ONLY +#include "zdict.h" /* ZDICT_params_t */ + + +/*-************************************* +* Public functions +***************************************/ +/*! DiB_trainFromFiles() : + Train a dictionary from a set of files provided by `fileNamesTable`. + Resulting dictionary is written into file `dictFileName`. + `parameters` is optional and can be provided with values set to 0, meaning "default". + @return : 0 == ok. Any other : error. +*/ +int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, + const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, + ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams, + int optimizeCover); + +#endif diff --git a/src/zstd/programs/fileio.c b/src/zstd/programs/fileio.c new file mode 100644 index 00000000..70158f93 --- /dev/null +++ b/src/zstd/programs/fileio.c @@ -0,0 +1,2052 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ************************************* +* Compiler Options +***************************************/ +#ifdef _MSC_VER /* Visual */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* non-constant aggregate initializer */ +#endif +#if defined(__MINGW32__) && !defined(_POSIX_SOURCE) +# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ +#endif + + +/*-************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support, SET_BINARY_MODE */ +#include "util.h" /* UTIL_getFileSize */ +#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* strcmp, strlen */ +#include <time.h> /* clock */ +#include <errno.h> /* errno */ + +#if defined (_MSC_VER) +# include <sys/stat.h> +# include <io.h> +#endif + +#include "bitstream.h" +#include "mem.h" +#include "fileio.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ +#include "zstd.h" +#ifdef ZSTD_MULTITHREAD +# include "zstdmt_compress.h" +#endif +#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) +# include <zlib.h> +# if !defined(z_const) +# define z_const +# endif +#endif +#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) +# include <lzma.h> +#endif + +#define LZ4_MAGICNUMBER 0x184D2204 +#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) +# define LZ4F_ENABLE_OBSOLETE_ENUMS +# include <lz4frame.h> +# include <lz4.h> +#endif + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1<<10) +#define MB *(1<<20) +#define GB *(1U<<30) + +#define _1BIT 0x01 +#define _2BITS 0x03 +#define _3BITS 0x07 +#define _4BITS 0x0F +#define _6BITS 0x3F +#define _8BITS 0xFF + +#define BLOCKSIZE (128 KB) +#define ROLLBUFFERSIZE (BLOCKSIZE*8*64) + +#define FIO_FRAMEHEADERSIZE 5 /* as a define, because needed to allocated table on stack */ + +#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ + +#define FNSPACE 30 + + +/*-************************************* +* Macros +***************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } +static int g_displayLevel = 2; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ +void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; } + +#define DISPLAYUPDATE(l, ...) { if (g_displayLevel>=l) { \ + if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \ + { g_time = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); } } } +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + +#undef MIN /* in case it would be already defined */ +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + + +/*-************************************* +* Debug +***************************************/ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1) +# include <assert.h> +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#ifndef ZSTD_DEBUG +# define ZSTD_DEBUG 0 +#endif +#define DEBUGLOG(l,...) if (l<=ZSTD_DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DISPLAYLEVEL(1, "zstd: "); \ + DEBUGLOG(1, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + exit(error); \ +} + +#define CHECK(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DEBUGLOG(1, "%s \n", #f); \ + EXM_THROW(11, "%s", ZSTD_getErrorName(err)); \ +} } + + +/*-************************************ +* Signal (Ctrl-C trapping) +**************************************/ +#include <signal.h> + +static const char* g_artefact = NULL; +static void INThandler(int sig) +{ + assert(sig==SIGINT); (void)sig; +#if !defined(_MSC_VER) + signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ +#endif + if (g_artefact) remove(g_artefact); + DISPLAY("\n"); + exit(2); +} + + +/* ************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && _MSC_VER >= 1400 +# define LONG_SEEK _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define LONG_SEEK fseeko +#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) +# define LONG_SEEK fseeko64 +#elif defined(_WIN32) && !defined(__DJGPP__) +# include <windows.h> + static int LONG_SEEK(FILE* file, __int64 offset, int origin) { + LARGE_INTEGER off; + DWORD method; + off.QuadPart = offset; + if (origin == SEEK_END) + method = FILE_END; + else if (origin == SEEK_CUR) + method = FILE_CURRENT; + else + method = FILE_BEGIN; + + if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) + return 0; + else + return -1; + } +#else +# define LONG_SEEK fseek +#endif + + +/*-************************************* +* Local Parameters - Not thread safe +***************************************/ +static FIO_compressionType_t g_compressionType = FIO_zstdCompression; +void FIO_setCompressionType(FIO_compressionType_t compressionType) { g_compressionType = compressionType; } +static U32 g_overwrite = 0; +void FIO_overwriteMode(void) { g_overwrite=1; } +static U32 g_sparseFileSupport = 1; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ +void FIO_setSparseWrite(unsigned sparse) { g_sparseFileSupport=sparse; } +static U32 g_dictIDFlag = 1; +void FIO_setDictIDFlag(unsigned dictIDFlag) { g_dictIDFlag = dictIDFlag; } +static U32 g_checksumFlag = 1; +void FIO_setChecksumFlag(unsigned checksumFlag) { g_checksumFlag = checksumFlag; } +static U32 g_removeSrcFile = 0; +void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); } +static U32 g_memLimit = 0; +void FIO_setMemLimit(unsigned memLimit) { g_memLimit = memLimit; } +static U32 g_nbThreads = 1; +void FIO_setNbThreads(unsigned nbThreads) { +#ifndef ZSTD_MULTITHREAD + if (nbThreads > 1) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); +#endif + g_nbThreads = nbThreads; +} +static U32 g_blockSize = 0; +void FIO_setBlockSize(unsigned blockSize) { + if (blockSize && g_nbThreads==1) + DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); +#ifdef ZSTD_MULTITHREAD + if (blockSize-1 < ZSTDMT_SECTION_SIZE_MIN-1) /* intentional underflow */ + DISPLAYLEVEL(2, "Note : minimum block size is %u KB \n", (ZSTDMT_SECTION_SIZE_MIN>>10)); +#endif + g_blockSize = blockSize; +} +#define FIO_OVERLAP_LOG_NOTSET 9999 +static U32 g_overlapLog = FIO_OVERLAP_LOG_NOTSET; +void FIO_setOverlapLog(unsigned overlapLog){ + if (overlapLog && g_nbThreads==1) + DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); + g_overlapLog = overlapLog; +} +static U32 g_ldmFlag = 0; +void FIO_setLdmFlag(unsigned ldmFlag) { + g_ldmFlag = (ldmFlag>0); +} +static U32 g_ldmHashLog = 0; +void FIO_setLdmHashLog(unsigned ldmHashLog) { + g_ldmHashLog = ldmHashLog; +} +static U32 g_ldmMinMatch = 0; +void FIO_setLdmMinMatch(unsigned ldmMinMatch) { + g_ldmMinMatch = ldmMinMatch; +} + +#define FIO_LDM_PARAM_NOTSET 9999 +static U32 g_ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; +void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { + g_ldmBucketSizeLog = ldmBucketSizeLog; +} + +static U32 g_ldmHashEveryLog = FIO_LDM_PARAM_NOTSET; +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) { + g_ldmHashEveryLog = ldmHashEveryLog; +} + + + +/*-************************************* +* Functions +***************************************/ +/** FIO_remove() : + * @result : Unlink `fileName`, even if it's read-only */ +static int FIO_remove(const char* path) +{ +#if defined(_WIN32) || defined(WIN32) + /* windows doesn't allow remove read-only files, + * so try to make it writable first */ + chmod(path, _S_IWRITE); +#endif + return remove(path); +} + +/** FIO_openSrcFile() : + * condition : `dstFileName` must be non-NULL. + * @result : FILE* to `dstFileName`, or NULL if it fails */ +static FILE* FIO_openSrcFile(const char* srcFileName) +{ + FILE* f; + + if (!strcmp (srcFileName, stdinmark)) { + DISPLAYLEVEL(4,"Using stdin for input\n"); + f = stdin; + SET_BINARY_MODE(stdin); + } else { + if (!UTIL_isRegularFile(srcFileName)) { + DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", + srcFileName); + return NULL; + } + f = fopen(srcFileName, "rb"); + if ( f==NULL ) + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + } + + return f; +} + +/** FIO_openDstFile() : + * condition : `dstFileName` must be non-NULL. + * @result : FILE* to `dstFileName`, or NULL if it fails */ +static FILE* FIO_openDstFile(const char* dstFileName) +{ + FILE* f; + + if (!strcmp (dstFileName, stdoutmark)) { + DISPLAYLEVEL(4,"Using stdout for output\n"); + f = stdout; + SET_BINARY_MODE(stdout); + if (g_sparseFileSupport==1) { + g_sparseFileSupport = 0; + DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); + } + } else { + if (g_sparseFileSupport == 1) { + g_sparseFileSupport = ZSTD_SPARSE_DEFAULT; + } + if (strcmp (dstFileName, nulmark)) { + /* Check if destination file already exists */ + f = fopen( dstFileName, "rb" ); + if (f != 0) { /* dst file exists, prompt for overwrite authorization */ + fclose(f); + if (!g_overwrite) { + if (g_displayLevel <= 1) { + /* No interaction possible */ + DISPLAY("zstd: %s already exists; not overwritten \n", + dstFileName); + return NULL; + } + DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", + dstFileName); + { int ch = getchar(); + if ((ch!='Y') && (ch!='y')) { + DISPLAY(" not overwritten \n"); + return NULL; + } + /* flush rest of input line */ + while ((ch!=EOF) && (ch!='\n')) ch = getchar(); + } } + /* need to unlink */ + FIO_remove(dstFileName); + } } + f = fopen( dstFileName, "wb" ); + if (f==NULL) DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); + } + + return f; +} + + +/*! FIO_createDictBuffer() : + * creates a buffer, pointed by `*bufferPtr`, + * loads `filename` content into it, up to DICTSIZE_MAX bytes. + * @return : loaded size + * if fileName==NULL, returns 0 and a NULL pointer + */ +static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName) +{ + FILE* fileHandle; + U64 fileSize; + + *bufferPtr = NULL; + if (fileName == NULL) return 0; + + DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); + fileHandle = fopen(fileName, "rb"); + if (fileHandle==0) EXM_THROW(31, "%s: %s", fileName, strerror(errno)); + fileSize = UTIL_getFileSize(fileName); + if (fileSize > DICTSIZE_MAX) { + EXM_THROW(32, "Dictionary file %s is too large (> %u MB)", + fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */ + } + *bufferPtr = malloc((size_t)fileSize); + if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); + { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); + if (readSize!=fileSize) + EXM_THROW(35, "Error reading dictionary file %s", fileName); + } + fclose(fileHandle); + return (size_t)fileSize; +} + +#ifndef ZSTD_NOCOMPRESS + +/*-********************************************************************** +* Compression +************************************************************************/ +typedef struct { + FILE* srcFile; + FILE* dstFile; + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; +#if !defined(ZSTD_NEWAPI) && defined(ZSTD_MULTITHREAD) + ZSTDMT_CCtx* cctx; +#else + ZSTD_CStream* cctx; +#endif +} cRess_t; + +static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, + U64 srcSize, + ZSTD_compressionParameters* comprParams) { + cRess_t ress; + memset(&ress, 0, sizeof(ress)); + +#ifdef ZSTD_NEWAPI + ress.cctx = ZSTD_createCCtx(); + if (ress.cctx == NULL) + EXM_THROW(30, "allocation error : can't create ZSTD_CCtx"); +#elif defined(ZSTD_MULTITHREAD) + ress.cctx = ZSTDMT_createCCtx(g_nbThreads); + if (ress.cctx == NULL) + EXM_THROW(30, "allocation error : can't create ZSTDMT_CCtx"); + if ((cLevel==ZSTD_maxCLevel()) && (g_overlapLog==FIO_OVERLAP_LOG_NOTSET)) + /* use complete window for overlap */ + ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_overlapSectionLog, 9); + if (g_overlapLog != FIO_OVERLAP_LOG_NOTSET) + ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_overlapSectionLog, g_overlapLog); +#else + ress.cctx = ZSTD_createCStream(); + if (ress.cctx == NULL) + EXM_THROW(30, "allocation error : can't create ZSTD_CStream"); +#endif + ress.srcBufferSize = ZSTD_CStreamInSize(); + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = ZSTD_CStreamOutSize(); + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) + EXM_THROW(31, "allocation error : not enough memory"); + + /* dictionary */ + { void* dictBuffer; + size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */ + if (dictFileName && (dictBuffer==NULL)) + EXM_THROW(32, "allocation error : can't create dictBuffer"); + +#ifdef ZSTD_NEWAPI + { /* frame parameters */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) ); + (void)srcSize; + /* compression level */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) ); + /* long distance matching */ + CHECK( ZSTD_CCtx_setParameter( + ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) ); + if (g_ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog) ); + } + if (g_ldmHashEveryLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) ); + } + /* compression parameters */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_searchLog, comprParams->searchLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) ); + /* multi-threading */ + DISPLAYLEVEL(5,"set nb threads = %u \n", g_nbThreads); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) ); + /* dictionary */ + CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); + } +#elif defined(ZSTD_MULTITHREAD) + { ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize); + params.fParams.checksumFlag = g_checksumFlag; + params.fParams.noDictIDFlag = !g_dictIDFlag; + if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog; + if (comprParams->chainLog) params.cParams.chainLog = comprParams->chainLog; + if (comprParams->hashLog) params.cParams.hashLog = comprParams->hashLog; + if (comprParams->searchLog) params.cParams.searchLog = comprParams->searchLog; + if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength; + if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength; + if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy) comprParams->strategy; + CHECK( ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize) ); + ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_sectionSize, g_blockSize); + } +#else + { ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize); + params.fParams.checksumFlag = g_checksumFlag; + params.fParams.noDictIDFlag = !g_dictIDFlag; + if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog; + if (comprParams->chainLog) params.cParams.chainLog = comprParams->chainLog; + if (comprParams->hashLog) params.cParams.hashLog = comprParams->hashLog; + if (comprParams->searchLog) params.cParams.searchLog = comprParams->searchLog; + if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength; + if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength; + if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy) comprParams->strategy; + CHECK( ZSTD_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize) ); + } +#endif + free(dictBuffer); + } + + return ress; +} + +static void FIO_freeCResources(cRess_t ress) +{ + free(ress.srcBuffer); + free(ress.dstBuffer); +#if !defined(ZSTD_NEWAPI) && defined(ZSTD_MULTITHREAD) + ZSTDMT_freeCCtx(ress.cctx); +#else + ZSTD_freeCStream(ress.cctx); /* never fails */ +#endif +} + + +#ifdef ZSTD_GZCOMPRESS +static unsigned long long FIO_compressGzFrame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + z_stream strm; + int ret; + + if (compressionLevel > Z_BEST_COMPRESSION) + compressionLevel = Z_BEST_COMPRESSION; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, + 15 /* maxWindowLogSize */ + 16 /* gzip only */, + 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */ + if (ret != Z_OK) + EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); + + strm.next_in = 0; + strm.avail_in = 0; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + + while (1) { + if (strm.avail_in == 0) { + size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); + if (inSize == 0) break; + inFileSize += inSize; + strm.next_in = (z_const unsigned char*)ress->srcBuffer; + strm.avail_in = (uInt)inSize; + } + ret = deflate(&strm, Z_NO_FLUSH); + if (ret != Z_OK) + EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) + EXM_THROW(73, "Write error : cannot write to output file"); + outFileSize += decompBytes; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + } + } + if (!srcFileSize) + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + (U32)(inFileSize>>20), + (double)outFileSize/inFileSize*100) + else + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + (U32)(inFileSize>>20), (U32)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + } + + while (1) { + ret = deflate(&strm, Z_FINISH); + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) + EXM_THROW(75, "Write error : cannot write to output file"); + outFileSize += decompBytes; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + } } + if (ret == Z_STREAM_END) break; + if (ret != Z_BUF_ERROR) + EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); + } + + ret = deflateEnd(&strm); + if (ret != Z_OK) + EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); + *readsize = inFileSize; + + return outFileSize; +} +#endif + + +#ifdef ZSTD_LZMACOMPRESS +static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize, int plain_lzma) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret ret; + + if (compressionLevel < 0) compressionLevel = 0; + if (compressionLevel > 9) compressionLevel = 9; + + if (plain_lzma) { + lzma_options_lzma opt_lzma; + if (lzma_lzma_preset(&opt_lzma, compressionLevel)) + EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName); + ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ + if (ret != LZMA_OK) + EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); + } else { + ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ + if (ret != LZMA_OK) + EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); + } + + strm.next_in = 0; + strm.avail_in = 0; + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + + while (1) { + if (strm.avail_in == 0) { + size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); + if (inSize == 0) action = LZMA_FINISH; + inFileSize += inSize; + strm.next_in = (BYTE const*)ress->srcBuffer; + strm.avail_in = inSize; + } + + ret = lzma_code(&strm, action); + + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); + { size_t const compBytes = ress->dstBufferSize - strm.avail_out; + if (compBytes) { + if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) + EXM_THROW(73, "Write error : cannot write to output file"); + outFileSize += compBytes; + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + } } + if (!srcFileSize) + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + (U32)(inFileSize>>20), + (double)outFileSize/inFileSize*100) + else + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + (U32)(inFileSize>>20), (U32)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + if (ret == LZMA_STREAM_END) break; + } + + lzma_end(&strm); + *readsize = inFileSize; + + return outFileSize; +} +#endif + +#ifdef ZSTD_LZ4COMPRESS +static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } +static unsigned long long FIO_compressLz4Frame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + + LZ4F_preferences_t prefs; + LZ4F_compressionContext_t ctx; + + LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) + EXM_THROW(31, "zstd: failed to create lz4 compression context"); + + memset(&prefs, 0, sizeof(prefs)); + +#if LZ4_VERSION_NUMBER <= 10600 +#define LZ4F_blockIndependent blockIndependent +#define LZ4F_max4MB max4MB +#endif + + prefs.autoFlush = 1; + prefs.compressionLevel = compressionLevel; + prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* stick to defaults for lz4 cli */ + prefs.frameInfo.blockSizeID = LZ4F_max4MB; + prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag; +#if LZ4_VERSION_NUMBER >= 10600 + prefs.frameInfo.contentSize = srcFileSize; +#endif + + { + size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB); + size_t readSize; + size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); + if (LZ4F_isError(headerSize)) + EXM_THROW(33, "File header generation failed : %s", + LZ4F_getErrorName(headerSize)); + { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); + if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); } + outFileSize += headerSize; + + /* Read first block */ + readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); + inFileSize += readSize; + + /* Main Loop */ + while (readSize>0) { + size_t outSize; + + /* Compress Block */ + outSize = LZ4F_compressUpdate(ctx, ress->dstBuffer, ress->dstBufferSize, ress->srcBuffer, readSize, NULL); + if (LZ4F_isError(outSize)) + EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", + srcFileName, LZ4F_getErrorName(outSize)); + outFileSize += outSize; + if (!srcFileSize) + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + (U32)(inFileSize>>20), + (double)outFileSize/inFileSize*100) + else + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + (U32)(inFileSize>>20), (U32)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + + /* Write Block */ + { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); + if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); } + + /* Read next block */ + readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); + inFileSize += readSize; + } + if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); + + /* End of Stream mark */ + headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); + if (LZ4F_isError(headerSize)) + EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", + srcFileName, LZ4F_getErrorName(headerSize)); + + { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); + if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); } + outFileSize += headerSize; + } + + *readsize = inFileSize; + LZ4F_freeCompressionContext(ctx); + + return outFileSize; +} +#endif + + +/*! FIO_compressFilename_internal() : + * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. + * @return : 0 : compression completed correctly, + * 1 : missing or pb opening srcFileName + */ +static int FIO_compressFilename_internal(cRess_t ress, + const char* dstFileName, const char* srcFileName, int compressionLevel) +{ + FILE* const srcFile = ress.srcFile; + FILE* const dstFile = ress.dstFile; + U64 readsize = 0; + U64 compressedfilesize = 0; + U64 const fileSize = UTIL_getFileSize(srcFileName); + DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (U32)fileSize); + + switch (g_compressionType) { + case FIO_zstdCompression: + break; + + case FIO_gzipCompression: +#ifdef ZSTD_GZCOMPRESS + compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", + srcFileName); +#endif + goto finish; + + case FIO_xzCompression: + case FIO_lzmaCompression: +#ifdef ZSTD_LZMACOMPRESS + compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, g_compressionType==FIO_lzmaCompression); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", + srcFileName); +#endif + goto finish; + + case FIO_lz4Compression: +#ifdef ZSTD_LZ4COMPRESS + compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, &readsize); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", + srcFileName); +#endif + goto finish; + } + + /* init */ +#ifdef ZSTD_NEWAPI + if (fileSize!=0) /* when src is stdin, fileSize==0, but is effectively unknown */ + ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize); /* note : fileSize==0 means "empty" */ +#elif defined(ZSTD_MULTITHREAD) + CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize) ); /* note : fileSize==0 means "unknown" */ +#else + CHECK( ZSTD_resetCStream(ress.cctx, fileSize) ); /* note : fileSize==0 means "unknown" */ +#endif + + /* Main compression loop */ + while (1) { + /* Fill input Buffer */ + size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); + ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; + if (inSize==0) break; + readsize += inSize; + + while (inBuff.pos != inBuff.size) { + ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; +#ifdef ZSTD_NEWAPI + CHECK( ZSTD_compress_generic(ress.cctx, + &outBuff, &inBuff, ZSTD_e_continue) ); +#elif defined(ZSTD_MULTITHREAD) + CHECK( ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff) ); +#else + CHECK( ZSTD_compressStream(ress.cctx, &outBuff, &inBuff) ); +#endif + + /* Write compressed stream */ + if (outBuff.pos) { + size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); + if (sizeCheck!=outBuff.pos) + EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); + compressedfilesize += outBuff.pos; + } } + if (g_nbThreads > 1) { + if (!fileSize) + DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20)) + else + DISPLAYUPDATE(2, "\rRead : %u / %u MB", + (U32)(readsize>>20), (U32)(fileSize>>20)); + } else { + if (!fileSize) + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + (U32)(readsize>>20), + (double)compressedfilesize/readsize*100) + else + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + (U32)(readsize>>20), (U32)(fileSize>>20), + (double)compressedfilesize/readsize*100); + } + } + + /* End of Frame */ + { size_t result = 1; + while (result != 0) { + ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; +#ifdef ZSTD_NEWAPI + ZSTD_inBuffer inBuff = { NULL, 0, 0 }; + result = ZSTD_compress_generic(ress.cctx, + &outBuff, &inBuff, ZSTD_e_end); +#elif defined(ZSTD_MULTITHREAD) + result = ZSTDMT_endStream(ress.cctx, &outBuff); +#else + result = ZSTD_endStream(ress.cctx, &outBuff); +#endif + if (ZSTD_isError(result)) { + EXM_THROW(26, "Compression error during frame end : %s", + ZSTD_getErrorName(result)); + } + { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); + if (sizeCheck!=outBuff.pos) + EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); + } + compressedfilesize += outBuff.pos; + } + } + +finish: + /* Status */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", srcFileName, + (double)compressedfilesize/(readsize+(!readsize) /* avoid div by zero */ )*100, + (unsigned long long)readsize, (unsigned long long) compressedfilesize, + dstFileName); + + return 0; +} + + +/*! FIO_compressFilename_srcFile() : + * note : ress.destFile already opened + * @return : 0 : compression completed correctly, + * 1 : missing or pb opening srcFileName + */ +static int FIO_compressFilename_srcFile(cRess_t ress, + const char* dstFileName, const char* srcFileName, + int compressionLevel) +{ + int result; + + /* File check */ + if (UTIL_isDirectory(srcFileName)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + ress.srcFile = FIO_openSrcFile(srcFileName); + if (!ress.srcFile) return 1; /* srcFile could not be opened */ + + result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, compressionLevel); + + fclose(ress.srcFile); + if (g_removeSrcFile /* --rm */ && !result && strcmp(srcFileName, stdinmark)) { + if (remove(srcFileName)) + EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); + } + return result; +} + + +/*! FIO_compressFilename_dstFile() : + * @return : 0 : compression completed correctly, + * 1 : pb + */ +static int FIO_compressFilename_dstFile(cRess_t ress, + const char* dstFileName, + const char* srcFileName, + int compressionLevel) +{ + int result; + stat_t statbuf; + int stat_result = 0; + + ress.dstFile = FIO_openDstFile(dstFileName); + if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ + + if (UTIL_isRegularFile(dstFileName)) { + g_artefact = dstFileName; + signal(SIGINT, INThandler); + } else { + g_artefact = NULL; + } + + + if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf)) + stat_result = 1; + result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel); + + if (fclose(ress.dstFile)) { /* error closing dstFile */ + DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); + result=1; + } + if (result!=0) { /* remove operation artefact */ + if (remove(dstFileName)) + EXM_THROW(1, "zstd: %s: %s", dstFileName, strerror(errno)); + } + else if (strcmp (dstFileName, stdoutmark) && stat_result) + UTIL_setFileStat(dstFileName, &statbuf); + + signal(SIGINT, SIG_DFL); + + return result; +} + + +int FIO_compressFilename(const char* dstFileName, const char* srcFileName, + const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams) +{ + clock_t const start = clock(); + U64 const srcSize = UTIL_getFileSize(srcFileName); + + cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); + int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel); + + double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC; + DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); + + FIO_freeCResources(ress); + return result; +} + + +int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFiles, + const char* suffix, + const char* dictFileName, int compressionLevel, + ZSTD_compressionParameters* comprParams) +{ + int missed_files = 0; + size_t dfnSize = FNSPACE; + char* dstFileName = (char*)malloc(FNSPACE); + size_t const suffixSize = suffix ? strlen(suffix) : 0; + U64 const srcSize = (nbFiles != 1) ? 0 : UTIL_getFileSize(inFileNamesTable[0]) ; + cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); + + /* init */ + if (dstFileName==NULL) + EXM_THROW(27, "FIO_compressMultipleFilenames : allocation error for dstFileName"); + if (suffix == NULL) + EXM_THROW(28, "FIO_compressMultipleFilenames : dst unknown"); /* should never happen */ + + /* loop on each file */ + if (!strcmp(suffix, stdoutmark)) { + unsigned u; + ress.dstFile = stdout; + SET_BINARY_MODE(stdout); + for (u=0; u<nbFiles; u++) + missed_files += FIO_compressFilename_srcFile(ress, stdoutmark, inFileNamesTable[u], compressionLevel); + if (fclose(ress.dstFile)) + EXM_THROW(29, "Write error : cannot properly close stdout"); + } else { + unsigned u; + for (u=0; u<nbFiles; u++) { + size_t const ifnSize = strlen(inFileNamesTable[u]); + if (dfnSize <= ifnSize+suffixSize+1) { /* resize name buffer */ + free(dstFileName); + dfnSize = ifnSize + 20; + dstFileName = (char*)malloc(dfnSize); + if (!dstFileName) + EXM_THROW(30, "zstd: %s", strerror(errno)); + } + strcpy(dstFileName, inFileNamesTable[u]); + strcat(dstFileName, suffix); + missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u], compressionLevel); + } } + + FIO_freeCResources(ress); + free(dstFileName); + return missed_files; +} + +#endif /* #ifndef ZSTD_NOCOMPRESS */ + + + +#ifndef ZSTD_NODECOMPRESS + +/* ************************************************************************** + * Decompression + ***************************************************************************/ +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + size_t srcBufferLoaded; + void* dstBuffer; + size_t dstBufferSize; + ZSTD_DStream* dctx; + FILE* dstFile; +} dRess_t; + +static dRess_t FIO_createDResources(const char* dictFileName) +{ + dRess_t ress; + memset(&ress, 0, sizeof(ress)); + + /* Allocation */ + ress.dctx = ZSTD_createDStream(); + if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream"); + CHECK( ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit) ); + ress.srcBufferSize = ZSTD_DStreamInSize(); + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = ZSTD_DStreamOutSize(); + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) + EXM_THROW(61, "Allocation error : not enough memory"); + + /* dictionary */ + { void* dictBuffer; + size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName); + CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) ); + free(dictBuffer); + } + + return ress; +} + +static void FIO_freeDResources(dRess_t ress) +{ + CHECK( ZSTD_freeDStream(ress.dctx) ); + free(ress.srcBuffer); + free(ress.dstBuffer); +} + + +/** FIO_fwriteSparse() : +* @return : storedSkips, to be provided to next call to FIO_fwriteSparse() of LZ4IO_fwriteSparseEnd() */ +static unsigned FIO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips) +{ + const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ + size_t bufferSizeT = bufferSize / sizeof(size_t); + const size_t* const bufferTEnd = bufferT + bufferSizeT; + const size_t* ptrT = bufferT; + static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* 0-test re-attempted every 32 KB */ + + if (!g_sparseFileSupport) { /* normal write */ + size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); + if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block"); + return 0; + } + + /* avoid int overflow */ + if (storedSkips > 1 GB) { + int const seekResult = LONG_SEEK(file, 1 GB, SEEK_CUR); + if (seekResult != 0) EXM_THROW(71, "1 GB skip error (sparse file support)"); + storedSkips -= 1 GB; + } + + while (ptrT < bufferTEnd) { + size_t seg0SizeT = segmentSizeT; + size_t nb0T; + + /* count leading zeros */ + if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; + bufferSizeT -= seg0SizeT; + for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeof(size_t)); + + if (nb0T != seg0SizeT) { /* not all 0s */ + int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(72, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + seg0SizeT -= nb0T; + ptrT += nb0T; + { size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file); + if (sizeCheck != seg0SizeT) + EXM_THROW(73, "Write error : cannot write decoded block"); + } } + ptrT += seg0SizeT; + } + + { static size_t const maskT = sizeof(size_t)-1; + if (bufferSize & maskT) { + /* size not multiple of sizeof(size_t) : implies end of block */ + const char* const restStart = (const char*)bufferTEnd; + const char* restPtr = restStart; + size_t restSize = bufferSize & maskT; + const char* const restEnd = restStart + restSize; + for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) { + int seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); + if (seekResult) + EXM_THROW(74, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + { size_t const sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, file); + if (sizeCheck != (size_t)(restEnd - restPtr)) + EXM_THROW(75, "Write error : cannot write decoded end of block"); + } } } } + + return storedSkips; +} + +static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips) +{ + if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */ + int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); + if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)"); + { const char lastZeroByte[1] = { 0 }; + size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file); + if (sizeCheck != 1) + EXM_THROW(69, "Write error : cannot write last zero"); + } } +} + + +/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode + @return : 0 (no error) */ +static unsigned FIO_passThrough(FILE* foutput, FILE* finput, void* buffer, size_t bufferSize, size_t alreadyLoaded) +{ + size_t const blockSize = MIN(64 KB, bufferSize); + size_t readFromInput = 1; + unsigned storedSkips = 0; + + /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */ + { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput); + if (sizeCheck != alreadyLoaded) { + DISPLAYLEVEL(1, "Pass-through write error \n"); + return 1; + } } + + while (readFromInput) { + readFromInput = fread(buffer, 1, blockSize, finput); + storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, storedSkips); + } + + FIO_fwriteSparseEnd(foutput, storedSkips); + return 0; +} + +static void FIO_zstdErrorHelp(dRess_t* ress, size_t ret, char const* srcFileName) +{ + ZSTD_frameHeader header; + /* No special help for these errors */ + if (ZSTD_getErrorCode(ret) != ZSTD_error_frameParameter_windowTooLarge) + return; + /* Try to decode the frame header */ + ret = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded); + if (ret == 0) { + U32 const windowSize = (U32)header.windowSize; + U32 const windowLog = BIT_highbit32(windowSize) + ((windowSize & (windowSize - 1)) != 0); + U32 const windowMB = (windowSize >> 20) + (windowSize & ((1 MB) - 1)); + assert(header.windowSize <= (U64)((U32)-1)); + assert(g_memLimit > 0); + DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u\n", + srcFileName, header.windowSize, g_memLimit); + if (windowLog <= ZSTD_WINDOWLOG_MAX) { + DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB\n", + srcFileName, windowLog, windowMB); + return; + } + } else if (ZSTD_getErrorCode(ret) != ZSTD_error_frameParameter_windowTooLarge) { + DISPLAYLEVEL(1, "%s : Error decoding frame header to read window size : %s\n", + srcFileName, ZSTD_getErrorName(ret)); + return; + } + DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u not supported\n", + srcFileName, ZSTD_WINDOWLOG_MAX); +} + +/** FIO_decompressFrame() : + * @return : size of decoded zstd frame, or an error code +*/ +#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) +unsigned long long FIO_decompressZstdFrame(dRess_t* ress, + FILE* finput, + const char* srcFileName, + U64 alreadyDecoded) +{ + U64 frameSize = 0; + U32 storedSkips = 0; + + size_t const srcFileLength = strlen(srcFileName); + if (srcFileLength>20) srcFileName += srcFileLength-20; /* display last 20 characters only */ + + ZSTD_resetDStream(ress->dctx); + + /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ + { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; + if (ress->srcBufferLoaded < toDecode) { + size_t const toRead = toDecode - ress->srcBufferLoaded; + void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; + ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput); + } } + + /* Main decompression Loop */ + while (1) { + ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 }; + ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 }; + size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); + if (ZSTD_isError(readSizeHint)) { + DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", + srcFileName, ZSTD_getErrorName(readSizeHint)); + FIO_zstdErrorHelp(ress, readSizeHint, srcFileName); + return FIO_ERROR_FRAME_DECODING; + } + + /* Write block */ + storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips); + frameSize += outBuff.pos; + DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ", + srcFileName, (U32)((alreadyDecoded+frameSize)>>20) ); + + if (inBuff.pos > 0) { + memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); + ress->srcBufferLoaded -= inBuff.pos; + } + + if (readSizeHint == 0) break; /* end of frame */ + if (inBuff.size != inBuff.pos) { + DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n", + srcFileName); + return FIO_ERROR_FRAME_DECODING; + } + + /* Fill input buffer */ + { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ + if (ress->srcBufferLoaded < toDecode) { + size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */ + void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; + size_t const readSize = fread(startPosition, 1, toRead, finput); + if (readSize==0) { + DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", + srcFileName); + return FIO_ERROR_FRAME_DECODING; + } + ress->srcBufferLoaded += readSize; + } } } + + FIO_fwriteSparseEnd(ress->dstFile, storedSkips); + + return frameSize; +} + + +#ifdef ZSTD_GZDECOMPRESS +static unsigned long long FIO_decompressGzFrame(dRess_t* ress, + FILE* srcFile, const char* srcFileName) +{ + unsigned long long outFileSize = 0; + z_stream strm; + int flush = Z_NO_FLUSH; + int decodingError = 0; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = 0; + strm.avail_in = 0; + /* see http://www.zlib.net/manual.html */ + if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) + return FIO_ERROR_FRAME_DECODING; + + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + strm.avail_in = (uInt)ress->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->srcBuffer; + + for ( ; ; ) { + int ret; + if (strm.avail_in == 0) { + ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); + if (ress->srcBufferLoaded == 0) flush = Z_FINISH; + strm.next_in = (z_const unsigned char*)ress->srcBuffer; + strm.avail_in = (uInt)ress->srcBufferLoaded; + } + ret = inflate(&strm, flush); + if (ret == Z_BUF_ERROR) { + DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName); + decodingError = 1; break; + } + if (ret != Z_OK && ret != Z_STREAM_END) { + DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); + decodingError = 1; break; + } + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) { + DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); + decodingError = 1; break; + } + outFileSize += decompBytes; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + } + } + if (ret == Z_STREAM_END) break; + } + + if (strm.avail_in > 0) + memmove(ress->srcBuffer, strm.next_in, strm.avail_in); + ress->srcBufferLoaded = strm.avail_in; + if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ + && (decodingError==0) ) { + DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); + decodingError = 1; + } + return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; +} +#endif + + +#ifdef ZSTD_LZMADECOMPRESS +static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma) +{ + unsigned long long outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret initRet; + int decodingError = 0; + + strm.next_in = 0; + strm.avail_in = 0; + if (plain_lzma) { + initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ + } else { + initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ + } + + if (initRet != LZMA_OK) { + DISPLAYLEVEL(1, "zstd: %s: %s error %d \n", + plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder", + srcFileName, initRet); + return FIO_ERROR_FRAME_DECODING; + } + + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + strm.next_in = (BYTE const*)ress->srcBuffer; + strm.avail_in = ress->srcBufferLoaded; + + for ( ; ; ) { + lzma_ret ret; + if (strm.avail_in == 0) { + ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); + if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; + strm.next_in = (BYTE const*)ress->srcBuffer; + strm.avail_in = ress->srcBufferLoaded; + } + ret = lzma_code(&strm, action); + + if (ret == LZMA_BUF_ERROR) { + DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName); + decodingError = 1; break; + } + if (ret != LZMA_OK && ret != LZMA_STREAM_END) { + DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n", + srcFileName, ret); + decodingError = 1; break; + } + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) { + DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); + decodingError = 1; break; + } + outFileSize += decompBytes; + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + } } + if (ret == LZMA_STREAM_END) break; + } + + if (strm.avail_in > 0) + memmove(ress->srcBuffer, strm.next_in, strm.avail_in); + ress->srcBufferLoaded = strm.avail_in; + lzma_end(&strm); + return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; +} +#endif + +#ifdef ZSTD_LZ4DECOMPRESS +static unsigned long long FIO_decompressLz4Frame(dRess_t* ress, + FILE* srcFile, const char* srcFileName) +{ + unsigned long long filesize = 0; + LZ4F_errorCode_t nextToLoad; + LZ4F_decompressionContext_t dCtx; + LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); + int decodingError = 0; + + if (LZ4F_isError(errorCode)) { + DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); + return FIO_ERROR_FRAME_DECODING; + } + + /* Init feed with magic number (already consumed from FILE* sFile) */ + { size_t inSize = 4; + size_t outSize= 0; + MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER); + nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL); + if (LZ4F_isError(nextToLoad)) { + DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n", + srcFileName, LZ4F_getErrorName(nextToLoad)); + LZ4F_freeDecompressionContext(dCtx); + return FIO_ERROR_FRAME_DECODING; + } } + + /* Main Loop */ + for (;nextToLoad;) { + size_t readSize; + size_t pos = 0; + size_t decodedBytes = ress->dstBufferSize; + + /* Read input */ + if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize; + readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile); + if (!readSize) break; /* reached end of file or stream */ + + while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */ + /* Decode Input (at least partially) */ + size_t remaining = readSize - pos; + decodedBytes = ress->dstBufferSize; + nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) { + DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", + srcFileName, LZ4F_getErrorName(nextToLoad)); + decodingError = 1; break; + } + pos += remaining; + + /* Write Block */ + if (decodedBytes) { + if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) { + DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); + decodingError = 1; break; + } + filesize += decodedBytes; + DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20)); + } + + if (!nextToLoad) break; + } + } + /* can be out because readSize == 0, which could be an fread() error */ + if (ferror(srcFile)) { + DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); + decodingError=1; + } + + if (nextToLoad!=0) { + DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); + decodingError=1; + } + + LZ4F_freeDecompressionContext(dCtx); + ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */ + + return decodingError ? FIO_ERROR_FRAME_DECODING : filesize; +} +#endif + + + +/** FIO_decompressFrames() : + * Find and decode frames inside srcFile + * srcFile presumed opened and valid + * @return : 0 : OK + * 1 : error + */ +static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, + const char* dstFileName, const char* srcFileName) +{ + unsigned readSomething = 0; + unsigned long long filesize = 0; + assert(srcFile != NULL); + + /* for each frame */ + for ( ; ; ) { + /* check magic number -> version */ + size_t const toRead = 4; + const BYTE* const buf = (const BYTE*)ress.srcBuffer; + if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ + ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, + (size_t)1, toRead - ress.srcBufferLoaded, srcFile); + if (ress.srcBufferLoaded==0) { + if (readSomething==0) { /* srcFile is empty (which is invalid) */ + DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); + return 1; + } /* else, just reached frame boundary */ + break; /* no more input */ + } + readSomething = 1; /* there is at least 1 byte in srcFile */ + if (ress.srcBufferLoaded < toRead) { + DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); + return 1; + } + if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { + unsigned long long const frameSize = FIO_decompressZstdFrame(&ress, srcFile, srcFileName, filesize); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; + } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ +#ifdef ZSTD_GZDECOMPRESS + unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); + return 1; +#endif + } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ + || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ +#ifdef ZSTD_LZMADECOMPRESS + unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); + return 1; +#endif + } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { +#ifdef ZSTD_LZ4DECOMPRESS + unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); + return 1; +#endif + } else if ((g_overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ + return FIO_passThrough(ress.dstFile, srcFile, + ress.srcBuffer, ress.srcBufferSize, ress.srcBufferLoaded); + } else { + DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); + return 1; + } } /* for each frame */ + + /* Final Status */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); + + return 0; +} + + +/** FIO_decompressSrcFile() : + Decompression `srcFileName` into `ress.dstFile` + @return : 0 : OK + 1 : operation not started +*/ +static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const char* srcFileName) +{ + FILE* srcFile; + int result; + + if (UTIL_isDirectory(srcFileName)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + srcFile = FIO_openSrcFile(srcFileName); + if (srcFile==NULL) return 1; + + result = FIO_decompressFrames(ress, srcFile, dstFileName, srcFileName); + + /* Close file */ + if (fclose(srcFile)) { + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ + return 1; + } + if ( g_removeSrcFile /* --rm */ + && (result==0) /* decompression successful */ + && strcmp(srcFileName, stdinmark) ) /* not stdin */ { + if (remove(srcFileName)) { + /* failed to remove src file */ + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + return 1; + } } + return result; +} + + +/** FIO_decompressFile_extRess() : + decompress `srcFileName` into `dstFileName` + @return : 0 : OK + 1 : operation aborted (src not available, dst already taken, etc.) +*/ +static int FIO_decompressDstFile(dRess_t ress, + const char* dstFileName, const char* srcFileName) +{ + int result; + stat_t statbuf; + int stat_result = 0; + + ress.dstFile = FIO_openDstFile(dstFileName); + if (ress.dstFile==0) return 1; + + if (UTIL_isRegularFile(dstFileName)) { + g_artefact = dstFileName; + signal(SIGINT, INThandler); + } else { + g_artefact = NULL; + } + + if ( strcmp(srcFileName, stdinmark) + && UTIL_getFileStat(srcFileName, &statbuf) ) + stat_result = 1; + result = FIO_decompressSrcFile(ress, dstFileName, srcFileName); + + if (fclose(ress.dstFile)) { + DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); + result = 1; + } + + if ( (result != 0) /* operation failure */ + && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */ + && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */ + remove(dstFileName); /* remove decompression artefact; note don't do anything special if remove() fails */ + else { /* operation success */ + if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */ + && strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */ + && stat_result ) /* file permissions correctly extracted from src */ + UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into dst */ + } + + signal(SIGINT, SIG_DFL); + + return result; +} + + +int FIO_decompressFilename(const char* dstFileName, const char* srcFileName, + const char* dictFileName) +{ + dRess_t const ress = FIO_createDResources(dictFileName); + + int const decodingError = FIO_decompressDstFile(ress, dstFileName, srcFileName); + + FIO_freeDResources(ress); + return decodingError; +} + + +#define MAXSUFFIXSIZE 8 +int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles, + const char* suffix, + const char* dictFileName) +{ + int skippedFiles = 0; + int missingFiles = 0; + dRess_t ress = FIO_createDResources(dictFileName); + + if (suffix==NULL) + EXM_THROW(70, "zstd: decompression: unknown dst"); /* should never happen */ + + if (!strcmp(suffix, stdoutmark) || !strcmp(suffix, nulmark)) { /* special cases : -c or -t */ + unsigned u; + ress.dstFile = FIO_openDstFile(suffix); + if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", suffix); + for (u=0; u<nbFiles; u++) + missingFiles += FIO_decompressSrcFile(ress, suffix, srcNamesTable[u]); + if (fclose(ress.dstFile)) + EXM_THROW(72, "Write error : cannot properly close stdout"); + } else { + size_t suffixSize; + size_t dfnSize = FNSPACE; + unsigned u; + char* dstFileName = (char*)malloc(FNSPACE); + if (dstFileName==NULL) + EXM_THROW(73, "not enough memory for dstFileName"); + for (u=0; u<nbFiles; u++) { /* create dstFileName */ + const char* const srcFileName = srcNamesTable[u]; + const char* const suffixPtr = strrchr(srcFileName, '.'); + size_t const sfnSize = strlen(srcFileName); + if (!suffixPtr) { + DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", + srcFileName); + skippedFiles++; + continue; + } + suffixSize = strlen(suffixPtr); + if (dfnSize+suffixSize <= sfnSize+1) { + free(dstFileName); + dfnSize = sfnSize + 20; + dstFileName = (char*)malloc(dfnSize); + if (dstFileName==NULL) + EXM_THROW(74, "not enough memory for dstFileName"); + } + if (sfnSize <= suffixSize + || (strcmp(suffixPtr, GZ_EXTENSION) + && strcmp(suffixPtr, XZ_EXTENSION) + && strcmp(suffixPtr, ZSTD_EXTENSION) + && strcmp(suffixPtr, LZMA_EXTENSION) + && strcmp(suffixPtr, LZ4_EXTENSION)) ) { + DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s/%s expected) -- ignored \n", + srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION, LZ4_EXTENSION); + skippedFiles++; + continue; + } else { + memcpy(dstFileName, srcFileName, sfnSize - suffixSize); + dstFileName[sfnSize-suffixSize] = '\0'; + } + missingFiles += FIO_decompressDstFile(ress, dstFileName, srcFileName); + } + free(dstFileName); + } + + FIO_freeDResources(ress); + return missingFiles + skippedFiles; +} + + + +/* ************************************************************************** + * .zst file info (--list command) + ***************************************************************************/ + +typedef struct { + U64 decompressedSize; + U64 compressedSize; + U64 windowSize; + int numActualFrames; + int numSkippableFrames; + int decompUnavailable; + int usesCheck; + U32 nbFiles; +} fileInfo_t; + +/** getFileInfo() : + * Reads information from file, stores in *info + * @return : 0 if successful + * 1 for frame analysis error + * 2 for file not compressed with zstd + * 3 for cases in which file could not be opened. + */ +static int getFileInfo(fileInfo_t* info, const char* inFileName){ + int detectError = 0; + FILE* const srcFile = FIO_openSrcFile(inFileName); + if (srcFile == NULL) { + DISPLAY("Error: could not open source file %s\n", inFileName); + return 3; + } + info->compressedSize = UTIL_getFileSize(inFileName); + + /* begin analyzing frame */ + for ( ; ; ) { + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); + if (numBytesRead < ZSTD_frameHeaderSize_min) { + if ( feof(srcFile) + && (numBytesRead == 0) + && (info->compressedSize > 0) ) { + break; + } + else if (feof(srcFile)) { + DISPLAY("Error: reached end of file with incomplete frame\n"); + detectError = 2; + break; + } + else { + DISPLAY("Error: did not reach end of file but ran out of frames\n"); + detectError = 1; + break; + } + } + { U32 const magicNumber = MEM_readLE32(headerBuffer); + /* Zstandard frame */ + if (magicNumber == ZSTD_MAGICNUMBER) { + ZSTD_frameHeader header; + U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); + if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) { + info->decompUnavailable = 1; + } else { + info->decompressedSize += frameContentSize; + } + if (ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0) { + DISPLAY("Error: could not decode frame header\n"); + detectError = 1; + break; + } + info->windowSize = header.windowSize; + /* move to the end of the frame header */ + { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); + if (ZSTD_isError(headerSize)) { + DISPLAY("Error: could not determine frame header size\n"); + detectError = 1; + break; + } + { int const ret = fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR); + if (ret != 0) { + DISPLAY("Error: could not move to end of frame header\n"); + detectError = 1; + break; + } } } + + /* skip the rest of the blocks in the frame */ + { int lastBlock = 0; + do { + BYTE blockHeaderBuffer[3]; + size_t const readBytes = fread(blockHeaderBuffer, 1, 3, srcFile); + if (readBytes != 3) { + DISPLAY("There was a problem reading the block header\n"); + detectError = 1; + break; + } + { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); + U32 const blockTypeID = (blockHeader >> 1) & 3; + U32 const isRLE = (blockTypeID == 1); + U32 const isWrongBlock = (blockTypeID == 3); + long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3); + if (isWrongBlock) { + DISPLAY("Error: unsupported block type \n"); + detectError = 1; + break; + } + lastBlock = blockHeader & 1; + { int const ret = fseek(srcFile, blockSize, SEEK_CUR); + if (ret != 0) { + DISPLAY("Error: could not skip to end of block\n"); + detectError = 1; + break; + } } } + } while (lastBlock != 1); + + if (detectError) break; + } + + /* check if checksum is used */ + { BYTE const frameHeaderDescriptor = headerBuffer[4]; + int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; + if (contentChecksumFlag) { + int const ret = fseek(srcFile, 4, SEEK_CUR); + info->usesCheck = 1; + if (ret != 0) { + DISPLAY("Error: could not skip past checksum\n"); + detectError = 1; + break; + } } } + info->numActualFrames++; + } + /* Skippable frame */ + else if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + U32 const frameSize = MEM_readLE32(headerBuffer + 4); + long const seek = (long)(8 + frameSize - numBytesRead); + int const ret = LONG_SEEK(srcFile, seek, SEEK_CUR); + if (ret != 0) { + DISPLAY("Error: could not find end of skippable frame\n"); + detectError = 1; + break; + } + info->numSkippableFrames++; + } + /* unknown content */ + else { + detectError = 2; + break; + } + } + } /* end analyzing frame */ + fclose(srcFile); + info->nbFiles = 1; + return detectError; +} + +static void displayInfo(const char* inFileName, fileInfo_t* info, int displayLevel){ + unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB); + const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB"; + double const windowSizeUnit = (double)info->windowSize / unit; + double const compressedSizeUnit = (double)info->compressedSize / unit; + double const decompressedSizeUnit = (double)info->decompressedSize / unit; + double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize; + const char* const checkString = (info->usesCheck ? "XXH64" : "None"); + if (displayLevel <= 2) { + if (!info->decompUnavailable) { + DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %s\n", + info->numSkippableFrames + info->numActualFrames, + info->numSkippableFrames, + compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, + ratio, checkString, inFileName); + } else { + DISPLAYOUT("%6d %5d %7.2f %2s %5s %s\n", + info->numSkippableFrames + info->numActualFrames, + info->numSkippableFrames, + compressedSizeUnit, unitStr, + checkString, inFileName); + } + } else { + DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); + DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); + DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n", + windowSizeUnit, unitStr, + (unsigned long long)info->windowSize); + DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n", + compressedSizeUnit, unitStr, + (unsigned long long)info->compressedSize); + if (!info->decompUnavailable) { + DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n", + decompressedSizeUnit, unitStr, + (unsigned long long)info->decompressedSize); + DISPLAYOUT("Ratio: %.4f\n", ratio); + } + DISPLAYOUT("Check: %s\n", checkString); + DISPLAYOUT("\n"); + } +} + +static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) +{ + fileInfo_t total; + total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; + total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames; + total.compressedSize = fi1.compressedSize + fi2.compressedSize; + total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; + total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; + total.usesCheck = fi1.usesCheck & fi2.usesCheck; + total.nbFiles = fi1.nbFiles + fi2.nbFiles; + return total; +} + +static int FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel){ + /* initialize info to avoid warnings */ + fileInfo_t info; + memset(&info, 0, sizeof(info)); + { int const error = getFileInfo(&info, inFileName); + if (error == 1) { + /* display error, but provide output */ + DISPLAY("An error occurred while getting file info \n"); + } + else if (error == 2) { + DISPLAYOUT("File %s not compressed by zstd \n", inFileName); + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + } + else if (error == 3) { + /* error occurred while opening the file */ + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + } + displayInfo(inFileName, &info, displayLevel); + *total = FIO_addFInfo(*total, info); + return error; + } +} + +int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel){ + if (numFiles == 0) { + DISPLAYOUT("No files given\n"); + return 0; + } + if (displayLevel <= 2) { + DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n"); + } + { int error = 0; + unsigned u; + fileInfo_t total; + memset(&total, 0, sizeof(total)); + total.usesCheck = 1; + for (u=0; u<numFiles;u++) { + error |= FIO_listFile(&total, filenameTable[u], displayLevel); + } + if (numFiles > 1 && displayLevel <= 2) { + unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB); + const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB"; + double const compressedSizeUnit = (double)total.compressedSize / unit; + double const decompressedSizeUnit = (double)total.decompressedSize / unit; + double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize; + const char* const checkString = (total.usesCheck ? "XXH64" : ""); + DISPLAYOUT("----------------------------------------------------------------- \n"); + if (total.decompUnavailable) { + DISPLAYOUT("%6d %5d %7.2f %2s %5s %u files\n", + total.numSkippableFrames + total.numActualFrames, + total.numSkippableFrames, + compressedSizeUnit, unitStr, + checkString, total.nbFiles); + } else { + DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %u files\n", + total.numSkippableFrames + total.numActualFrames, + total.numSkippableFrames, + compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, + ratio, checkString, total.nbFiles); + } + } + return error; + } +} + + +#endif /* #ifndef ZSTD_NODECOMPRESS */ diff --git a/src/zstd/programs/fileio.h b/src/zstd/programs/fileio.h new file mode 100644 index 00000000..aa4484fd --- /dev/null +++ b/src/zstd/programs/fileio.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef FILEIO_H_23981798732 +#define FILEIO_H_23981798732 + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include "zstd.h" /* ZSTD_* */ + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* ************************************* +* Special i/o constants +**************************************/ +#define stdinmark "/*stdin*\\" +#define stdoutmark "/*stdout*\\" +#ifdef _WIN32 +# define nulmark "nul" +#else +# define nulmark "/dev/null" +#endif +#define LZMA_EXTENSION ".lzma" +#define XZ_EXTENSION ".xz" +#define GZ_EXTENSION ".gz" +#define ZSTD_EXTENSION ".zst" +#define LZ4_EXTENSION ".lz4" + + +/*-************************************* +* Types +***************************************/ +typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t; + + +/*-************************************* +* Parameters +***************************************/ +void FIO_setCompressionType(FIO_compressionType_t compressionType); +void FIO_overwriteMode(void); +void FIO_setNotificationLevel(unsigned level); +void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ +void FIO_setDictIDFlag(unsigned dictIDFlag); +void FIO_setChecksumFlag(unsigned checksumFlag); +void FIO_setRemoveSrcFile(unsigned flag); +void FIO_setMemLimit(unsigned memLimit); +void FIO_setNbThreads(unsigned nbThreads); +void FIO_setBlockSize(unsigned blockSize); +void FIO_setOverlapLog(unsigned overlapLog); +void FIO_setLdmFlag(unsigned ldmFlag); +void FIO_setLdmHashLog(unsigned ldmHashLog); +void FIO_setLdmMinMatch(unsigned ldmMinMatch); +void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog); + + +/*-************************************* +* Single File functions +***************************************/ +/** FIO_compressFilename() : + @return : 0 == ok; 1 == pb with src file. */ +int FIO_compressFilename (const char* outfilename, const char* infilename, const char* dictFileName, + int compressionLevel, ZSTD_compressionParameters* comprParams); + +/** FIO_decompressFilename() : + @return : 0 == ok; 1 == pb with src file. */ +int FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName); + +int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel); + +/*-************************************* +* Multiple File functions +***************************************/ +/** FIO_compressMultipleFilenames() : + @return : nb of missing files */ +int FIO_compressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles, + const char* suffix, + const char* dictFileName, int compressionLevel, + ZSTD_compressionParameters* comprParams); + +/** FIO_decompressMultipleFilenames() : + @return : nb of missing or skipped files */ +int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles, + const char* suffix, + const char* dictFileName); + + +#if defined (__cplusplus) +} +#endif + +#endif /* FILEIO_H_23981798732 */ diff --git a/src/zstd/programs/platform.h b/src/zstd/programs/platform.h new file mode 100644 index 00000000..a4d7850f --- /dev/null +++ b/src/zstd/programs/platform.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef PLATFORM_H_MODULE +#define PLATFORM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/* ************************************** +* Compiler Options +****************************************/ +#if defined(_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */ +# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */ +# if (_MSC_VER <= 1800) /* (1800 = Visual Studio 2013) */ +# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */ +# endif +#endif + + +/* ************************************** +* Detect 64-bit OS +* http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros +****************************************/ +#if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \ + || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \ + || (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__ /* SPARC 64-bit */ \ + || defined __x86_64__s || defined _M_X64 /* x86 64-bit */ \ + || defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__ /* ARM 64-bit */ \ + || (defined __mips && (__mips == 64 || __mips == 4 || __mips == 3)) /* MIPS 64-bit */ \ + || defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */ \ + || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */ +# if !defined(__64BIT__) +# define __64BIT__ 1 +# endif +#endif + + +/* ********************************************************* +* Turn on Large Files support (>4GB) for 32-bit Linux/Unix +***********************************************************/ +#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ +# if !defined(_FILE_OFFSET_BITS) +# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */ +# endif +# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ +# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ +# endif +# if defined(_AIX) || defined(__hpux) +# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ +# endif +#endif + + +/* ************************************************************ +* Detect POSIX version +* PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows +* PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX +* PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION +***************************************************************/ +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ + || defined(__midipix__) || defined(__VMS)) +# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ +# define PLATFORM_POSIX_VERSION 200112L +# else +# if defined(__linux__) || defined(__linux) +# ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200112L /* use feature test macro */ +# endif +# endif +# include <unistd.h> /* declares _POSIX_VERSION */ +# if defined(_POSIX_VERSION) /* POSIX compliant */ +# define PLATFORM_POSIX_VERSION _POSIX_VERSION +# else +# define PLATFORM_POSIX_VERSION 0 +# endif +# endif +#endif +#if !defined(PLATFORM_POSIX_VERSION) +# define PLATFORM_POSIX_VERSION -1 +#endif + + +/*-********************************************* +* Detect if isatty() and fileno() are available +************************************************/ +#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__) +# include <unistd.h> /* isatty */ +# define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) +#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__) +# include <io.h> /* _isatty */ +# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +#elif defined(WIN32) || defined(_WIN32) +# include <io.h> /* _isatty */ +# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include <stdio.h> /* FILE */ +static __inline int IS_CONSOLE(FILE* stdStream) +{ + DWORD dummy; + return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy); +} +#else +# define IS_CONSOLE(stdStream) 0 +#endif + + +/****************************** +* OS-specific Includes +******************************/ +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) +# include <fcntl.h> /* _O_BINARY */ +# include <io.h> /* _setmode, _fileno, _get_osfhandle */ +# if !defined(__DJGPP__) +# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include <winioctl.h> /* FSCTL_SET_SPARSE */ +# define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; } +# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } +# else +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +# define SET_SPARSE_FILE_MODE(file) +# endif +#else +# define SET_BINARY_MODE(file) +# define SET_SPARSE_FILE_MODE(file) +#endif + + +#ifndef ZSTD_SPARSE_DEFAULT +# if (defined(__APPLE__) && defined(__MACH__)) +# define ZSTD_SPARSE_DEFAULT 0 +# else +# define ZSTD_SPARSE_DEFAULT 1 +# endif +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* PLATFORM_H_MODULE */ diff --git a/src/zstd/programs/util.h b/src/zstd/programs/util.h new file mode 100644 index 00000000..c8be5f5f --- /dev/null +++ b/src/zstd/programs/util.h @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef UTIL_H_MODULE +#define UTIL_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/*-**************************************** +* Dependencies +******************************************/ +#include "platform.h" /* PLATFORM_POSIX_VERSION */ +#include <stdlib.h> /* malloc */ +#include <stddef.h> /* size_t, ptrdiff_t */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strncmp */ +#include <sys/types.h> /* stat, utime */ +#include <sys/stat.h> /* stat */ +#if defined(_MSC_VER) +# include <sys/utime.h> /* utime */ +# include <io.h> /* _chmod */ +#else +# include <unistd.h> /* chown, stat */ +# include <utime.h> /* utime */ +#endif +#include <time.h> /* time */ +#include <errno.h> +#include "mem.h" /* U32, U64 */ + + +/* ************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +# define UTIL_fseek _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define UTIL_fseek fseeko +#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) +# define UTIL_fseek fseeko64 +#else +# define UTIL_fseek fseek +#endif + + +/*-**************************************** +* Sleep functions: Windows - Posix - others +******************************************/ +#if defined(_WIN32) +# include <windows.h> +# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) +# define UTIL_sleep(s) Sleep(1000*s) +# define UTIL_sleepMilli(milli) Sleep(milli) +#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */ +# include <unistd.h> +# include <sys/resource.h> /* setpriority */ +# include <time.h> /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */ +# if defined(PRIO_PROCESS) +# define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20) +# else +# define SET_REALTIME_PRIORITY /* disabled */ +# endif +# define UTIL_sleep(s) sleep(s) +# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L) /* nanosleep requires POSIX.1-2001 */ +# define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); } +# else +# define UTIL_sleepMilli(milli) /* disabled */ +# endif +#else +# define SET_REALTIME_PRIORITY /* disabled */ +# define UTIL_sleep(s) /* disabled */ +# define UTIL_sleepMilli(milli) /* disabled */ +#endif + + +/* ************************************* +* Constants +***************************************/ +#define LIST_SIZE_INCREASE (8*1024) + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(__INTEL_COMPILER) +# pragma warning(disable : 177) /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */ +#endif +#if defined(__GNUC__) +# define UTIL_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define UTIL_STATIC static inline +#elif defined(_MSC_VER) +# define UTIL_STATIC static __inline +#else +# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Console log +******************************************/ +static int g_utilDisplayLevel; +#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } } + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + typedef LARGE_INTEGER UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } +#elif defined(__APPLE__) && defined(__MACH__) + #include <mach/mach_time.h> + typedef U64 UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom); + } +#elif (PLATFORM_POSIX_VERSION >= 200112L) + #include <time.h> + typedef struct timespec UTIL_freq_t; + typedef struct timespec UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) + { + UTIL_time_t time; + if (clock_gettime(CLOCK_MONOTONIC, &time)) + UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n"); /* we could also exit() */ + return time; + } + UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + } + return diff; + } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; + } +#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */ + typedef clock_t UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +#endif + + +/* returns time span in microseconds */ +UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + + +UTIL_STATIC void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); +} + + + +/*-**************************************** +* File functions +******************************************/ +#if defined(_MSC_VER) + #define chmod _chmod + typedef struct __stat64 stat_t; +#else + typedef struct stat stat_t; +#endif + + +UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf) +{ + int res = 0; + struct utimbuf timebuf; + + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + res += utime(filename, &timebuf); /* set access and modification times */ + +#if !defined(_WIN32) + res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ +#endif + + res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ + + errno = 0; + return -res; /* number of errors is returned */ +} + + +UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf) +{ + int r; +#if defined(_MSC_VER) + r = _stat64(infilename, statbuf); + if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */ +#else + r = stat(infilename, statbuf); + if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */ +#endif + return 1; +} + + +UTIL_STATIC int UTIL_isRegularFile(const char* infilename) +{ + stat_t statbuf; + return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */ +} + + +UTIL_STATIC U32 UTIL_isDirectory(const char* infilename) +{ + int r; + stat_t statbuf; +#if defined(_MSC_VER) + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + +UTIL_STATIC U32 UTIL_isLink(const char* infilename) +{ +#if defined(_WIN32) + /* no symlinks on windows */ + (void)infilename; +#else + int r; + stat_t statbuf; + r = lstat(infilename, &statbuf); + if (!r && S_ISLNK(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +UTIL_STATIC U64 UTIL_getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct __stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#elif defined(__MINGW32__) && defined (__MSVCRT__) + struct _stati64 statbuf; + r = _stati64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ +#endif + return (U64)statbuf.st_size; +} + + +UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) +{ + U64 total = 0; + unsigned n; + for (n=0; n<nbFiles; n++) + total += UTIL_getFileSize(fileNamesTable[n]); + return total; +} + + +/* + * A modified version of realloc(). + * If UTIL_realloc() fails the original block is freed. +*/ +UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size) +{ + void *newptr = realloc(ptr, size); + if (newptr) return newptr; + free(ptr); + return NULL; +} + +#ifdef _WIN32 +# define UTIL_HAS_CREATEFILELIST + +UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks) +{ + char* path; + int dirLength, fnameLength, pathLength, nbFiles = 0; + WIN32_FIND_DATAA cFile; + HANDLE hFile; + + dirLength = (int)strlen(dirName); + path = (char*) malloc(dirLength + 3); + if (!path) return 0; + + memcpy(path, dirName, dirLength); + path[dirLength] = '\\'; + path[dirLength+1] = '*'; + path[dirLength+2] = 0; + + hFile=FindFirstFileA(path, &cFile); + if (hFile == INVALID_HANDLE_VALUE) { + UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName); + return 0; + } + free(path); + + do { + fnameLength = (int)strlen(cFile.cFileName); + path = (char*) malloc(dirLength + fnameLength + 2); + if (!path) { FindClose(hFile); return 0; } + memcpy(path, dirName, dirLength); + path[dirLength] = '\\'; + memcpy(path+dirLength+1, cFile.cFileName, fnameLength); + pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + if (strcmp (cFile.cFileName, "..") == 0 || + strcmp (cFile.cFileName, ".") == 0) continue; + + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */ + if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; } + } + else if ((cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED)) { + if (*bufStart + *pos + pathLength >= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + } while (FindNextFileA(hFile, &cFile)); + + FindClose(hFile); + return nbFiles; +} + +#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ +# define UTIL_HAS_CREATEFILELIST +# include <dirent.h> /* opendir, readdir */ +# include <string.h> /* strerror, memcpy */ + +UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks) +{ + DIR *dir; + struct dirent *entry; + char* path; + int dirLength, fnameLength, pathLength, nbFiles = 0; + + if (!(dir = opendir(dirName))) { + UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno)); + return 0; + } + + dirLength = (int)strlen(dirName); + errno = 0; + while ((entry = readdir(dir)) != NULL) { + if (strcmp (entry->d_name, "..") == 0 || + strcmp (entry->d_name, ".") == 0) continue; + fnameLength = (int)strlen(entry->d_name); + path = (char*) malloc(dirLength + fnameLength + 2); + if (!path) { closedir(dir); return 0; } + memcpy(path, dirName, dirLength); + + path[dirLength] = '/'; + memcpy(path+dirLength+1, entry->d_name, fnameLength); + pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + + if (!followLinks && UTIL_isLink(path)) { + UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path); + continue; + } + + if (UTIL_isDirectory(path)) { + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */ + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } else { + if (*bufStart + *pos + pathLength >= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */ + } + + if (errno != 0) { + UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno)); + free(*bufStart); + *bufStart = NULL; + } + closedir(dir); + return nbFiles; +} + +#else + +UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks) +{ + (void)bufStart; (void)bufEnd; (void)pos; + UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName); + return 0; +} + +#endif /* #ifdef _WIN32 */ + +/* + * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, + * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). + * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer) + * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called. + */ +UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks) +{ + size_t pos; + unsigned i, nbFiles; + char* buf = (char*)malloc(LIST_SIZE_INCREASE); + char* bufend = buf + LIST_SIZE_INCREASE; + const char** fileTable; + + if (!buf) return NULL; + + for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) { + if (!UTIL_isDirectory(inputNames[i])) { + size_t const len = strlen(inputNames[i]); + if (buf + pos + len >= bufend) { + ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE; + buf = (char*)UTIL_realloc(buf, newListSize); + bufend = buf + newListSize; + if (!buf) return NULL; + } + if (buf + pos + len < bufend) { + strncpy(buf + pos, inputNames[i], bufend - (buf + pos)); + pos += len + 1; + nbFiles++; + } + } else { + nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks); + if (buf == NULL) return NULL; + } } + + if (nbFiles == 0) { free(buf); return NULL; } + + fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*)); + if (!fileTable) { free(buf); return NULL; } + + for (i=0, pos=0; i<nbFiles; i++) { + fileTable[i] = buf + pos; + pos += strlen(fileTable[i]) + 1; + } + + if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; } + + *allocatedBuffer = buf; + *allocatedNamesNb = nbFiles; + + return fileTable; +} + + +UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer) +{ + if (allocatedBuffer) free(allocatedBuffer); + if (filenameTable) free((void*)filenameTable); +} + +/* count the number of physical cores */ +#if defined(_WIN32) || defined(WIN32) + +#include <windows.h> + +typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static int numPhysicalCores = 0; + if (numPhysicalCores != 0) return numPhysicalCores; + + { LPFN_GLPI glpi; + BOOL done = FALSE; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL; + DWORD returnLength = 0; + size_t byteOffset = 0; + + glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), + "GetLogicalProcessorInformation"); + + if (glpi == NULL) { + goto failed; + } + + while(!done) { + DWORD rc = glpi(buffer, &returnLength); + if (FALSE == rc) { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + if (buffer) + free(buffer); + buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength); + + if (buffer == NULL) { + perror("zstd"); + exit(1); + } + } else { + /* some other error */ + goto failed; + } + } else { + done = TRUE; + } + } + + ptr = buffer; + + while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) { + + if (ptr->Relationship == RelationProcessorCore) { + numPhysicalCores++; + } + + ptr++; + byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + } + + free(buffer); + + return numPhysicalCores; + } + +failed: + /* try to fall back on GetSystemInfo */ + { SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + numPhysicalCores = sysinfo.dwNumberOfProcessors; + if (numPhysicalCores == 0) numPhysicalCores = 1; /* just in case */ + } + return numPhysicalCores; +} + +#elif defined(__APPLE__) + +#include <sys/sysctl.h> + +/* Use apple-provided syscall + * see: man 3 sysctl */ +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static S32 numPhysicalCores = 0; /* apple specifies int32_t */ + if (numPhysicalCores != 0) return numPhysicalCores; + + { size_t size = sizeof(S32); + int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0); + if (ret != 0) { + if (errno == ENOENT) { + /* entry not present, fall back on 1 */ + numPhysicalCores = 1; + } else { + perror("zstd: can't get number of physical cpus"); + exit(1); + } + } + + return numPhysicalCores; + } +} + +#elif defined(__linux__) + +/* parse /proc/cpuinfo + * siblings / cpu cores should give hyperthreading ratio + * otherwise fall back on sysconf */ +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static int numPhysicalCores = 0; + + if (numPhysicalCores != 0) return numPhysicalCores; + + numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numPhysicalCores == -1) { + /* value not queryable, fall back on 1 */ + return numPhysicalCores = 1; + } + + /* try to determine if there's hyperthreading */ + { FILE* const cpuinfo = fopen("/proc/cpuinfo", "r"); +#define BUF_SIZE 80 + char buff[BUF_SIZE]; + + int siblings = 0; + int cpu_cores = 0; + int ratio = 1; + + if (cpuinfo == NULL) { + /* fall back on the sysconf value */ + return numPhysicalCores; + } + + /* assume the cpu cores/siblings values will be constant across all + * present processors */ + while (!feof(cpuinfo)) { + if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) { + if (strncmp(buff, "siblings", 8) == 0) { + const char* const sep = strchr(buff, ':'); + if (*sep == '\0') { + /* formatting was broken? */ + goto failed; + } + + siblings = atoi(sep + 1); + } + if (strncmp(buff, "cpu cores", 9) == 0) { + const char* const sep = strchr(buff, ':'); + if (*sep == '\0') { + /* formatting was broken? */ + goto failed; + } + + cpu_cores = atoi(sep + 1); + } + } else if (ferror(cpuinfo)) { + /* fall back on the sysconf value */ + goto failed; + } + } + if (siblings && cpu_cores) { + ratio = siblings / cpu_cores; + } +failed: + fclose(cpuinfo); + return numPhysicalCores = numPhysicalCores / ratio; + } +} + +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) + +/* Use apple-provided syscall + * see: man 3 sysctl */ +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static int numPhysicalCores = 0; + + if (numPhysicalCores != 0) return numPhysicalCores; + + numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numPhysicalCores == -1) { + /* value not queryable, fall back on 1 */ + return numPhysicalCores = 1; + } + return numPhysicalCores; +} + +#else + +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + /* assume 1 */ + return 1; +} + +#endif + +#if defined (__cplusplus) +} +#endif + +#endif /* UTIL_H_MODULE */ diff --git a/src/zstd/programs/windres/generate_res.bat b/src/zstd/programs/windres/generate_res.bat new file mode 100644 index 00000000..7ff9aef5 --- /dev/null +++ b/src/zstd/programs/windres/generate_res.bat @@ -0,0 +1,11 @@ +@echo off +REM http://stackoverflow.com/questions/708238/how-do-i-add-an-icon-to-a-mingw-gcc-compiled-executable + +where /q windres.exe +IF ERRORLEVEL 1 ( + ECHO The windres.exe is missing. Ensure it is installed and placed in your PATH. + EXIT /B +) ELSE ( + windres.exe -I ../lib -I windres -i windres/zstd.rc -O coff -F pe-x86-64 -o windres/zstd64.res + windres.exe -I ../lib -I windres -i windres/zstd.rc -O coff -F pe-i386 -o windres/zstd32.res +) diff --git a/src/zstd/programs/windres/verrsrc.h b/src/zstd/programs/windres/verrsrc.h new file mode 100644 index 00000000..e282add0 --- /dev/null +++ b/src/zstd/programs/windres/verrsrc.h @@ -0,0 +1,8 @@ +/* minimal set of defines required to generate zstd.res from zstd.rc */ + +#define VS_VERSION_INFO 1 + +#define VS_FFI_FILEFLAGSMASK 0x0000003FL +#define VOS_NT_WINDOWS32 0x00040004L +#define VFT_DLL 0x00000002L +#define VFT2_UNKNOWN 0x00000000L diff --git a/src/zstd/programs/windres/zstd.rc b/src/zstd/programs/windres/zstd.rc new file mode 100644 index 00000000..f5e40473 --- /dev/null +++ b/src/zstd/programs/windres/zstd.rc @@ -0,0 +1,51 @@ +// Microsoft Visual C++ generated resource script. +// + +#include "zstd.h" /* ZSTD_VERSION_STRING */ +#define APSTUDIO_READONLY_SYMBOLS +#include "verrsrc.h" +#undef APSTUDIO_READONLY_SYMBOLS + + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +LANGUAGE 9, 1 + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION ZSTD_VERSION_MAJOR,ZSTD_VERSION_MINOR,ZSTD_VERSION_RELEASE,0 + PRODUCTVERSION ZSTD_VERSION_MAJOR,ZSTD_VERSION_MINOR,ZSTD_VERSION_RELEASE,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +#ifdef _DEBUG + FILEFLAGS VS_FF_DEBUG +#else + FILEFLAGS 0x0L +#endif + FILEOS VOS_NT_WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904B0" + BEGIN + VALUE "CompanyName", "Yann Collet, Facebook, Inc." + VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm" + VALUE "FileVersion", ZSTD_VERSION_STRING + VALUE "InternalName", "zstd.exe" + VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc." + VALUE "OriginalFilename", "zstd.exe" + VALUE "ProductName", "Zstandard" + VALUE "ProductVersion", ZSTD_VERSION_STRING + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 1200 + END +END + +#endif diff --git a/src/zstd/programs/windres/zstd32.res b/src/zstd/programs/windres/zstd32.res Binary files differnew file mode 100644 index 00000000..d6caf985 --- /dev/null +++ b/src/zstd/programs/windres/zstd32.res diff --git a/src/zstd/programs/windres/zstd64.res b/src/zstd/programs/windres/zstd64.res Binary files differnew file mode 100644 index 00000000..5b1c73bf --- /dev/null +++ b/src/zstd/programs/windres/zstd64.res diff --git a/src/zstd/programs/zstd.1 b/src/zstd/programs/zstd.1 new file mode 100644 index 00000000..8187c740 --- /dev/null +++ b/src/zstd/programs/zstd.1 @@ -0,0 +1,408 @@ +. +.TH "ZSTD" "1" "September 2017" "zstd 1.3.1" "User Commands" +. +.SH "NAME" +\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files +. +.SH "SYNOPSIS" +\fBzstd\fR [\fIOPTIONS\fR] [\-|\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR] +. +.P +\fBzstdmt\fR is equivalent to \fBzstd \-T0\fR +. +.P +\fBunzstd\fR is equivalent to \fBzstd \-d\fR +. +.P +\fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR +. +.SH "DESCRIPTION" +\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, with fast modes at > 200 MB/s per code, and strong modes nearing lzma compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\. +. +.P +\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences : +. +.IP "\(bu" 4 +Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\. +. +.IP "\(bu" 4 +When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default\. Use \fB\-q\fR to turn them off\. +. +.IP "\(bu" 4 +\fBzstd\fR does not accept input from console, but it properly accepts \fBstdin\fR when it\'s not the console\. +. +.IP "\(bu" 4 +\fBzstd\fR displays a short help page when command line is an error\. Use \fB\-q\fR to turn it off\. +. +.IP "" 0 +. +.P +\fBzstd\fR compresses or decompresses each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal : it will display an error message and skip the \fIfile\fR\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\. +. +.P +Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name: +. +.IP "\(bu" 4 +When compressing, the suffix \fB\.zst\fR is appended to the source filename to get the target filename\. +. +.IP "\(bu" 4 +When decompressing, the \fB\.zst\fR suffix is removed from the source filename to get the target filename +. +.IP "" 0 +. +.SS "Concatenation with \.zst files" +It is possible to concatenate \fB\.zst\fR files as is\. \fBzstd\fR will decompress such files as if they were a single \fB\.zst\fR file\. +. +.SH "OPTIONS" +. +.SS "Integer suffixes and special values" +In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\. +. +.TP +\fBKiB\fR +Multiply the integer by 1,024 (2\e \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\. +. +.TP +\fBMiB\fR +Multiply the integer by 1,048,576 (2\e \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\. +. +.SS "Operation mode" +If multiple operation mode options are given, the last one takes effect\. +. +.TP +\fB\-z\fR, \fB\-\-compress\fR +Compress\. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, \fBunzstd\fR implies \fB\-\-decompress\fR)\. +. +.TP +\fB\-d\fR, \fB\-\-decompress\fR, \fB\-\-uncompress\fR +Decompress\. +. +.TP +\fB\-t\fR, \fB\-\-test\fR +Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\-\-decompress \-\-stdout\fR except that the decompressed data is discarded instead of being written to standard output\. No files are created or removed\. +. +.TP +\fB\-b#\fR +Benchmark file(s) using compression level # +. +.TP +\fB\-\-train FILEs\fR +Use FILEs as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\. +. +.TP +\fB\-l\fR, \fB\-\-list\fR +Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command can be augmented with the \fB\-v\fR modifier\. +. +.SS "Operation modifiers" +. +.TP +\fB\-#\fR +\fB#\fR compression level [1\-19] (default: 3) +. +.TP +\fB\-\-ultra\fR +unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\. +. +.TP +\fB\-\-long[=#]\fR +enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\. +. +.IP +Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\. +. +.TP +\fB\-T#\fR, \fB\-\-threads=#\fR +Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. +. +.TP +\fB\-D file\fR +use \fBfile\fR as Dictionary to compress or decompress FILE(s) +. +.TP +\fB\-\-nodictID\fR +do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\. +. +.TP +\fB\-o file\fR +save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR) +. +.TP +\fB\-f\fR, \fB\-\-force\fR +overwrite output without prompting, and (de)compress symbolic links +. +.TP +\fB\-c\fR, \fB\-\-stdout\fR +force write to standard output, even if it is the console +. +.TP +\fB\-\-[no\-]sparse\fR +enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default : enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\. +. +.TP +\fB\-\-rm\fR +remove source file(s) after successful compression or decompression +. +.TP +\fB\-k\fR, \fB\-\-keep\fR +keep source file(s) after successful compression or decompression\. This is the default behavior\. +. +.TP +\fB\-r\fR +operate recursively on dictionaries +. +.TP +\fB\-\-format=FORMAT\fR +compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. +. +.TP +\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR +display help/long help and exit +. +.TP +\fB\-V\fR, \fB\-\-version\fR +display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. +. +.TP +\fB\-v\fR +verbose mode +. +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\. +. +.TP +\fB\-C\fR, \fB\-\-[no\-]check\fR +add integrity check computed from uncompressed data (default : enabled) +. +.TP +\fB\-\-\fR +All arguments after \fB\-\-\fR are treated as files +. +.SH "DICTIONARY BUILDER" +\fBzstd\fR offers \fIdictionary\fR compression, useful for very small files and messages\. It\'s possible to train \fBzstd\fR with some samples, the result of which is saved into a file called a \fBdictionary\fR\. Then during compression and decompression, reference the same dictionary\. It will improve compression ratio of small files\. Typical gains range from 10% (at 64KB) to x5 better (at <1KB)\. +. +.TP +\fB\-\-train FILEs\fR +Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\. +. +.IP +Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-cover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. Equivalent to \fB\-\-train\-cover=d=8,steps=4\fR\. +. +.TP +\fB\-o file\fR +Dictionary saved into \fBfile\fR (default name: dictionary)\. +. +.TP +\fB\-\-maxdict=#\fR +Limit dictionary to specified size (default: 112640)\. +. +.TP +\fB\-B#\fR +Split input files in blocks of size # (default: no split) +. +.TP +\fB\-\-dictID=#\fR +A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. +. +.TP +\fB\-\-train\-cover[=k#,d=#,steps=#]\fR +Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. Requires that \fId\fR <= \fIk\fR\. +. +.IP +Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. Supports multithreading if \fBzstd\fR is compiled with threading support\. +. +.IP +Examples: +. +.IP +\fBzstd \-\-train\-cover FILEs\fR +. +.IP +\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR +. +.IP +\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR +. +.IP +\fBzstd \-\-train\-cover=k=50 FILEs\fR +. +.TP +\fB\-\-train\-legacy[=selectivity=#]\fR +Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\. +. +.IP +Examples: +. +.IP +\fBzstd \-\-train\-legacy FILEs\fR +. +.IP +\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR +. +.SH "BENCHMARK" +. +.TP +\fB\-b#\fR +benchmark file(s) using compression level # +. +.TP +\fB\-e#\fR +benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive) +. +.TP +\fB\-i#\fR +minimum evaluation time, in seconds (default : 3s), benchmark mode only +. +.TP +\fB\-B#\fR, \fB\-\-block\-size=#\fR +cut file(s) into independent blocks of size # (default: no block) +. +.TP +\fB\-\-priority=rt\fR +set process priority to real\-time +. +.SH "ADVANCED COMPRESSION OPTIONS" +. +.SS "\-\-zstd[=options]:" +\fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR: +. +.TP +\fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR +Specify a strategy used by a match finder\. +. +.IP +There are 8 strategies numbered from 1 to 8, from faster to stronger: 1=ZSTD_fast, 2=ZSTD_dfast, 3=ZSTD_greedy, 4=ZSTD_lazy, 5=ZSTD_lazy2, 6=ZSTD_btlazy2, 7=ZSTD_btopt, 8=ZSTD_btultra\. +. +.TP +\fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR +Specify the maximum number of bits for a match distance\. +. +.IP +The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32\-bit platforms and 31 (2 GiB) on 64\-bit platforms\. +. +.IP +Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\. +. +.TP +\fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR +Specify the maximum number of bits for a hash table\. +. +.IP +Bigger hash tables cause less collisions which usually makes compression faster, but requires more memory during compression\. +. +.IP +The minimum \fIhlog\fR is 6 (64 B) and the maximum is 26 (128 MiB)\. +. +.TP +\fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR +Specify the maximum number of bits for a hash chain or a binary tree\. +. +.IP +Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the ZSTD_fast strategy\. +. +.IP +The minimum \fIclog\fR is 6 (64 B) and the maximum is 28 (256 MiB)\. +. +.TP +\fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR +Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale\. +. +.IP +More searches increases the chance to find a match which usually increases compression ratio but decreases compression speed\. +. +.IP +The minimum \fIslog\fR is 1 and the maximum is 26\. +. +.TP +\fBsearchLength\fR=\fIslen\fR, \fBslen\fR=\fIslen\fR +Specify the minimum searched length of a match in a hash table\. +. +.IP +Larger search lengths usually decrease compression ratio but improve decompression speed\. +. +.IP +The minimum \fIslen\fR is 3 and the maximum is 7\. +. +.TP +\fBtargetLen\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR +Specify the minimum match length that causes a match finder to stop searching for better matches\. +. +.IP +A larger minimum match length usually improves compression ratio but decreases compression speed\. This option is only used with strategies ZSTD_btopt and ZSTD_btultra\. +. +.IP +The minimum \fItlen\fR is 4 and the maximum is 999\. +. +.TP +\fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR +Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\. +. +.IP +The minimum \fIovlog\fR is 0, and the maximum is 9\. 0 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the amount of reload by a factor 2\. Default \fIovlog\fR is 6, which means "reload \fBwindowSize / 8\fR"\. Exception : the maximum compression level (22) has a default \fIovlog\fR of 9\. +. +.TP +\fBldmHashLog\fR=\fIldmhlog\fR, \fBldmhlog\fR=\fIldmhlog\fR +Specify the maximum size for a hash table used for long distance matching\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\. +. +.IP +The minimum \fIldmhlog\fR is 6 and the maximum is 26 (default: 20)\. +. +.TP +\fBldmSearchLength\fR=\fIldmslen\fR, \fBldmslen\fR=\fIldmslen\fR +Specify the minimum searched length of a match for long distance matching\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Larger/very small values usually decrease compression ratio\. +. +.IP +The minumum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\. +. +.TP +\fBldmBucketSizeLog\fR=\fIldmblog\fR, \fBldmblog\fR=\fIldmblog\fR +Specify the size of each bucket for the hash table used for long distance matching\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Larger bucket sizes improve collision resolution but decrease compression speed\. +. +.IP +The minimum \fIldmblog\fR is 0 and the maximum is 8 (default: 3)\. +. +.TP +\fBldmHashEveryLog\fR=\fIldmhevery\fR, \fBldmhevery\fR=\fIldmhevery\fR +Specify the frequency of inserting entries into the long distance matching hash table\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\. +. +.IP +The default value is \fBwlog \- ldmhlog\fR\. +. +.SS "\-B#:" +Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\. +. +.SS "Example" +The following parameters sets advanced compression options to those of predefined level 19 for files bigger than 256 KB: +. +.P +\fB\-\-zstd\fR=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6 +. +.SH "BUGS" +Report bugs at: https://github\.com/facebook/zstd/issues +. +.SH "AUTHOR" +Yann Collet diff --git a/src/zstd/programs/zstd.1.md b/src/zstd/programs/zstd.1.md new file mode 100644 index 00000000..eea68548 --- /dev/null +++ b/src/zstd/programs/zstd.1.md @@ -0,0 +1,411 @@ +zstd(1) -- zstd, zstdmt, unzstd, zstdcat - Compress or decompress .zst files +============================================================================ + +SYNOPSIS +-------- + +`zstd` [*OPTIONS*] [-|_INPUT-FILE_] [-o _OUTPUT-FILE_] + +`zstdmt` is equivalent to `zstd -T0` + +`unzstd` is equivalent to `zstd -d` + +`zstdcat` is equivalent to `zstd -dcf` + + +DESCRIPTION +----------- +`zstd` is a fast lossless compression algorithm and data compression tool, +with command line syntax similar to `gzip (1)` and `xz (1)`. +It is based on the **LZ77** family, with further FSE & huff0 entropy stages. +`zstd` offers highly configurable compression speed, +with fast modes at > 200 MB/s per code, +and strong modes nearing lzma compression ratios. +It also features a very fast decoder, with speeds > 500 MB/s per core. + +`zstd` command line syntax is generally similar to gzip, +but features the following differences : + + - Source files are preserved by default. + It's possible to remove them automatically by using the `--rm` command. + - When compressing a single file, `zstd` displays progress notifications + and result summary by default. + Use `-q` to turn them off. + - `zstd` does not accept input from console, + but it properly accepts `stdin` when it's not the console. + - `zstd` displays a short help page when command line is an error. + Use `-q` to turn it off. + +`zstd` compresses or decompresses each _file_ according to the selected +operation mode. +If no _files_ are given or _file_ is `-`, `zstd` reads from standard input +and writes the processed data to standard output. +`zstd` will refuse to write compressed data to standard output +if it is a terminal : it will display an error message and skip the _file_. +Similarly, `zstd` will refuse to read compressed data from standard input +if it is a terminal. + +Unless `--stdout` or `-o` is specified, _files_ are written to a new file +whose name is derived from the source _file_ name: + +* When compressing, the suffix `.zst` is appended to the source filename to + get the target filename. +* When decompressing, the `.zst` suffix is removed from the source filename to + get the target filename + +### Concatenation with .zst files +It is possible to concatenate `.zst` files as is. +`zstd` will decompress such files as if they were a single `.zst` file. + +OPTIONS +------- + +### Integer suffixes and special values +In most places where an integer argument is expected, +an optional suffix is supported to easily indicate large integers. +There must be no space between the integer and the suffix. + +* `KiB`: + Multiply the integer by 1,024 (2\^10). + `Ki`, `K`, and `KB` are accepted as synonyms for `KiB`. +* `MiB`: + Multiply the integer by 1,048,576 (2\^20). + `Mi`, `M`, and `MB` are accepted as synonyms for `MiB`. + +### Operation mode +If multiple operation mode options are given, +the last one takes effect. + +* `-z`, `--compress`: + Compress. + This is the default operation mode when no operation mode option is specified + and no other operation mode is implied from the command name + (for example, `unzstd` implies `--decompress`). +* `-d`, `--decompress`, `--uncompress`: + Decompress. +* `-t`, `--test`: + Test the integrity of compressed _files_. + This option is equivalent to `--decompress --stdout` except that the + decompressed data is discarded instead of being written to standard output. + No files are created or removed. +* `-b#`: + Benchmark file(s) using compression level # +* `--train FILEs`: + Use FILEs as a training set to create a dictionary. + The training set should contain a lot of small files (> 100). +* `-l`, `--list`: + Display information related to a zstd compressed file, such as size, ratio, and checksum. + Some of these fields may not be available. + This command can be augmented with the `-v` modifier. + +### Operation modifiers + +* `-#`: + `#` compression level \[1-19] (default: 3) +* `--ultra`: + unlocks high compression levels 20+ (maximum 22), using a lot more memory. + Note that decompression will also require more memory when using these levels. +* `--long[=#]`: + enables long distance matching with `#` `windowLog`, if not `#` is not + present it defaults to `27`. + This increases the window size (`windowLog`) and memory usage for both the + compressor and decompressor. + This setting is designed to improve the compression ratio for files with + long matches at a large distance. + + Note: If `windowLog` is set to larger than 27, `--long=windowLog` or + `--memory=windowSize` needs to be passed to the decompressor. +* `-T#`, `--threads=#`: + Compress using `#` threads (default: 1). + If `#` is 0, attempt to detect and use the number of physical CPU cores. + In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256. + This modifier does nothing if `zstd` is compiled without multithread support. +* `-D file`: + use `file` as Dictionary to compress or decompress FILE(s) +* `--nodictID`: + do not store dictionary ID within frame header (dictionary compression). + The decoder will have to rely on implicit knowledge about which dictionary to use, + it won't be able to check if it's correct. +* `-o file`: + save result into `file` (only possible with a single _INPUT-FILE_) +* `-f`, `--force`: + overwrite output without prompting, and (de)compress symbolic links +* `-c`, `--stdout`: + force write to standard output, even if it is the console +* `--[no-]sparse`: + enable / disable sparse FS support, + to make files with many zeroes smaller on disk. + Creating sparse files may save disk space and speed up decompression by + reducing the amount of disk I/O. + default : enabled when output is into a file, + and disabled when output is stdout. + This setting overrides default and can force sparse mode over stdout. +* `--rm`: + remove source file(s) after successful compression or decompression +* `-k`, `--keep`: + keep source file(s) after successful compression or decompression. + This is the default behavior. +* `-r`: + operate recursively on dictionaries +* `--format=FORMAT`: + compress and decompress in other formats. If compiled with + support, zstd can compress to or decompress from other compression algorithm + formats. Possibly available options are `gzip`, `xz`, `lzma`, and `lz4`. +* `-h`/`-H`, `--help`: + display help/long help and exit +* `-V`, `--version`: + display version number and exit. + Advanced : `-vV` also displays supported formats. + `-vvV` also displays POSIX support. +* `-v`: + verbose mode +* `-q`, `--quiet`: + suppress warnings, interactivity, and notifications. + specify twice to suppress errors too. +* `-C`, `--[no-]check`: + add integrity check computed from uncompressed data (default : enabled) +* `--`: + All arguments after `--` are treated as files + + +DICTIONARY BUILDER +------------------ +`zstd` offers _dictionary_ compression, +useful for very small files and messages. +It's possible to train `zstd` with some samples, +the result of which is saved into a file called a `dictionary`. +Then during compression and decompression, reference the same dictionary. +It will improve compression ratio of small files. +Typical gains range from 10% (at 64KB) to x5 better (at <1KB). + +* `--train FILEs`: + Use FILEs as training set to create a dictionary. + The training set should contain a lot of small files (> 100), + and weight typically 100x the target dictionary size + (for example, 10 MB for a 100 KB dictionary). + + Supports multithreading if `zstd` is compiled with threading support. + Additional parameters can be specified with `--train-cover`. + The legacy dictionary builder can be accessed with `--train-legacy`. + Equivalent to `--train-cover=d=8,steps=4`. +* `-o file`: + Dictionary saved into `file` (default name: dictionary). +* `--maxdict=#`: + Limit dictionary to specified size (default: 112640). +* `-B#`: + Split input files in blocks of size # (default: no split) +* `--dictID=#`: + A dictionary ID is a locally unique ID that a decoder can use to verify it is + using the right dictionary. + By default, zstd will create a 4-bytes random number ID. + It's possible to give a precise number instead. + Short numbers have an advantage : an ID < 256 will only need 1 byte in the + compressed frame header, and an ID < 65536 will only need 2 bytes. + This compares favorably to 4 bytes default. + However, it's up to the dictionary manager to not assign twice the same ID to + 2 different dictionaries. +* `--train-cover[=k#,d=#,steps=#]`: + Select parameters for the default dictionary builder algorithm named cover. + If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8. + If _k_ is not specified, then it tries _steps_ values in the range [50, 2000]. + If _steps_ is not specified, then the default value of 40 is used. + Requires that _d_ <= _k_. + + Selects segments of size _k_ with highest score to put in the dictionary. + The score of a segment is computed by the sum of the frequencies of all the + subsegments of size _d_. + Generally _d_ should be in the range [6, 8], occasionally up to 16, but the + algorithm will run faster with d <= _8_. + Good values for _k_ vary widely based on the input data, but a safe range is + [2 * _d_, 2000]. + Supports multithreading if `zstd` is compiled with threading support. + + Examples: + + `zstd --train-cover FILEs` + + `zstd --train-cover=k=50,d=8 FILEs` + + `zstd --train-cover=d=8,steps=500 FILEs` + + `zstd --train-cover=k=50 FILEs` + +* `--train-legacy[=selectivity=#]`: + Use legacy dictionary builder algorithm with the given dictionary + _selectivity_ (default: 9). + The smaller the _selectivity_ value, the denser the dictionary, + improving its efficiency but reducing its possible maximum size. + `--train-legacy=s=#` is also accepted. + + Examples: + + `zstd --train-legacy FILEs` + + `zstd --train-legacy=selectivity=8 FILEs` + + +BENCHMARK +--------- + +* `-b#`: + benchmark file(s) using compression level # +* `-e#`: + benchmark file(s) using multiple compression levels, from `-b#` to `-e#` (inclusive) +* `-i#`: + minimum evaluation time, in seconds (default : 3s), benchmark mode only +* `-B#`, `--block-size=#`: + cut file(s) into independent blocks of size # (default: no block) +* `--priority=rt`: + set process priority to real-time + + +ADVANCED COMPRESSION OPTIONS +---------------------------- +### --zstd[=options]: +`zstd` provides 22 predefined compression levels. +The selected or default predefined compression level can be changed with +advanced compression options. +The _options_ are provided as a comma-separated list. +You may specify only the options you want to change and the rest will be +taken from the selected or default compression level. +The list of available _options_: + +- `strategy`=_strat_, `strat`=_strat_: + Specify a strategy used by a match finder. + + There are 8 strategies numbered from 1 to 8, from faster to stronger: + 1=ZSTD\_fast, 2=ZSTD\_dfast, 3=ZSTD\_greedy, 4=ZSTD\_lazy, + 5=ZSTD\_lazy2, 6=ZSTD\_btlazy2, 7=ZSTD\_btopt, 8=ZSTD\_btultra. + +- `windowLog`=_wlog_, `wlog`=_wlog_: + Specify the maximum number of bits for a match distance. + + The higher number of increases the chance to find a match which usually + improves compression ratio. + It also increases memory requirements for the compressor and decompressor. + The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit + platforms and 31 (2 GiB) on 64-bit platforms. + + Note: If `windowLog` is set to larger than 27, `--long=windowLog` or + `--memory=windowSize` needs to be passed to the decompressor. + +- `hashLog`=_hlog_, `hlog`=_hlog_: + Specify the maximum number of bits for a hash table. + + Bigger hash tables cause less collisions which usually makes compression + faster, but requires more memory during compression. + + The minimum _hlog_ is 6 (64 B) and the maximum is 26 (128 MiB). + +- `chainLog`=_clog_, `clog`=_clog_: + Specify the maximum number of bits for a hash chain or a binary tree. + + Higher numbers of bits increases the chance to find a match which usually + improves compression ratio. + It also slows down compression speed and increases memory requirements for + compression. + This option is ignored for the ZSTD_fast strategy. + + The minimum _clog_ is 6 (64 B) and the maximum is 28 (256 MiB). + +- `searchLog`=_slog_, `slog`=_slog_: + Specify the maximum number of searches in a hash chain or a binary tree + using logarithmic scale. + + More searches increases the chance to find a match which usually increases + compression ratio but decreases compression speed. + + The minimum _slog_ is 1 and the maximum is 26. + +- `searchLength`=_slen_, `slen`=_slen_: + Specify the minimum searched length of a match in a hash table. + + Larger search lengths usually decrease compression ratio but improve + decompression speed. + + The minimum _slen_ is 3 and the maximum is 7. + +- `targetLen`=_tlen_, `tlen`=_tlen_: + Specify the minimum match length that causes a match finder to stop + searching for better matches. + + A larger minimum match length usually improves compression ratio but + decreases compression speed. + This option is only used with strategies ZSTD_btopt and ZSTD_btultra. + + The minimum _tlen_ is 4 and the maximum is 999. + +- `overlapLog`=_ovlog_, `ovlog`=_ovlog_: + Determine `overlapSize`, amount of data reloaded from previous job. + This parameter is only available when multithreading is enabled. + Reloading more data improves compression ratio, but decreases speed. + + The minimum _ovlog_ is 0, and the maximum is 9. + 0 means "no overlap", hence completely independent jobs. + 9 means "full overlap", meaning up to `windowSize` is reloaded from previous job. + Reducing _ovlog_ by 1 reduces the amount of reload by a factor 2. + Default _ovlog_ is 6, which means "reload `windowSize / 8`". + Exception : the maximum compression level (22) has a default _ovlog_ of 9. + +- `ldmHashLog`=_ldmhlog_, `ldmhlog`=_ldmhlog_: + Specify the maximum size for a hash table used for long distance matching. + + This option is ignored unless long distance matching is enabled. + + Bigger hash tables usually improve compression ratio at the expense of more + memory during compression and a decrease in compression speed. + + The minimum _ldmhlog_ is 6 and the maximum is 26 (default: 20). + +- `ldmSearchLength`=_ldmslen_, `ldmslen`=_ldmslen_: + Specify the minimum searched length of a match for long distance matching. + + This option is ignored unless long distance matching is enabled. + + Larger/very small values usually decrease compression ratio. + + The minumum _ldmslen_ is 4 and the maximum is 4096 (default: 64). + +- `ldmBucketSizeLog`=_ldmblog_, `ldmblog`=_ldmblog_: + Specify the size of each bucket for the hash table used for long distance + matching. + + This option is ignored unless long distance matching is enabled. + + Larger bucket sizes improve collision resolution but decrease compression + speed. + + The minimum _ldmblog_ is 0 and the maximum is 8 (default: 3). + +- `ldmHashEveryLog`=_ldmhevery_, `ldmhevery`=_ldmhevery_: + Specify the frequency of inserting entries into the long distance matching + hash table. + + This option is ignored unless long distance matching is enabled. + + Larger values will improve compression speed. Deviating far from the + default value will likely result in a decrease in compression ratio. + + The default value is `wlog - ldmhlog`. + +### -B#: +Select the size of each compression job. +This parameter is available only when multi-threading is enabled. +Default value is `4 * windowSize`, which means it varies depending on compression level. +`-B#` makes it possible to select a custom value. +Note that job size must respect a minimum value which is enforced transparently. +This minimum is either 1 MB, or `overlapSize`, whichever is largest. + +### Example +The following parameters sets advanced compression options to those of +predefined level 19 for files bigger than 256 KB: + +`--zstd`=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6 + +BUGS +---- +Report bugs at: https://github.com/facebook/zstd/issues + +AUTHOR +------ +Yann Collet diff --git a/src/zstd/programs/zstdcli.c b/src/zstd/programs/zstdcli.c new file mode 100644 index 00000000..3f836734 --- /dev/null +++ b/src/zstd/programs/zstdcli.c @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************ +* Tuning parameters +**************************************/ +#ifndef ZSTDCLI_CLEVEL_DEFAULT +# define ZSTDCLI_CLEVEL_DEFAULT 3 +#endif + +#ifndef ZSTDCLI_CLEVEL_MAX +# define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */ +#endif + + + +/*-************************************ +* Dependencies +**************************************/ +#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */ +#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */ +#include <stdio.h> /* fprintf(), stdin, stdout, stderr */ +#include <string.h> /* strcmp, strlen */ +#include <errno.h> /* errno */ +#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ +#ifndef ZSTD_NOBENCH +# include "bench.h" /* BMK_benchFiles, BMK_SetNbSeconds */ +#endif +#ifndef ZSTD_NODICT +# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ +#endif +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel */ +#include "zstd.h" /* ZSTD_VERSION_STRING */ + + +/*-************************************ +* Constants +**************************************/ +#define COMPRESSOR_NAME "zstd command line interface" +#ifndef ZSTD_VERSION +# define ZSTD_VERSION "v" ZSTD_VERSION_STRING +#endif +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR + +#define ZSTD_ZSTDMT "zstdmt" +#define ZSTD_UNZSTD "unzstd" +#define ZSTD_CAT "zstdcat" +#define ZSTD_GZ "gzip" +#define ZSTD_GUNZIP "gunzip" +#define ZSTD_GZCAT "gzcat" +#define ZSTD_LZMA "lzma" +#define ZSTD_UNLZMA "unlzma" +#define ZSTD_XZ "xz" +#define ZSTD_UNXZ "unxz" +#define ZSTD_LZ4 "lz4" +#define ZSTD_UNLZ4 "unlz4" + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define DISPLAY_LEVEL_DEFAULT 2 + +static const char* g_defaultDictName = "dictionary"; +static const unsigned g_defaultMaxDictSize = 110 KB; +static const int g_defaultDictCLevel = 3; +static const unsigned g_defaultSelectivityLevel = 9; +static const unsigned g_defaultMaxWindowLog = 27; +#define OVERLAP_LOG_DEFAULT 9999 +#define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */ +static U32 g_overlapLog = OVERLAP_LOG_DEFAULT; +static U32 g_ldmHashLog = 0; +static U32 g_ldmMinMatch = 0; +static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT; +static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT; + + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(g_displayOut, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } +static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ +static FILE* g_displayOut; + + +/*-************************************ +* Command Line +**************************************/ +static int usage(const char* programName) +{ + DISPLAY( "Usage : \n"); + DISPLAY( " %s [args] [FILE(s)] [-o file] \n", programName); + DISPLAY( "\n"); + DISPLAY( "FILE : a filename \n"); + DISPLAY( " with no FILE, or when FILE is - , read standard input\n"); + DISPLAY( "Arguments : \n"); +#ifndef ZSTD_NOCOMPRESS + DISPLAY( " -# : # compression level (1-%d, default:%d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT); +#endif +#ifndef ZSTD_NODECOMPRESS + DISPLAY( " -d : decompression \n"); +#endif + DISPLAY( " -D file: use `file` as Dictionary \n"); + DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n"); + DISPLAY( " -f : overwrite output without prompting and (de)compress links \n"); + DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); + DISPLAY( " -k : preserve source file(s) (default) \n"); + DISPLAY( " -h/-H : display help/long help and exit \n"); + return 0; +} + +static int usage_advanced(const char* programName) +{ + DISPLAY(WELCOME_MESSAGE); + usage(programName); + DISPLAY( "\n"); + DISPLAY( "Advanced arguments : \n"); + DISPLAY( " -V : display Version number and exit \n"); + DISPLAY( " -v : verbose mode; specify multiple times to increase verbosity\n"); + DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); + DISPLAY( " -c : force write to standard output, even if it is the console\n"); + DISPLAY( " -l : print information about zstd compressed files \n"); +#ifndef ZSTD_NOCOMPRESS + DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); + DISPLAY( "--long[=#] : enable long distance matching with given window log (default : %u)\n", g_defaultMaxWindowLog); +#ifdef ZSTD_MULTITHREAD + DISPLAY( " -T# : use # threads for compression (default:1) \n"); + DISPLAY( " -B# : select size of each job (default:0==automatic) \n"); +#endif + DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n"); + DISPLAY( "--[no-]check : integrity check (default:enabled) \n"); +#endif +#ifdef UTIL_HAS_CREATEFILELIST + DISPLAY( " -r : operate recursively on directories \n"); +#endif +#ifdef ZSTD_GZCOMPRESS + DISPLAY( "--format=gzip : compress files to the .gz format \n"); +#endif +#ifdef ZSTD_LZMACOMPRESS + DISPLAY( "--format=xz : compress files to the .xz format \n"); + DISPLAY( "--format=lzma : compress files to the .lzma format \n"); +#endif +#ifdef ZSTD_LZ4COMPRESS + DISPLAY( "--format=lz4 : compress files to the .lz4 format \n"); +#endif +#ifndef ZSTD_NODECOMPRESS + DISPLAY( "--test : test compressed file integrity \n"); +#if ZSTD_SPARSE_DEFAULT + DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n"); +#else + DISPLAY( "--[no-]sparse : sparse mode (default:disabled)\n"); +#endif +#endif + DISPLAY( " -M# : Set a memory usage limit for decompression \n"); + DISPLAY( "-- : All arguments after \"--\" are treated as files \n"); +#ifndef ZSTD_NODICT + DISPLAY( "\n"); + DISPLAY( "Dictionary builder : \n"); + DISPLAY( "--train ## : create a dictionary from a training set of files \n"); + DISPLAY( "--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args\n"); + DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel); + DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName); + DISPLAY( "--maxdict=# : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize); + DISPLAY( "--dictID=# : force dictionary ID to specified value (default: random)\n"); +#endif +#ifndef ZSTD_NOBENCH + DISPLAY( "\n"); + DISPLAY( "Benchmark arguments : \n"); + DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); + DISPLAY( " -e# : test all compression levels from -bX to # (default: 1)\n"); + DISPLAY( " -i# : minimum evaluation time in seconds (default : 3s) \n"); + DISPLAY( " -B# : cut file into independent blocks of size # (default: no block)\n"); + DISPLAY( "--priority=rt : set process priority to real-time \n"); +#endif + return 0; +} + +static int badusage(const char* programName) +{ + DISPLAYLEVEL(1, "Incorrect parameters\n"); + if (g_displayLevel >= 2) usage(programName); + return 1; +} + +static void waitEnter(void) +{ + int unused; + DISPLAY("Press enter to continue...\n"); + unused = getchar(); + (void)unused; +} + +static const char* lastNameFromPath(const char* path) +{ + const char* name = path; + if (strrchr(name, '/')) name = strrchr(name, '/') + 1; + if (strrchr(name, '\\')) name = strrchr(name, '\\') + 1; /* windows */ + return name; +} + +/*! exeNameMatch() : + @return : a non-zero value if exeName matches test, excluding the extension + */ +static int exeNameMatch(const char* exeName, const char* test) +{ + return !strncmp(exeName, test, strlen(test)) && + (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.'); +} + +/*! readU32FromChar() : + @return : unsigned integer value read from input in `char` format + allows and interprets K, KB, KiB, M, MB and MiB suffix. + Will also modify `*stringPtr`, advancing it to position where it stopped reading. + Note : function result can overflow if digit string > MAX_UINT */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + if ((**stringPtr=='K') || (**stringPtr=='M')) { + result <<= 10; + if (**stringPtr=='M') result <<= 10; + (*stringPtr)++ ; + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +#ifndef ZSTD_NODICT +/** + * parseCoverParameters() : + * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params + * @return 1 means that cover parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params) +{ + memset(params, 0, sizeof(*params)); + for (; ;) { + if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + return 0; + } + if (stringPtr[0] != 0) return 0; + DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps); + return 1; +} + +/** + * parseLegacyParameters() : + * reads legacy dictioanry builter parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity + * @return 1 means that legacy dictionary builder parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity) +{ + if (!longCommandWArg(&stringPtr, "s=") && !longCommandWArg(&stringPtr, "selectivity=")) { return 0; } + *selectivity = readU32FromChar(&stringPtr); + if (stringPtr[0] != 0) return 0; + DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity); + return 1; +} + +static ZDICT_cover_params_t defaultCoverParams(void) +{ + ZDICT_cover_params_t params; + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + return params; +} +#endif + + +/** parseCompressionParameters() : + * reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params + * @return 1 means that compression parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params) +{ + for ( ; ;) { + if (longCommandWArg(&stringPtr, "windowLog=") || longCommandWArg(&stringPtr, "wlog=")) { params->windowLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "chainLog=") || longCommandWArg(&stringPtr, "clog=")) { params->chainLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "hashLog=") || longCommandWArg(&stringPtr, "hlog=")) { params->hashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "searchLog=") || longCommandWArg(&stringPtr, "slog=")) { params->searchLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "searchLength=") || longCommandWArg(&stringPtr, "slen=")) { params->searchLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmslen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "ldmblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashEveryLog=") || longCommandWArg(&stringPtr, "ldmhevery=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + return 0; + } + + if (stringPtr[0] != 0) return 0; /* check the end of string */ + DISPLAYLEVEL(4, "windowLog=%d\nchainLog=%d\nhashLog=%d\nsearchLog=%d\n", params->windowLog, params->chainLog, params->hashLog, params->searchLog); + DISPLAYLEVEL(4, "searchLength=%d\ntargetLength=%d\nstrategy=%d\n", params->searchLength, params->targetLength, params->strategy); + return 1; +} + +static void printVersion(void) +{ + DISPLAY(WELCOME_MESSAGE); + /* format support */ + DISPLAYLEVEL(3, "*** supports: zstd"); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>0) && (ZSTD_LEGACY_SUPPORT<8) + DISPLAYLEVEL(3, ", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT); +#endif +#ifdef ZSTD_GZCOMPRESS + DISPLAYLEVEL(3, ", gzip"); +#endif +#ifdef ZSTD_LZ4COMPRESS + DISPLAYLEVEL(3, ", lz4"); +#endif +#ifdef ZSTD_LZMACOMPRESS + DISPLAYLEVEL(3, ", lzma, xz "); +#endif + DISPLAYLEVEL(3, "\n"); + /* posix support */ +#ifdef _POSIX_C_SOURCE + DISPLAYLEVEL(4, "_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE); +#endif +#ifdef _POSIX_VERSION + DISPLAYLEVEL(4, "_POSIX_VERSION defined: %ldL \n", (long) _POSIX_VERSION); +#endif +#ifdef PLATFORM_POSIX_VERSION + DISPLAYLEVEL(4, "PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION); +#endif +} + +typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode; + +#define CLEAN_RETURN(i) { operationResult = (i); goto _end; } + +int main(int argCount, const char* argv[]) +{ + int argNb, + forceStdout=0, + followLinks=0, + main_pause=0, + nextEntryIsDictionary=0, + operationResult=0, + nextArgumentIsOutFileName=0, + nextArgumentIsMaxDict=0, + nextArgumentIsDictID=0, + nextArgumentsAreFiles=0, + ultra=0, + lastCommand = 0, + nbThreads = 1, + setRealTimePrio = 0, + ldmFlag = 0; + unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ + size_t blockSize = 0; + zstd_operation_mode operation = zom_compress; + ZSTD_compressionParameters compressionParams; + int cLevel = ZSTDCLI_CLEVEL_DEFAULT; + int cLevelLast = 1; + unsigned recursive = 0; + unsigned memLimit = 0; + const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ + unsigned filenameIdx = 0; + const char* programName = argv[0]; + const char* outFileName = NULL; + const char* dictFileName = NULL; + const char* suffix = ZSTD_EXTENSION; + unsigned maxDictSize = g_defaultMaxDictSize; + unsigned dictID = 0; + int dictCLevel = g_defaultDictCLevel; + unsigned dictSelect = g_defaultSelectivityLevel; +#ifdef UTIL_HAS_CREATEFILELIST + const char** extendedFileList = NULL; + char* fileNamesBuf = NULL; + unsigned fileNamesNb; +#endif +#ifndef ZSTD_NODICT + ZDICT_cover_params_t coverParams = defaultCoverParams(); + int cover = 1; +#endif + + + /* init */ + (void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */ + (void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */ + (void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */ + (void)memLimit; /* not used when ZSTD_NODECOMPRESS set */ + if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); } + filenameTable[0] = stdinmark; + g_displayOut = stderr; + + programName = lastNameFromPath(programName); + + /* preset behaviors */ + if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbThreads=0; + if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress; + if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; } + if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); FIO_setRemoveSrcFile(1); } /* behave like gzip */ + if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */ + if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat */ + if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */ + if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like unlzma */ + if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */ + if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like unxz */ + if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); FIO_setRemoveSrcFile(1); } /* behave like xz */ + if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(FIO_lz4Compression); FIO_setRemoveSrcFile(1); } /* behave like unxz */ + memset(&compressionParams, 0, sizeof(compressionParams)); + + /* command switches */ + for (argNb=1; argNb<argCount; argNb++) { + const char* argument = argv[argNb]; + if(!argument) continue; /* Protection if argument empty */ + + if (nextArgumentsAreFiles==0) { + /* "-" means stdin/stdout */ + if (!strcmp(argument, "-")){ + if (!filenameIdx) { + filenameIdx=1, filenameTable[0]=stdinmark; + outFileName=stdoutmark; + g_displayLevel-=(g_displayLevel==2); + continue; + } } + + /* Decode commands (note : aggregated commands are allowed) */ + if (argument[0]=='-') { + + if (argument[1]=='-') { + /* long commands (--long-word) */ + if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */ + if (!strcmp(argument, "--list")) { operation=zom_list; continue; } + if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; } + if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; } + if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; } + if (!strcmp(argument, "--force")) { FIO_overwriteMode(); forceStdout=1; followLinks=1; continue; } + if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); } + if (!strcmp(argument, "--help")) { g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); } + if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; } + if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } + if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; } + if (!strcmp(argument, "--ultra")) { ultra=1; continue; } + if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; } + if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; } + if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; } + if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; } + if (!strcmp(argument, "--test")) { operation=zom_test; continue; } + if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; } + if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ + if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ + if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; } + if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; } + if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; } + if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } +#ifdef ZSTD_GZCOMPRESS + if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); continue; } +#endif +#ifdef ZSTD_LZMACOMPRESS + if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); continue; } + if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); continue; } +#endif +#ifdef ZSTD_LZ4COMPRESS + if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); continue; } +#endif + + /* long commands with arguments */ +#ifndef ZSTD_NODICT + if (longCommandWArg(&argument, "--train-cover")) { + operation = zom_train; + outFileName = g_defaultDictName; + cover = 1; + /* Allow optional arguments following an = */ + if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); } + else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); } + else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); } + continue; + } + if (longCommandWArg(&argument, "--train-legacy")) { + operation = zom_train; + outFileName = g_defaultDictName; + cover = 0; + /* Allow optional arguments following an = */ + if (*argument == 0) { continue; } + else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); } + else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); } + continue; + } +#endif + if (longCommandWArg(&argument, "--threads=")) { nbThreads = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } + if (longCommandWArg(&argument, "--long")) { + unsigned ldmWindowLog = 0; + ldmFlag = 1; + /* Parse optional window log */ + if (*argument == '=') { + ++argument; + ldmWindowLog = readU32FromChar(&argument); + } else if (*argument != 0) { + /* Invalid character following --long */ + CLEAN_RETURN(badusage(programName)); + } + /* Only set windowLog if not already set by --zstd */ + if (compressionParams.windowLog == 0) + compressionParams.windowLog = ldmWindowLog; + continue; + } + /* fall-through, will trigger bad_usage() later on */ + } + + argument++; + while (argument[0]!=0) { + if (lastCommand) { + DISPLAY("error : command must be followed by argument \n"); + CLEAN_RETURN(1); + } +#ifndef ZSTD_NOCOMPRESS + /* compression Level */ + if ((*argument>='0') && (*argument<='9')) { + dictCLevel = cLevel = readU32FromChar(&argument); + continue; + } +#endif + + switch(argument[0]) + { + /* Display help */ + case 'V': g_displayOut=stdout; printVersion(); CLEAN_RETURN(0); /* Version Only */ + case 'H': + case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); + + /* Compress */ + case 'z': operation=zom_compress; argument++; break; + + /* Decoding */ + case 'd': +#ifndef ZSTD_NOBENCH + if (operation==zom_bench) { BMK_setDecodeOnlyMode(1); argument++; break; } /* benchmark decode (hidden option) */ +#endif + operation=zom_decompress; argument++; break; + + /* Force stdout, even if stdout==console */ + case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break; + + /* Use file content as dictionary */ + case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break; + + /* Overwrite */ + case 'f': FIO_overwriteMode(); forceStdout=1; followLinks=1; argument++; break; + + /* Verbose mode */ + case 'v': g_displayLevel++; argument++; break; + + /* Quiet mode */ + case 'q': g_displayLevel--; argument++; break; + + /* keep source file (default) */ + case 'k': FIO_setRemoveSrcFile(0); argument++; break; + + /* Checksum */ + case 'C': FIO_setChecksumFlag(2); argument++; break; + + /* test compressed file */ + case 't': operation=zom_test; argument++; break; + + /* destination file name */ + case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break; + + /* limit decompression memory */ + case 'M': + argument++; + memLimit = readU32FromChar(&argument); + break; + case 'l': operation=zom_list; argument++; break; +#ifdef UTIL_HAS_CREATEFILELIST + /* recursive */ + case 'r': recursive=1; argument++; break; +#endif + +#ifndef ZSTD_NOBENCH + /* Benchmark */ + case 'b': + operation=zom_bench; + argument++; + break; + + /* range bench (benchmark only) */ + case 'e': + /* compression Level */ + argument++; + cLevelLast = readU32FromChar(&argument); + break; + + /* Modify Nb Iterations (benchmark only) */ + case 'i': + argument++; + bench_nbSeconds = readU32FromChar(&argument); + break; + + /* cut input into blocks (benchmark only) */ + case 'B': + argument++; + blockSize = readU32FromChar(&argument); + break; + +#endif /* ZSTD_NOBENCH */ + + /* nb of threads (hidden option) */ + case 'T': + argument++; + nbThreads = readU32FromChar(&argument); + break; + + /* Dictionary Selection level */ + case 's': + argument++; + dictSelect = readU32FromChar(&argument); + break; + + /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ + case 'p': argument++; +#ifndef ZSTD_NOBENCH + if ((*argument>='0') && (*argument<='9')) { + BMK_setAdditionalParam(readU32FromChar(&argument)); + } else +#endif + main_pause=1; + break; + /* unknown command */ + default : CLEAN_RETURN(badusage(programName)); + } + } + continue; + } /* if (argument[0]=='-') */ + + if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */ + nextArgumentIsMaxDict = 0; + lastCommand = 0; + maxDictSize = readU32FromChar(&argument); + continue; + } + + if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */ + nextArgumentIsDictID = 0; + lastCommand = 0; + dictID = readU32FromChar(&argument); + continue; + } + + } /* if (nextArgumentIsAFile==0) */ + + if (nextEntryIsDictionary) { + nextEntryIsDictionary = 0; + lastCommand = 0; + dictFileName = argument; + continue; + } + + if (nextArgumentIsOutFileName) { + nextArgumentIsOutFileName = 0; + lastCommand = 0; + outFileName = argument; + if (!strcmp(outFileName, "-")) outFileName = stdoutmark; + continue; + } + + /* add filename to list */ + filenameTable[filenameIdx++] = argument; + } + + if (lastCommand) { /* forgotten argument */ + DISPLAY("error : command must be followed by argument \n"); + CLEAN_RETURN(1); + } + + /* Welcome message (if verbose) */ + DISPLAYLEVEL(3, WELCOME_MESSAGE); + + if (nbThreads == 0) { + /* try to guess */ + nbThreads = UTIL_countPhysicalCores(); + DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbThreads); + } + + g_utilDisplayLevel = g_displayLevel; + if (!followLinks) { + unsigned u; + for (u=0, fileNamesNb=0; u<filenameIdx; u++) { + if (UTIL_isLink(filenameTable[u])) { + DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", filenameTable[u]); + } else { + filenameTable[fileNamesNb++] = filenameTable[u]; + } + } + filenameIdx = fileNamesNb; + } +#ifdef UTIL_HAS_CREATEFILELIST + if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */ + extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, followLinks); + if (extendedFileList) { + unsigned u; + for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]); + free((void*)filenameTable); + filenameTable = extendedFileList; + filenameIdx = fileNamesNb; + } + } +#endif + + if (operation == zom_list) { +#ifndef ZSTD_NODECOMPRESS + int const ret = FIO_listMultipleFiles(filenameIdx, filenameTable, g_displayLevel); + CLEAN_RETURN(ret); +#else + DISPLAY("file information is not supported \n"); + CLEAN_RETURN(1); +#endif + } + + /* Check if benchmark is selected */ + if (operation==zom_bench) { +#ifndef ZSTD_NOBENCH + BMK_setNotificationLevel(g_displayLevel); + BMK_setBlockSize(blockSize); + BMK_setNbThreads(nbThreads); + BMK_setNbSeconds(bench_nbSeconds); + BMK_setLdmFlag(ldmFlag); + BMK_setLdmMinMatch(g_ldmMinMatch); + BMK_setLdmHashLog(g_ldmHashLog); + if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { + BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog); + } + if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { + BMK_setLdmHashEveryLog(g_ldmHashEveryLog); + } + BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio); +#endif + (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; + goto _end; + } + + /* Check if dictionary builder is selected */ + if (operation==zom_train) { +#ifndef ZSTD_NODICT + ZDICT_params_t zParams; + zParams.compressionLevel = dictCLevel; + zParams.notificationLevel = g_displayLevel; + zParams.dictID = dictID; + if (cover) { + int const optimize = !coverParams.k || !coverParams.d; + coverParams.nbThreads = nbThreads; + coverParams.zParams = zParams; + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, optimize); + } else { + ZDICT_legacy_params_t dictParams; + memset(&dictParams, 0, sizeof(dictParams)); + dictParams.selectivityLevel = dictSelect; + dictParams.zParams = zParams; + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, 0); + } +#endif + goto _end; + } + +#ifndef ZSTD_NODECOMPRESS + if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */ +#endif + + /* No input filename ==> use stdin and stdout */ + filenameIdx += !filenameIdx; /* filenameTable[0] is stdin by default */ + if (!strcmp(filenameTable[0], stdinmark) && !outFileName) outFileName = stdoutmark; /* when input is stdin, default output is stdout */ + + /* Check if input/output defined as console; trigger an error in this case */ + if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName)); + if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !strcmp(filenameTable[0], stdinmark) && !forceStdout && operation!=zom_decompress) + CLEAN_RETURN(badusage(programName)); + + /* user-selected output filename, only possible with a single file */ + if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) { + DISPLAY("Too many files (%u) on the command line. \n", filenameIdx); + CLEAN_RETURN(filenameIdx); + } + +#ifndef ZSTD_NOCOMPRESS + /* check compression level limits */ + { int const maxCLevel = ultra ? ZSTD_maxCLevel() : ZSTDCLI_CLEVEL_MAX; + if (cLevel > maxCLevel) { + DISPLAYLEVEL(2, "Warning : compression level higher than max, reduced to %i \n", maxCLevel); + cLevel = maxCLevel; + } } +#endif + + /* No status message in pipe mode (stdin - stdout) or multi-files mode */ + if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1; + if ((filenameIdx>1) & (g_displayLevel==2)) g_displayLevel=1; + + /* IO Stream/File */ + FIO_setNotificationLevel(g_displayLevel); + if (operation==zom_compress) { +#ifndef ZSTD_NOCOMPRESS + FIO_setNbThreads(nbThreads); + FIO_setBlockSize((U32)blockSize); + FIO_setLdmFlag(ldmFlag); + FIO_setLdmHashLog(g_ldmHashLog); + FIO_setLdmMinMatch(g_ldmMinMatch); + if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { + FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog); + } + if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { + FIO_setLdmHashEveryLog(g_ldmHashEveryLog); + } + + if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog); + if ((filenameIdx==1) && outFileName) + operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); + else + operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : suffix, dictFileName, cLevel, &compressionParams); +#else + (void)suffix; + DISPLAY("Compression not supported\n"); +#endif + } else { /* decompression or test */ +#ifndef ZSTD_NODECOMPRESS + if (memLimit == 0) { + if (compressionParams.windowLog == 0) + memLimit = (U32)1 << g_defaultMaxWindowLog; + else { + memLimit = (U32)1 << (compressionParams.windowLog & 31); + } + } + FIO_setMemLimit(memLimit); + if (filenameIdx==1 && outFileName) + operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName); + else + operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName); +#else + DISPLAY("Decompression not supported\n"); +#endif + } + +_end: + if (main_pause) waitEnter(); +#ifdef UTIL_HAS_CREATEFILELIST + if (extendedFileList) + UTIL_freeFileList(extendedFileList, fileNamesBuf); + else +#endif + free((void*)filenameTable); + return operationResult; +} diff --git a/src/zstd/programs/zstdgrep b/src/zstd/programs/zstdgrep new file mode 100755 index 00000000..9f871c03 --- /dev/null +++ b/src/zstd/programs/zstdgrep @@ -0,0 +1,124 @@ +#!/bin/sh +# +# Copyright (c) 2003 Thomas Klausner. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +grep=grep +zcat=zstdcat + +endofopts=0 +pattern_found=0 +grep_args="" +hyphen=0 +silent=0 + +prg=$(basename $0) + +# handle being called 'zegrep' or 'zfgrep' +case ${prg} in + *zegrep) + grep_args="-E";; + *zfgrep) + grep_args="-F";; +esac + +# skip all options and pass them on to grep taking care of options +# with arguments, and if -e was supplied + +while [ $# -gt 0 -a ${endofopts} -eq 0 ] +do + case $1 in + # from GNU grep-2.5.1 -- keep in sync! + -[ABCDXdefm]) + if [ $# -lt 2 ] + then + echo "${prg}: missing argument for $1 flag" >&2 + exit 1 + fi + case $1 in + -e) + pattern="$2" + pattern_found=1 + shift 2 + break + ;; + *) + ;; + esac + grep_args="${grep_args} $1 $2" + shift 2 + ;; + --) + shift + endofopts=1 + ;; + -) + hyphen=1 + shift + ;; + -h) + silent=1 + shift + ;; + -*) + grep_args="${grep_args} $1" + shift + ;; + *) + # pattern to grep for + endofopts=1 + ;; + esac +done + +# if no -e option was found, take next argument as grep-pattern +if [ ${pattern_found} -lt 1 ] +then + if [ $# -ge 1 ]; then + pattern="$1" + shift + elif [ ${hyphen} -gt 0 ]; then + pattern="-" + else + echo "${prg}: missing pattern" >&2 + exit 1 + fi +fi + +# call grep ... +if [ $# -lt 1 ] +then + # ... on stdin + ${zcat} -fq - | ${grep} ${grep_args} -- "${pattern}" - +else + # ... on all files given on the command line + if [ ${silent} -lt 1 -a $# -gt 1 ]; then + grep_args="-H ${grep_args}" + fi + while [ $# -gt 0 ] + do + ${zcat} -fq -- "$1" | ${grep} --label="${1}" ${grep_args} -- "${pattern}" - + shift + done +fi + +exit 0 diff --git a/src/zstd/programs/zstdless b/src/zstd/programs/zstdless new file mode 100755 index 00000000..893799e7 --- /dev/null +++ b/src/zstd/programs/zstdless @@ -0,0 +1,2 @@ +#!/bin/sh +zstdcat "$@" | less |