summaryrefslogtreecommitdiffstats
path: root/mfbt/lz4
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mfbt/lz4/LICENSE24
-rw-r--r--mfbt/lz4/README.md169
-rw-r--r--mfbt/lz4/README.mozilla18
-rw-r--r--mfbt/lz4/lz4.c2722
-rw-r--r--mfbt/lz4/lz4.h842
-rw-r--r--mfbt/lz4/lz4file.c311
-rw-r--r--mfbt/lz4/lz4file.h93
-rw-r--r--mfbt/lz4/lz4frame.c2078
-rw-r--r--mfbt/lz4/lz4frame.h692
-rw-r--r--mfbt/lz4/lz4frame_static.h47
-rw-r--r--mfbt/lz4/lz4hc.c1631
-rw-r--r--mfbt/lz4/lz4hc.h413
-rw-r--r--mfbt/lz4/xxhash.c43
-rw-r--r--mfbt/lz4/xxhash.h6773
14 files changed, 15856 insertions, 0 deletions
diff --git a/mfbt/lz4/LICENSE b/mfbt/lz4/LICENSE
new file mode 100644
index 0000000000..488491695a
--- /dev/null
+++ b/mfbt/lz4/LICENSE
@@ -0,0 +1,24 @@
+LZ4 Library
+Copyright (c) 2011-2020, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+ list of conditions and the following disclaimer in the documentation and/or
+ other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/mfbt/lz4/README.md b/mfbt/lz4/README.md
new file mode 100644
index 0000000000..08d1cef2bf
--- /dev/null
+++ b/mfbt/lz4/README.md
@@ -0,0 +1,169 @@
+LZ4 - Library Files
+================================
+
+The `/lib` directory contains many files, but depending on project's objectives,
+not all of them are required.
+Limited systems may want to reduce the number of source files to include
+as a way to reduce binary size and dependencies.
+
+Capabilities are added at the "level" granularity, detailed below.
+
+#### Level 1 : Minimal LZ4 build
+
+The minimum required is **`lz4.c`** and **`lz4.h`**,
+which provides the fast compression and decompression algorithms.
+They generate and decode data using the [LZ4 block format].
+
+
+#### Level 2 : High Compression variant
+
+For more compression ratio at the cost of compression speed,
+the High Compression variant called **lz4hc** is available.
+Add files **`lz4hc.c`** and **`lz4hc.h`**.
+This variant also compresses data using the [LZ4 block format],
+and depends on regular `lib/lz4.*` source files.
+
+
+#### Level 3 : Frame support, for interoperability
+
+In order to produce compressed data compatible with `lz4` command line utility,
+it's necessary to use the [official interoperable frame format].
+This format is generated and decoded automatically by the **lz4frame** library.
+Its public API is described in `lib/lz4frame.h`.
+In order to work properly, lz4frame needs all other modules present in `/lib`,
+including lz4 and lz4hc, and also **xxhash**.
+So it's necessary to also include `xxhash.c` and `xxhash.h`.
+
+
+#### Level 4 : File compression operations
+
+As a helper around file operations,
+the library has been recently extended with `lz4file.c` and `lz4file.h`
+(still considered experimental at the time of this writing).
+These helpers allow opening, reading, writing, and closing files
+using transparent LZ4 compression / decompression.
+As a consequence, using `lz4file` adds a dependency on `<stdio.h>`.
+
+`lz4file` relies on `lz4frame` in order to produce compressed data
+conformant to the [LZ4 Frame format] specification.
+Consequently, to enable this capability,
+it's necessary to include all `*.c` and `*.h` files from `lib/` directory.
+
+
+#### Advanced / Experimental API
+
+Definitions which are not guaranteed to remain stable in future versions,
+are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`.
+As the name suggests, these definitions should only be invoked
+in the context of static linking ***only***.
+Otherwise, dependent application may fail on API or ABI break in the future.
+The associated symbols are also not exposed by the dynamic library by default.
+Should they be nonetheless needed, it's possible to force their publication
+by using build macros `LZ4_PUBLISH_STATIC_FUNCTIONS`
+and `LZ4F_PUBLISH_STATIC_FUNCTIONS`.
+
+
+#### Build macros
+
+The following build macros can be selected to adjust source code behavior at compilation time :
+
+- `LZ4_FAST_DEC_LOOP` : this triggers a speed optimized decompression loop, more powerful on modern cpus.
+ This loop works great on `x86`, `x64` and `aarch64` cpus, and is automatically enabled for them.
+ It's also possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
+ For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
+ and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
+
+- `LZ4_DISTANCE_MAX` : control the maximum offset that the compressor will allow.
+ Set to 65535 by default, which is the maximum value supported by lz4 format.
+ Reducing maximum distance will reduce opportunities for LZ4 to find matches,
+ hence will produce a worse compression ratio.
+ Setting a smaller max distance could allow compatibility with specific decoders with limited memory budget.
+ This build macro only influences the compressed output of the compressor.
+
+- `LZ4_DISABLE_DEPRECATE_WARNINGS` : invoking a deprecated function will make the compiler generate a warning.
+ This is meant to invite users to update their source code.
+ Should this be a problem, it's generally possible to make the compiler ignore these warnings,
+ for example with `-Wno-deprecated-declarations` on `gcc`,
+ or `_CRT_SECURE_NO_WARNINGS` for Visual Studio.
+ This build macro offers another project-specific method
+ by defining `LZ4_DISABLE_DEPRECATE_WARNINGS` before including the LZ4 header files.
+
+- `LZ4_FORCE_SW_BITCOUNT` : by default, the compression algorithm tries to determine lengths
+ by using bitcount instructions, generally implemented as fast single instructions in many cpus.
+ In case the target cpu doesn't support it, or the compiler intrinsic doesn't work or performs badly,
+ it's possible to use an optimized software path instead.
+ This is achieved by setting this build macro.
+ In most cases, it's not expected to be necessary,
+ but it can be legitimately considered for less common platforms.
+
+- `LZ4_ALIGN_TEST` : alignment test ensures that the memory area
+ passed as argument to become a compression state is suitably aligned.
+ This test can be disabled if it proves flaky, by setting this value to 0.
+
+- `LZ4_USER_MEMORY_FUNCTIONS` : replace calls to `<stdlib.h>`'s `malloc()`, `calloc()` and `free()`
+ by user-defined functions, which must be named `LZ4_malloc()`, `LZ4_calloc()` and `LZ4_free()`.
+ User functions must be available at link time.
+
+- `LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION` :
+ Remove support of dynamic memory allocation.
+ For more details, see description of this macro in `lib/lz4.c`.
+
+- `LZ4_FREESTANDING` : by setting this build macro to 1,
+ LZ4/HC removes dependencies on the C standard library,
+ including allocation functions and `memmove()`, `memcpy()`, and `memset()`.
+ This build macro is designed to help use LZ4/HC in restricted environments
+ (embedded, bootloader, etc).
+ For more details, see description of this macro in `lib/lz4.h`.
+
+
+
+#### Amalgamation
+
+lz4 source code can be amalgamated into a single file.
+One can combine all source code into `lz4_all.c` by using following command:
+```
+cat lz4.c lz4hc.c lz4frame.c > lz4_all.c
+```
+(`cat` file order is important) then compile `lz4_all.c`.
+All `*.h` files present in `/lib` remain necessary to compile `lz4_all.c`.
+
+
+#### Windows : using MinGW+MSYS to create DLL
+
+DLL can be created using MinGW+MSYS with the `make liblz4` command.
+This command creates `dll\liblz4.dll` and the import library `dll\liblz4.lib`.
+To override the `dlltool` command when cross-compiling on Linux, just set the `DLLTOOL` variable. Example of cross compilation on Linux with mingw-w64 64 bits:
+```
+make BUILD_STATIC=no CC=x86_64-w64-mingw32-gcc DLLTOOL=x86_64-w64-mingw32-dlltool OS=Windows_NT
+```
+The import library is only required with Visual C++.
+The header files `lz4.h`, `lz4hc.h`, `lz4frame.h` and the dynamic library
+`dll\liblz4.dll` are required to compile a project using gcc/MinGW.
+The dynamic library has to be added to linking options.
+It means that if a project that uses LZ4 consists of a single `test-dll.c`
+file it should be linked with `dll\liblz4.dll`. For example:
+```
+ $(CC) $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\liblz4.dll
+```
+The compiled executable will require LZ4 DLL which is available at `dll\liblz4.dll`.
+
+
+#### Miscellaneous
+
+Other files present in the directory are not source code. They are :
+
+ - `LICENSE` : contains the BSD license text
+ - `Makefile` : `make` script to compile and install lz4 library (static and dynamic)
+ - `liblz4.pc.in` : for `pkg-config` (used in `make install`)
+ - `README.md` : this file
+
+[official interoperable frame format]: ../doc/lz4_Frame_format.md
+[LZ4 Frame format]: ../doc/lz4_Frame_format.md
+[LZ4 block format]: ../doc/lz4_Block_format.md
+
+
+#### License
+
+All source material within the __lib__ directory is BSD 2-Clause licensed.
+See [LICENSE](LICENSE) for details.
+The license is also reproduced at the top of each source file.
diff --git a/mfbt/lz4/README.mozilla b/mfbt/lz4/README.mozilla
new file mode 100644
index 0000000000..3974a20090
--- /dev/null
+++ b/mfbt/lz4/README.mozilla
@@ -0,0 +1,18 @@
+This directory contains the LZ4 source from the upstream repo:
+https://github.com/lz4/lz4/
+
+Current version: 1.9.4 [5ff839680134437dbf4678f3d0c7b371d84f4964]
+
+Our in-tree copy of LZ4 does not depend on any generated files from the
+upstream build system, only the lz4*.{c,h} files found in the lib
+sub-directory. Therefore, it should be sufficient to simply overwrite
+the in-tree files with the updated ones from upstream.
+
+If the collection of source files changes, manual updates to moz.build may be
+needed as we don't use the upstream makefiles.
+
+Note that we do NOT use the copy of xxhash.{c,h} from the LZ4 repo. We
+instead use the newer release from that project's upstream repo:
+https://github.com/Cyan4973/xxHash
+
+Current version: 0.8.1 [35b0373c697b5f160d3db26b1cbb45a0d5ba788c]
diff --git a/mfbt/lz4/lz4.c b/mfbt/lz4/lz4.c
new file mode 100644
index 0000000000..654bfdf32f
--- /dev/null
+++ b/mfbt/lz4/lz4.c
@@ -0,0 +1,2722 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Copyright (C) 2011-2020, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+/*-************************************
+* Tuning parameters
+**************************************/
+/*
+ * LZ4_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4_HEAPMODE
+# define LZ4_HEAPMODE 0
+#endif
+
+/*
+ * LZ4_ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537
+
+
+/*-************************************
+* CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ * It can generate buggy code on targets whose assembly generation depends on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
+# if defined(__GNUC__) && \
+ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define LZ4_FORCE_MEMORY_ACCESS 2
+# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
+# define LZ4_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
+# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
+# define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+
+/*-************************************
+* Dependency
+**************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+# define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */
+#include "lz4.h"
+/* see also "memory routines" below */
+
+
+/*-************************************
+* Compiler Options
+**************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
+# include <intrin.h> /* only present in VS2005+ */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */
+#endif /* _MSC_VER */
+
+#ifndef LZ4_FORCE_INLINE
+# ifdef _MSC_VER /* Visual Studio */
+# define LZ4_FORCE_INLINE static __forceinline
+# else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define LZ4_FORCE_INLINE static inline
+# endif
+# else
+# define LZ4_FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+# endif /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
+# define LZ4_FORCE_O2 __attribute__((optimize("O2")))
+# undef LZ4_FORCE_INLINE
+# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
+#else
+# define LZ4_FORCE_O2
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
+# define expect(expr,value) (__builtin_expect ((expr),(value)) )
+#else
+# define expect(expr,value) (expr)
+#endif
+
+#ifndef likely
+#define likely(expr) expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
+#define unlikely(expr) expect((expr) != 0, 0)
+#endif
+
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif
+
+
+/*-************************************
+* Memory routines
+**************************************/
+
+/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
+ * Disable relatively high-level LZ4/HC functions that use dynamic memory
+ * allocation functions (malloc(), calloc(), free()).
+ *
+ * Note that this is a compile-time switch. And since it disables
+ * public/stable LZ4 v1 API functions, we don't recommend using this
+ * symbol to generate a library for distribution.
+ *
+ * The following public functions are removed when this symbol is defined.
+ * - lz4 : LZ4_createStream, LZ4_freeStream,
+ * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
+ * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
+ * LZ4_createHC (deprecated), LZ4_freeHC (deprecated)
+ * - lz4frame, lz4file : All LZ4F_* functions
+ */
+#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+# define ALLOC(s) lz4_error_memory_allocation_is_disabled
+# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
+# define FREEMEM(p) lz4_error_memory_allocation_is_disabled
+#elif defined(LZ4_USER_MEMORY_FUNCTIONS)
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void LZ4_free(void* p);
+# define ALLOC(s) LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p) LZ4_free(p)
+#else
+# include <stdlib.h> /* malloc, calloc, free */
+# define ALLOC(s) malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p) free(p)
+#endif
+
+#if ! LZ4_FREESTANDING
+# include <string.h> /* memset, memcpy */
+#endif
+#if !defined(LZ4_memset)
+# define LZ4_memset(p,v,s) memset((p),(v),(s))
+#endif
+#define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s))
+
+
+/*-************************************
+* Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* = 12 : ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1); /* = 13 */
+
+#define KB *(1 <<10) /* postfix multipliers : `64 KB` expands to `64 *(1 <<10)` */
+#define MB *(1 <<20)
+#define GB *(1U<<30) /* unsigned, unlike KB and MB */
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
+# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS 4
+#define ML_MASK ((1U<<ML_BITS)-1) /* = 15 : the ML_BITS lowest bits set */
+#define RUN_BITS (8-ML_BITS) /* = 4 : the bits of a byte not counted in ML_BITS */
+#define RUN_MASK ((1U<<RUN_BITS)-1) /* = 15 : the RUN_BITS lowest bits set */
+
+
+/*-************************************
+* Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+# include <assert.h> /* real assertions, only when LZ4_DEBUG >= 1 */
+#else
+# ifndef assert
+# define assert(condition) ((void)0) /* otherwise assertions expand to a no-op and `condition` is not evaluated */
+# endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations; a false `c` makes the enum initializer a division by zero, which is rejected at compile time */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+# include <stdio.h> /* fprintf, stderr */
+ static int g_debuglog_enable = 1; /* switch tested by DEBUGLOG before printing anything */
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled : expands to an empty block, arguments are discarded */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{ /* Returns 1 when the address held by `ptr` has none of the bits of (alignment-1) set,
+   * 0 otherwise. The mask test presumes `alignment` is a power of 2. */
+ return ((size_t)ptr & (alignment -1)) == 0;
+}
+
+
+/*-************************************
+* Types
+**************************************/
+#include <limits.h>
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef uintptr_t uptrval;
+#else
+# if UINT_MAX != 4294967295UL
+# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef size_t uptrval; /* generally true, except OpenVMS-64 */
+#endif
+
+#if defined(__x86_64__)
+ typedef U64 reg_t; /* 64-bits in x32 mode */
+#else
+ typedef size_t reg_t; /* 32-bits in x32 mode */
+#endif
+
+typedef enum {
+ notLimited = 0,
+ limitedOutput = 1,
+ fillOutput = 2
+} limitedOutput_directive;
+
+
+/*-************************************
+* Reading and writing into memory
+**************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if !defined(LZ4_memcpy)
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+# else
+# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+# endif
+#endif
+
+#if !defined(LZ4_memmove) /* can be provided externally; otherwise selected with the same compiler test as LZ4_memcpy */
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4_memmove __builtin_memmove
+# else
+# define LZ4_memmove memmove
+# endif
+#endif
+
+static unsigned LZ4_isLittleEndian(void)
+{ /* Runtime endianness probe : stores the value 1 in a U32 and returns its
+   * lowest-addressed byte, i.e. 1 when the least significant byte comes first
+   * in memory (little endian), 0 otherwise. */
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* Raw memory access helpers : LZ4_read16(), LZ4_read32(), LZ4_read_ARCH(),
+ * LZ4_write16() and LZ4_write32().
+ * Exactly one of the three implementations below gets compiled, depending on
+ * LZ4_FORCE_MEMORY_ACCESS. None of them swaps bytes : values are read and
+ * written in the byte order of the host.
+ *
+ * Method 2 : lie to the compiler about data alignment; use with caution */
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign; /* Method 1 : every access goes through this packed union */
+
+static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; }
+static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; }
+static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; }
+
+static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; }
+static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; }
+
+#else /* safe and portable access using memcpy() */
+
+static U16 LZ4_read16(const void* memPtr)
+{ /* Method 0 : copy sizeof(val) bytes into a local, then return it */
+ U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+ U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static reg_t LZ4_read_ARCH(const void* memPtr)
+{
+ reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static void LZ4_write16(void* memPtr, U16 value)
+{
+ LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+static void LZ4_write32(void* memPtr, U32 value)
+{
+ LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{ /* Reads the 2 bytes at memPtr as a little-endian 16-bit value,
+   * whatever the byte order of the host. */
+ if (LZ4_isLittleEndian()) {
+ return LZ4_read16(memPtr); /* host order is already little endian */
+ } else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)((U16)p[0] + (p[1]<<8)); /* p[0] : low byte, p[1] : high byte */
+ }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{ /* Stores `value` at memPtr as 2 bytes in little-endian order,
+   * whatever the byte order of the host. */
+ if (LZ4_isLittleEndian()) {
+ LZ4_write16(memPtr, value); /* host order is already little endian */
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE) value; /* low byte first */
+ p[1] = (BYTE)(value>>8); /* then high byte */
+ }
+}
+
+/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd.
+ * Copies from srcPtr to dstPtr in steps of 8 bytes, until the write position
+ * reaches or passes dstEnd.
+ * The loop is a do/while : one 8-byte copy is always performed,
+ * even when dstEnd <= dstPtr. */
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+}
+
+static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; /* indexed by a match offset < 8 : amount added to the source pointer, see e.g. LZ4_memcpy_using_offset_base() */
+static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; /* indexed by a match offset < 8 : amount subtracted from the source pointer, see e.g. LZ4_memcpy_using_offset_base() */
+
+
+#ifndef LZ4_FAST_DEC_LOOP
+# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+# define LZ4_FAST_DEC_LOOP 1
+# elif defined(__aarch64__) && defined(__APPLE__)
+# define LZ4_FAST_DEC_LOOP 1
+# elif defined(__aarch64__) && !defined(__clang__)
+ /* On non-Apple aarch64, we disable this optimization for clang because
+ * on certain mobile chipsets, performance is reduced with clang. For
+ * more information refer to https://github.com/lz4/lz4/pull/707 */
+# define LZ4_FAST_DEC_LOOP 1
+# else
+# define LZ4_FAST_DEC_LOOP 0
+# endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
+
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{ /* Copies from srcPtr to dstPtr up to dstEnd, where srcPtr is expected to sit
+   * exactly `offset` bytes before dstPtr (see the assert below).
+   * The first 8 bytes are written by one of the two branches, the remainder is
+   * handed over to LZ4_wildCopy8(), so the write can run past dstEnd. */
+ assert(srcPtr + offset == dstPtr);
+ if (offset < 8) {
+ LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
+ dstPtr[0] = srcPtr[0]; /* first 4 bytes are copied one at a time */
+ dstPtr[1] = srcPtr[1];
+ dstPtr[2] = srcPtr[2];
+ dstPtr[3] = srcPtr[3];
+ srcPtr += inc32table[offset]; /* table-driven source adjustment before the next 4 bytes */
+ LZ4_memcpy(dstPtr+4, srcPtr, 4);
+ srcPtr -= dec64table[offset]; /* table-driven source adjustment before the 8-byte steps of LZ4_wildCopy8() */
+ dstPtr += 8;
+ } else {
+ LZ4_memcpy(dstPtr, srcPtr, 8); /* offset >= 8 : a plain 8-byte copy */
+ dstPtr += 8;
+ srcPtr += 8;
+ }
+
+ LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
+}
+
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
+ * this version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16.
+ * The loop is a do/while : 32 bytes are always written starting at dstPtr,
+ * then copying goes on in steps of 32 bytes until the write position
+ * reaches or passes dstEnd. */
+LZ4_FORCE_INLINE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+}
+
+/* LZ4_memcpy_using_offset() presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there are at least 8 bytes available to write after dstEnd
+ *
+ * For offsets 1, 2 and 4, an 8-byte pattern `v` is built from the first source
+ * bytes and written repeatedly from dstPtr until dstEnd is reached or passed.
+ * Any other offset is delegated to LZ4_memcpy_using_offset_base(). */
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+ BYTE v[8];
+
+ assert(dstEnd >= dstPtr + MINMATCH);
+
+ switch(offset) {
+ case 1: /* v = the single source byte, repeated 8 times */
+ MEM_INIT(v, *srcPtr, 8);
+ break;
+ case 2: /* v = the first 2 source bytes, repeated 4 times */
+ LZ4_memcpy(v, srcPtr, 2);
+ LZ4_memcpy(&v[2], srcPtr, 2);
+#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */
+# pragma warning(push)
+# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
+#endif
+ LZ4_memcpy(&v[4], v, 4);
+#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */
+# pragma warning(pop)
+#endif
+ break;
+ case 4: /* v = the first 4 source bytes, repeated twice */
+ LZ4_memcpy(v, srcPtr, 4);
+ LZ4_memcpy(&v[4], srcPtr, 4);
+ break;
+ default: /* no pattern shortcut for this offset */
+ LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+ return;
+ }
+
+ LZ4_memcpy(dstPtr, v, 8); /* the first 8 bytes are written unconditionally */
+ dstPtr += 8;
+ while (dstPtr < dstEnd) { /* the last iteration can write past dstEnd */
+ LZ4_memcpy(dstPtr, v, 8);
+ dstPtr += 8;
+ }
+}
+#endif
+
+
+/*-************************************
+* Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (reg_t val)
+{ /* LZ4_NbCommonBytes() :
+   * `val` must be non-zero. LZ4_count() passes the XOR of two machine words
+   * read from memory, so a zero byte in `val` marks a byte on which both words agree.
+   * @return : number of zero bytes in `val`, counted in memory order,
+   *           that come before its first non-zero byte.
+   * Little-endian targets count trailing zero bits, big-endian targets count
+   * leading zero bits; the bit count is then divided by 8 (>> 3).
+   * Each case uses a compiler intrinsic when one is selected by the
+   * preprocessor tests, and a portable software fallback otherwise
+   * (LZ4_FORCE_SW_BITCOUNT forces the fallback). */
+ assert(val != 0);
+ if (LZ4_isLittleEndian()) {
+ if (sizeof(val) == 8) { /* 64 bits */
+# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+/*-*************************************************************************************************
+* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
+* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
+* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
+****************************************************************************************************/
+# if defined(__clang__) && (__clang_major__ < 10)
+ /* Avoid undefined clang-cl intrinsics issue.
+ * See https://github.com/lz4/lz4/pull/1017 for details. */
+ return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
+# else
+ /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+ return (unsigned)_tzcnt_u64(val) >> 3;
+# endif
+# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64(&r, (U64)val);
+ return (unsigned)r >> 3;
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_ctzll((U64)val) >> 3;
+# else
+ const U64 m = 0x0101010101010101ULL;
+ val ^= val - 1; /* keeps the lowest set bit and every bit below it, clears the rest */
+ return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
+# endif
+ } else /* 32 bits */ {
+# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward(&r, (U32)val);
+ return (unsigned)r >> 3;
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_ctz((U32)val) >> 3;
+# else
+ const U32 m = 0x01010101;
+ return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+# endif
+ }
+ } else /* Big Endian CPU */ {
+ if (sizeof(val)==8) { /* 64 bits */
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_clzll((U64)val) >> 3;
+# else
+#if 1 /* the table-based variant is the one compiled; the #else variant below is disabled */
+ /* this method is probably faster,
+ * but adds a 128 bytes lookup table */
+ static const unsigned char ctz7_tab[128] = {
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ };
+ U64 const mask = 0x0101010101010101ULL;
+ U64 const t = (((val >> 8) - mask) | val) & mask;
+ return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; /* top 7 bits of the product index the table */
+#else
+ /* this method doesn't consume memory space like the previous one,
+ * but it contains several branches,
+ * that may end up slowing execution */
+ static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
+ Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+ Note that this code path is never triggered in 32-bits mode. */
+ unsigned r;
+ if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+#endif
+# endif
+ } else /* 32 bits */ {
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_clz((U32)val) >> 3;
+# else
+ val >>= 8;
+ val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+ (val + 0x00FF0000)) >> 24;
+ return (unsigned)val ^ 3;
+# endif
+ }
+ }
+}
+
+
+#define STEPSIZE sizeof(reg_t)
+/* LZ4_count() :
+ * Compare the bytes at `pIn` with the bytes at `pMatch`, never reading `pIn`
+ * at or beyond `pInLimit`.
+ * @return : number of consecutive equal bytes found from the start.
+ * Whole machine words are compared first (STEPSIZE bytes at a time),
+ * then the remaining tail is checked with 4-, 2- and 1-byte reads. */
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const origin = pIn;
+    const BYTE* const wordLimit = pInLimit-(STEPSIZE-1);   /* a full word can be read below this address */
+
+    /* First word, handled outside of the loop :
+     * an early mismatch returns directly since pIn has not moved yet */
+    if (likely(pIn < wordLimit)) {
+        reg_t const firstDiff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (firstDiff) {
+            return LZ4_NbCommonBytes(firstDiff);
+        }
+        pIn += STEPSIZE;
+        pMatch += STEPSIZE;
+    }
+
+    /* Following words */
+    while (likely(pIn < wordLimit)) {
+        reg_t const wordDiff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (wordDiff) {
+            pIn += LZ4_NbCommonBytes(wordDiff);
+            return (unsigned)(pIn - origin);
+        }
+        pIn += STEPSIZE;
+        pMatch += STEPSIZE;
+    }
+
+    /* Tail : less than one full word left before pInLimit */
+    if (STEPSIZE==8) {
+        if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+            pIn += 4;
+            pMatch += 4;
+        }
+    }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+        pIn += 2;
+        pMatch += 2;
+    }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) {
+        pIn++;
+    }
+    return (unsigned)(pIn - origin);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+* Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1)); /* input sizes below this value are compressed with a byU16 table by the entry points of this file;
+ * LZ4_compress_generic_validated() returns 0 for a byU16 table with an input of this size or more */
+static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression run slower on incompressible data.
+ * Used as a shift count : the match search step is (searchMatchNb >> LZ4_skipTrigger) */
+
+
+/*-************************************
+* Local Structures and types
+**************************************/
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; /* What a hash table cell contains :
+ * - clearedTable : nothing usable; storing into or clearing a cell with this type is illegal (assert)
+ * - byPtr : a `const BYTE*` position
+ * - byU32 : a U32 index, i.e. a distance from a base pointer
+ * - byU16 : a U16 index, i.e. a distance from a base pointer */
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current
+ * blob being compressed are valid and refer to the preceding
+ * content (of length ctx->dictSize), which is available
+ * contiguously preceding in memory the content currently
+ * being compressed.
+ * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere
+ * else in memory, starting at ctx->dictionary with length
+ * ctx->dictSize.
+ * - usingDictCtx : Everything concerning the preceding content is
+ * in a separate context, pointed to by ctx->dictCtx.
+ * ctx->dictionary, ctx->dictSize, and table entries
+ * in the current context that refer to positions
+ * preceding the beginning of the current compression are
+ * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ * ->dictSize describe the location and size of the preceding
+ * content, and matches are found by looking in the ctx
+ * ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; /* dictSmall : the compressor also rejects match candidates
+ * whose index is below (startIndex - dictSize); noDictIssue : no such extra test */
+
+
+/*-************************************
+* Local Utils
+**************************************/
+/* LZ4_versionNumber() :
+ * @return : the value of LZ4_VERSION_NUMBER this file was compiled with. */
+int LZ4_versionNumber (void)
+{
+    return LZ4_VERSION_NUMBER;
+}
+/* LZ4_versionString() :
+ * @return : the value of LZ4_VERSION_STRING this file was compiled with. */
+const char* LZ4_versionString(void)
+{
+    return LZ4_VERSION_STRING;
+}
+/* LZ4_compressBound() :
+ * Function form of the LZ4_COMPRESSBOUND() macro.
+ * @return : whatever LZ4_COMPRESSBOUND(isize) evaluates to. */
+int LZ4_compressBound(int isize)
+{
+    int const bound = LZ4_COMPRESSBOUND(isize);
+    return bound;
+}
+/* LZ4_sizeofState() :
+ * @return : size in bytes of an LZ4_stream_t, as an int. */
+int LZ4_sizeofState(void)
+{
+    return (int)sizeof(LZ4_stream_t);
+}
+
+
+/*-****************************************
+* Internal Definitions, used only in Tests
+*******************************************/
+#if defined (__cplusplus)
+extern "C" { /* the prototypes below get C linkage when this file is compiled as C++ */
+#endif
+
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
+
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+ int compressedSize, int maxOutputSize,
+ const void* dictStart, size_t dictSize);
+int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
+ int compressedSize, int targetOutputSize, int dstCapacity,
+ const void* dictStart, size_t dictSize);
+#if defined (__cplusplus)
+} /* closes the extern "C" block opened above */
+#endif
+
+/*-******************************
+* Compression functions
+********************************/
+/* LZ4_hash4() :
+ * Multiplicative hash of a 32-bit sequence.
+ * The product is shifted right by (MINMATCH*8 - hashBits), where hashBits is
+ * LZ4_HASHLOG+1 for byU16 tables and LZ4_HASHLOG for every other table type. */
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+{
+    int const hashBits = (tableType == byU16) ? (LZ4_HASHLOG+1) : LZ4_HASHLOG;
+    return ((sequence * 2654435761U) >> ((MINMATCH*8)-hashBits));
+}
+
+/* LZ4_hash5() :
+ * Multiplicative hash of a 64-bit read, keeping the top hashBits bits of the product.
+ * 24 bits of `sequence` are shifted out before multiplying :
+ * towards the left on little-endian targets, towards the right otherwise.
+ * hashBits is LZ4_HASHLOG+1 for byU16 tables, LZ4_HASHLOG otherwise. */
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+    U32 const hashBits = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+    U32 const shift = 64 - hashBits;
+
+    if (!LZ4_isLittleEndian()) {
+        U64 const prime8bytes = 11400714785074694791ULL;
+        return (U32)(((sequence >> 24) * prime8bytes) >> shift);
+    }
+
+    {   U64 const prime5bytes = 889523592379ULL;
+        return (U32)(((sequence << 24) * prime5bytes) >> shift);
+    }
+}
+
+/* LZ4_hashPosition() :
+ * Hash the bytes found at `p`.
+ * LZ4_hash5() on a full-register read is used only when registers are
+ * 8 bytes wide and the table is not byU16; LZ4_hash4() on a 32-bit read otherwise. */
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+{
+    if ((sizeof(reg_t)!=8) || (tableType == byU16)) {
+        return LZ4_hash4(LZ4_read32(p), tableType);
+    }
+    return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+}
+
+/* LZ4_clearHash() :
+ * Reset cell `h` of the hash table : NULL for byPtr tables, 0 for byU32 and byU16 tables.
+ * Any other table type is illegal : it trips assert(0) and nothing is written. */
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+    if (tableType == byPtr) {
+        const BYTE** const ptrTable = (const BYTE**)tableBase;
+        ptrTable[h] = NULL;
+    } else if (tableType == byU32) {
+        U32* const idxTable = (U32*) tableBase;
+        idxTable[h] = 0;
+    } else if (tableType == byU16) {
+        U16* const idxTable = (U16*) tableBase;
+        idxTable[h] = 0;
+    } else {
+        /* clearedTable, or unexpected value : illegal! */
+        assert(0);
+    }
+}
+
+/* LZ4_putIndexOnHash() :
+ * Store index `idx` into cell `h` of an index-based hash table.
+ * Only byU32 and byU16 are legal; for byU16, `idx` is asserted to fit in 16 bits.
+ * Any other table type trips assert(0) and nothing is written. */
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
+{
+    if (tableType == byU32) {
+        U32* const idxTable = (U32*) tableBase;
+        idxTable[h] = idx;
+    } else if (tableType == byU16) {
+        U16* const idxTable = (U16*) tableBase;
+        assert(idx < 65536);
+        idxTable[h] = (U16)idx;
+    } else {
+        /* clearedTable, byPtr, or unexpected value : illegal! */
+        assert(0);
+    }
+}
+
+/* LZ4_putPositionOnHash() :
+ * Record position `p` into cell `h` of the hash table.
+ * - byPtr : the pointer itself is stored
+ * - byU32 / byU16 : the distance (p - srcBase) is stored, narrowed to the cell type
+ * clearedTable, or any value outside of tableType_t, is illegal :
+ * it trips assert(0) and the table is left untouched,
+ * same as LZ4_clearHash() and LZ4_putIndexOnHash(). */
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                  void* tableBase, tableType_t const tableType,
+                                  const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    default: /* fallthrough */
+    case clearedTable: { /* illegal! */ assert(0); return; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+    }
+}
+
+/* LZ4_putPosition() :
+ * Hash the bytes at `p`, then record `p` in the matching cell of the table. */
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    LZ4_putPositionOnHash(p, LZ4_hashPosition(p, tableType), tableBase, tableType, srcBase);
+}
+
+/* LZ4_getIndexOnHash() :
+ * Read the index stored in cell `h` of an index-based hash table.
+ * The matching position is obtained by the caller as base+index, or dictBase+index.
+ * Requirement 1 : tableType must be byU32 or byU16; anything else trips assert(0) and yields 0.
+ * Requirement 2 : h is presumed valid (within limits of hash table); only checked by assert().
+ */
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+{
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+    switch (tableType)
+    {
+    case byU32:
+        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+        return ((const U32*) tableBase)[h];
+    case byU16:
+        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+        return ((const U16*) tableBase)[h];
+    default: /* forbidden case */
+        assert(0);
+        return 0;
+    }
+}
+
+/* LZ4_getPositionOnHash() :
+ * Read the position recorded in cell `h` of the hash table.
+ * byPtr cells hold the position itself; byU32 cells hold an index added to `srcBase`.
+ * Every other table type is read as U16 indexes added to `srcBase`. */
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) {
+        return ((const BYTE* const*) tableBase)[h];
+    }
+    if (tableType == byU32) {
+        return srcBase + ((const U32*) tableBase)[h];
+    }
+    /* default, to ensure a return */
+    return srcBase + ((const U16*) tableBase)[h];
+}
+
+/* LZ4_getPosition() :
+ * Hash the bytes at `p`, then return the position recorded in the matching cell of the table. */
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
+{
+    return LZ4_getPositionOnHash(LZ4_hashPosition(p, tableType), tableBase, tableType, srcBase);
+}
+
+/* LZ4_prepareTable() :
+ * Get `cctx` ready for a compression of `inputSize` bytes using `tableType`.
+ * The hash table is either kept as is, or zeroed (together with currentOffset),
+ * and the dictionary fields of the context are always cleared. */
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+                 const int inputSize,
+                 const tableType_t tableType) {
+    tableType_t const previousType = (tableType_t)cctx->tableType;
+
+    /* A table that was never used is guaranteed to be all zeroes,
+     * hence usable in any mode. A used table must be examined :
+     * it is either safe to keep, or it has to be wiped. */
+    if (previousType != clearedTable) {
+        int mustReset;
+        assert(inputSize >= 0);
+        mustReset = (previousType != tableType)
+                 || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
+                 || ((tableType == byU32) && cctx->currentOffset > 1 GB)
+                 || (tableType == byPtr)
+                 || (inputSize >= 4 KB);
+        if (!mustReset) {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = (U32)clearedTable;
+        }
+    }
+
+    /* A gap pushes all previous entries more than LZ4_DISTANCE_MAX back,
+     * which is faster than compressing without a gap.
+     * Compressing with currentOffset == 0 is faster still,
+     * so that case is left untouched. */
+    if ((tableType == byU32) && (cctx->currentOffset != 0)) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Last step : forget any history */
+    cctx->dictSize = 0;
+    cctx->dictionary = NULL;
+    cctx->dictCtx = NULL;
+}
+
+/** LZ4_compress_generic_validated() :
+ * inlined, to ensure branches are decided at compilation time.
+ * Presumed already validated at this stage:
+ * - source != NULL
+ * - inputSize > 0
+ * @return : number of bytes written into `dest`,
+ * or 0 when nothing can be produced : output budget exceeded in limitedOutput mode,
+ * maxOutputSize < 1 in fillOutput mode, or inputSize >= LZ4_64Klimit with a byU16 table.
+ * In fillOutput mode, *inputConsumed receives the number of bytes of `source` that were consumed.
+ * Side effect : cctx->dictSize, cctx->currentOffset and cctx->tableType are updated
+ * before compression starts (and cctx->dictCtx is cleared in usingDictCtx mode).
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
+ LZ4_stream_t_internal* const cctx,
+ const char* const source,
+ char* const dest,
+ const int inputSize,
+ int* inputConsumed, /* only written when outputDirective == fillOutput */
+ const int maxOutputSize,
+ const limitedOutput_directive outputDirective,
+ const tableType_t tableType,
+ const dict_directive dictDirective,
+ const dictIssue_directive dictIssue,
+ const int acceleration)
+{
+ int result;
+ const BYTE* ip = (const BYTE*) source;
+
+ U32 const startIndex = cctx->currentOffset; /* index given to the first byte of source */
+ const BYTE* base = (const BYTE*) source - startIndex; /* so that base + startIndex == source */
+ const BYTE* lowLimit;
+
+ const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+ const BYTE* const dictionary =
+ dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+ const U32 dictSize =
+ dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+ const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */
+
+ int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+ U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictIssue == dictSmall */
+ const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
+ const BYTE* anchor = (const BYTE*) source;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
+ const BYTE* const matchlimit = iend - LASTLITERALS;
+
+ /* the dictCtx currentOffset is indexed on the start of the dictionary,
+ * while a dictionary in the current context precedes the currentOffset */
+ const BYTE* dictBase = (dictionary == NULL) ? NULL :
+ (dictDirective == usingDictCtx) ?
+ dictionary + dictSize - dictCtx->currentOffset :
+ dictionary + dictSize - startIndex;
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const olimit = op + maxOutputSize;
+
+ U32 offset = 0;
+ U32 forwardH;
+
+ DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+ assert(ip != NULL);
+ /* If init conditions are not met, we don't have to mark stream
+ * as having dirty context, since no action was taken yet */
+ if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+ if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
+ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
+ assert(acceleration >= 1);
+
+ lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+ /* Update context state */
+ if (dictDirective == usingDictCtx) {
+ /* Subsequent linked blocks can't use the dictionary. */
+ /* Instead, they use the block we just compressed. */
+ cctx->dictCtx = NULL;
+ cctx->dictSize = (U32)inputSize;
+ } else {
+ cctx->dictSize += (U32)inputSize;
+ }
+ cctx->currentOffset += (U32)inputSize;
+ cctx->tableType = (U32)tableType;
+
+ if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
+
+ /* First Byte */
+ LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+ ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+ /* Main Loop */
+ for ( ; ; ) {
+ const BYTE* match;
+ BYTE* token;
+ const BYTE* filledIp;
+
+ /* Find a match */
+ if (tableType == byPtr) {
+ const BYTE* forwardIp = ip;
+ int step = 1;
+ int searchMatchNb = acceleration << LZ4_skipTrigger;
+ do {
+ U32 const h = forwardH;
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger); /* the step grows as attempts accumulate without a match */
+
+ if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+ assert(ip < mflimitPlusOne);
+
+ match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+ } while ( (match+LZ4_DISTANCE_MAX < ip)
+ || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+ } else { /* byU32, byU16 */
+
+ const BYTE* forwardIp = ip;
+ int step = 1;
+ int searchMatchNb = acceleration << LZ4_skipTrigger;
+ do {
+ U32 const h = forwardH;
+ U32 const current = (U32)(forwardIp - base);
+ U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+ assert(matchIndex <= current);
+ assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+ assert(ip < mflimitPlusOne);
+
+ if (dictDirective == usingDictCtx) {
+ if (matchIndex < startIndex) {
+ /* there was no match, try the dictionary */
+ assert(tableType == byU32);
+ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+ match = dictBase + matchIndex;
+ matchIndex += dictDelta; /* make dictCtx index comparable with current context */
+ lowLimit = dictionary;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source;
+ }
+ } else if (dictDirective == usingExtDict) {
+ if (matchIndex < startIndex) {
+ DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
+ assert(startIndex - matchIndex >= MINMATCH);
+ assert(dictBase);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source;
+ }
+ } else { /* single continuous memory segment */
+ match = base + matchIndex;
+ }
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+ DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
+ if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
+ assert(matchIndex < current);
+ if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+ && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+ continue;
+ } /* too far */
+ assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
+
+ if (LZ4_read32(match) == LZ4_read32(ip)) {
+ if (maybe_extMem) offset = current - matchIndex;
+ break; /* match found */
+ }
+
+ } while(1);
+ }
+
+ /* Catch up : extend the match backwards while the preceding bytes are equal */
+ filledIp = ip;
+ while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+ /* Encode Literals */
+ { unsigned const litLength = (unsigned)(ip - anchor);
+ token = op++;
+ if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
+ (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ if ((outputDirective == fillOutput) &&
+ (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+ op--;
+ goto _last_literals;
+ }
+ if (litLength >= RUN_MASK) {
+ int len = (int)(litLength - RUN_MASK);
+ *token = (RUN_MASK<<ML_BITS);
+ for(; len >= 255 ; len-=255) *op++ = 255;
+ *op++ = (BYTE)len;
+ }
+ else *token = (BYTE)(litLength<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_wildCopy8(op, anchor, op+litLength);
+ op+=litLength;
+ DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+ (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
+ }
+
+_next_match:
+ /* at this stage, the following variables must be correctly set :
+ * - ip : at start of LZ operation
+ * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
+ * - offset : if maybe_extMem==1 (constant)
+ * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+ * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+ */
+
+ if ((outputDirective == fillOutput) &&
+ (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+ /* the match was too close to the end, rewind and go to last literals */
+ op = token;
+ goto _last_literals;
+ }
+
+ /* Encode Offset */
+ if (maybe_extMem) { /* static test */
+ DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+ assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
+ LZ4_writeLE16(op, (U16)offset); op+=2;
+ } else {
+ DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
+ assert(ip-match <= LZ4_DISTANCE_MAX);
+ LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+ }
+
+ /* Encode MatchLength */
+ { unsigned matchCode;
+
+ if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+ && (lowLimit==dictionary) /* match within extDict */ ) {
+ const BYTE* limit = ip + (dictEnd-match);
+ assert(dictEnd > match);
+ if (limit > matchlimit) limit = matchlimit;
+ matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+ ip += (size_t)matchCode + MINMATCH;
+ if (ip==limit) { /* the dictionary part is exhausted : keep counting against the start of source */
+ unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
+ matchCode += more;
+ ip += more;
+ }
+ DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
+ } else {
+ matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+ ip += (size_t)matchCode + MINMATCH;
+ DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
+ }
+
+ if ((outputDirective) && /* Check output buffer overflow */
+ (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+ if (outputDirective == fillOutput) {
+ /* Match description too long : reduce it */
+ U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
+ ip -= matchCode - newMatchCode;
+ assert(newMatchCode < matchCode);
+ matchCode = newMatchCode;
+ if (unlikely(ip <= filledIp)) {
+ /* We have already filled up to filledIp so if ip ends up less than filledIp
+ * we have positions in the hash table beyond the current position. This is
+ * a problem if we reuse the hash table. So we have to remove these positions
+ * from the hash table.
+ */
+ const BYTE* ptr;
+ DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+ for (ptr = ip; ptr <= filledIp; ++ptr) {
+ U32 const h = LZ4_hashPosition(ptr, tableType);
+ LZ4_clearHash(h, cctx->hashTable, tableType);
+ }
+ }
+ } else {
+ assert(outputDirective == limitedOutput);
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ }
+ if (matchCode >= ML_MASK) {
+ *token += ML_MASK;
+ matchCode -= ML_MASK;
+ LZ4_write32(op, 0xFFFFFFFF); /* length bytes are pre-filled with 255, four at a time */
+ while (matchCode >= 4*255) {
+ op+=4;
+ LZ4_write32(op, 0xFFFFFFFF);
+ matchCode -= 4*255;
+ }
+ op += matchCode / 255; /* step over the 255-valued bytes that are part of the length */
+ *op++ = (BYTE)(matchCode % 255);
+ } else
+ *token += (BYTE)(matchCode);
+ }
+ /* Ensure we have enough space for the last literals. */
+ assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
+
+ anchor = ip;
+
+ /* Test end of chunk */
+ if (ip >= mflimitPlusOne) break;
+
+ /* Fill table */
+ LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
+
+ /* Test next position */
+ if (tableType == byPtr) {
+
+ match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+ LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+ if ( (match+LZ4_DISTANCE_MAX >= ip)
+ && (LZ4_read32(match) == LZ4_read32(ip)) )
+ { token=op++; *token=0; goto _next_match; }
+
+ } else { /* byU32, byU16 */
+
+ U32 const h = LZ4_hashPosition(ip, tableType);
+ U32 const current = (U32)(ip-base);
+ U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+ assert(matchIndex < current);
+ if (dictDirective == usingDictCtx) {
+ if (matchIndex < startIndex) {
+ /* there was no match, try the dictionary */
+ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary; /* required for match length counter */
+ matchIndex += dictDelta;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source; /* required for match length counter */
+ }
+ } else if (dictDirective==usingExtDict) {
+ if (matchIndex < startIndex) {
+ assert(dictBase);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary; /* required for match length counter */
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source; /* required for match length counter */
+ }
+ } else { /* single memory segment */
+ match = base + matchIndex;
+ }
+ LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+ assert(matchIndex < current);
+ if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+ && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+ && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+ token=op++;
+ *token=0;
+ if (maybe_extMem) offset = current - matchIndex;
+ DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+ (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+ goto _next_match;
+ }
+ }
+
+ /* Prepare next loop */
+ forwardH = LZ4_hashPosition(++ip, tableType);
+
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRun = (size_t)(iend - anchor);
+ if ( (outputDirective) && /* Check output buffer overflow */
+ (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+ if (outputDirective == fillOutput) {
+ /* adapt lastRun to fill 'dst' */
+ assert(olimit >= op);
+ lastRun = (size_t)(olimit-op) - 1/*token*/;
+ lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
+ } else {
+ assert(outputDirective == limitedOutput);
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
+ if (lastRun >= RUN_MASK) {
+ size_t accumulator = lastRun - RUN_MASK;
+ *op++ = RUN_MASK << ML_BITS;
+ for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRun<<ML_BITS);
+ }
+ LZ4_memcpy(op, anchor, lastRun);
+ ip = anchor + lastRun;
+ op += lastRun;
+ }
+
+ if (outputDirective == fillOutput) {
+ *inputConsumed = (int) (((const char*)ip)-source);
+ }
+ result = (int)(((char*)op) - dest);
+ assert(result > 0);
+ DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+ return result;
+}
+
+/** LZ4_compress_generic() :
+ *  inlined, so that branches on the directive parameters are decided at compilation time.
+ *  Rejects unsupported sizes, handles the empty input case (src == NULL is accepted
+ *  when srcSize == 0), and forwards everything else to LZ4_compress_generic_validated().
+ * @return : 0 if srcSize is negative or larger than LZ4_MAX_INPUT_SIZE,
+ *           or if the input is empty and a limited output has no room;
+ *           1 for an empty input (a single zero byte is written into dst);
+ *           otherwise the result of LZ4_compress_generic_validated(). */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+                srcSize, dstCapacity);
+
+    /* a negative srcSize becomes a huge value once cast to U32 : one test covers both cases */
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) {
+        return 0;
+    }
+
+    if (srcSize != 0) {
+        assert(src != NULL);
+        return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                    inputConsumed, /* only written into if outputDirective == fillOutput */
+                    dstCapacity, outputDirective,
+                    tableType, dictDirective, dictIssue, acceleration);
+    }
+
+    /* srcSize == 0 : src == NULL is supported, an empty block is generated */
+    if ((outputDirective != notLimited) && (dstCapacity <= 0)) {
+        return 0;   /* no output, can't write anything */
+    }
+    DEBUGLOG(5, "Generating an empty block");
+    assert(outputDirective == notLimited || dstCapacity >= 1);
+    assert(dst != NULL);
+    dst[0] = 0;
+    if (outputDirective == fillOutput) {
+        assert (inputConsumed != NULL);
+        *inputConsumed = 0;
+    }
+    return 1;
+}
+
+
+/* LZ4_compress_fast_extState() :
+ * Compress `inputSize` bytes from `source` into `dest`, using `state` as working memory.
+ * `state` is (re)initialized with LZ4_initStream() on every call.
+ * `acceleration` is clamped : values below 1 become LZ4_ACCELERATION_DEFAULT,
+ * values above LZ4_ACCELERATION_MAX become LZ4_ACCELERATION_MAX.
+ * Output checks are skipped (notLimited) when maxOutputSize >= LZ4_compressBound(inputSize).
+ * Inputs smaller than LZ4_64Klimit use a byU16 table; larger ones use byU32,
+ * or byPtr when pointers are 4 bytes wide and source lies above LZ4_DISTANCE_MAX.
+ * @return : result of LZ4_compress_generic(),
+ *           or 0 if LZ4_initStream() rejects `state` (returns NULL). */
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t* const stream = LZ4_initStream(state, sizeof(LZ4_stream_t));
+    LZ4_stream_t_internal* ctx;
+    assert(stream != NULL);
+    /* Taking a member address through a NULL stream is undefined behavior,
+     * and the assert() above vanishes in release builds : fail explicitly instead. */
+    if (stream == NULL) return 0;
+    ctx = &stream->internal_donotuse;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        /* dest is large enough for any outcome : no output check needed */
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {
+        /* dest may be too small : compress with output checks */
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * Same job as LZ4_compress_fast_extState(), minus its expensive
+ * initialization step : the table is only prepared with LZ4_prepareTable().
+ *
+ * This is only safe when the state buffer is known to be correctly
+ * initialized already (see comment in lz4.h on LZ4_resetStream_fast()
+ * for a definition of "correctly initialized").
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* const cctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    int guaranteedFit;
+
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    /* when dst can hold the worst case, output checks are not needed */
+    guaranteedFit = (dstCapacity >= LZ4_compressBound(srcSize));
+
+    if (srcSize < LZ4_64Klimit) {
+        /* small input : 16-bit indexes.
+         * A non-zero currentOffset after preparation means the table was kept,
+         * hence compression runs with dictSmall. */
+        LZ4_prepareTable(cctx, srcSize, byU16);
+        if (guaranteedFit) {
+            if (cctx->currentOffset) {
+                return LZ4_compress_generic(cctx, src, dst, srcSize, NULL, 0, notLimited, byU16, noDict, dictSmall, acceleration);
+            }
+            return LZ4_compress_generic(cctx, src, dst, srcSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        }
+        if (cctx->currentOffset) {
+            return LZ4_compress_generic(cctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, dictSmall, acceleration);
+        }
+        return LZ4_compress_generic(cctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+    }
+
+    /* larger input : 32-bit indexes, or raw pointers on 4-byte-pointer targets */
+    {   tableType_t const tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        LZ4_prepareTable(cctx, srcSize, tableType);
+        if (guaranteedFit) {
+            return LZ4_compress_generic(cctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+        return LZ4_compress_generic(cctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+    }
+}
+
+
+/* LZ4_compress_fast() :
+ * Same as LZ4_compress_fast_extState(), with a working state provided internally :
+ * allocated with ALLOC() and released with FREEMEM() when LZ4_HEAPMODE is set,
+ * a local variable otherwise.
+ * @return : result of LZ4_compress_fast_extState(), or 0 if the allocation fails. */
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    int cSize;
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* const heapState = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (heapState == NULL) return 0;
+    cSize = LZ4_compress_fast_extState(heapState, source, dest, inputSize, maxOutputSize, acceleration);
+    FREEMEM(heapState);
+#else
+    LZ4_stream_t localState;
+    cSize = LZ4_compress_fast_extState(&localState, source, dest, inputSize, maxOutputSize, acceleration);
+#endif
+    return cSize;
+}
+
+
+/* LZ4_compress_default() :
+ * LZ4_compress_fast() called with an acceleration of 1.
+ * @return : result of LZ4_compress_fast(). */
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
+{
+    int const acceleration = 1;
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, acceleration);
+}
+
+
+/* LZ4_compress_destSize_extState() :
+ * Compress as much of `src` as fits into `targetDstSize` bytes of `dst`.
+ * `*srcSizePtr` is the input size on entry; on the fillOutput paths it is
+ * overwritten with the number of source bytes consumed.
+ * Warning : the stream is left in an unclean/broken state!
+ * It must be reset before the same state is used again with a
+ * _fastReset() or _continue() call. */
+static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    void* const initialized = LZ4_initStream(state, sizeof (*state));
+    assert(initialized != NULL); (void)initialized;
+
+    {   int const srcSize = *srcSizePtr;
+        LZ4_stream_t_internal* const cctx = &state->internal_donotuse;
+
+        if (targetDstSize >= LZ4_compressBound(srcSize)) {
+            /* compression success is guaranteed */
+            return LZ4_compress_fast_extState(state, src, dst, srcSize, targetDstSize, 1);
+        }
+
+        if (srcSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(cctx, src, dst, srcSize, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+        }
+
+        {   tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(cctx, src, dst, srcSize, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
+        }
+    }
+}
+
+
+/* LZ4_compress_destSize() :
+ * Wrapper around LZ4_compress_destSize_extState() that supplies a temporary
+ * LZ4_stream_t : heap-allocated when LZ4_HEAPMODE is set, on the stack otherwise.
+ * @return : the value returned by LZ4_compress_destSize_extState(),
+ *           or 0 when the heap allocation of the temporary state fails.
+ */
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+#if (LZ4_HEAPMODE)
+    int cSize;
+    LZ4_stream_t* const state = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (state == NULL) return 0;
+    cSize = LZ4_compress_destSize_extState(state, src, dst, srcSizePtr, targetDstSize);
+    FREEMEM(state);
+    return cSize;
+#else
+    LZ4_stream_t state;
+    return LZ4_compress_destSize_extState(&state, src, dst, srcSizePtr, targetDstSize);
+#endif
+}
+
+
+
+/*-******************************
+* Streaming functions
+********************************/
+
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+/* LZ4_createStream() :
+ * Allocates an LZ4_stream_t and initializes it with LZ4_initStream().
+ * @return : the new stream, or NULL when the allocation fails. */
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
+    DEBUGLOG(4, "LZ4_createStream %p", stream);
+    if (stream != NULL) {
+        LZ4_initStream(stream, sizeof(*stream));
+    }
+    return stream;
+}
+#endif
+
+/* LZ4_stream_t_alignment() :
+ * @return : the alignment value checked by LZ4_initStream().
+ *           With LZ4_ALIGN_TEST, it is derived from the padding inserted
+ *           between a leading char and an LZ4_stream_t member;
+ *           otherwise it is 1, which effectively disables the check. */
+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct { char pad; LZ4_stream_t stream; } alignProbe_t;
+    return sizeof(alignProbe_t) - sizeof(LZ4_stream_t);
+#else
+    return 1;  /* effectively disabled */
+#endif
+}
+
+/* LZ4_initStream() :
+ * Turns a caller-provided buffer into a zero-initialized LZ4_stream_t.
+ * @return : `buffer` cast to LZ4_stream_t*, or NULL when `buffer` is NULL,
+ *           `size` is smaller than sizeof(LZ4_stream_t),
+ *           or `buffer` fails the alignment check. */
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    if ( (buffer == NULL)
+      || (size < sizeof(LZ4_stream_t))
+      || !LZ4_isAligned(buffer, LZ4_stream_t_alignment()) ) {
+        return NULL;
+    }
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
+    return (LZ4_stream_t*)buffer;
+}
+
+/* LZ4_resetStream() :
+ * Zeroes the internal state of the stream.
+ * Now deprecated : prefer LZ4_initStream(), which is more general. */
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    size_t const stateSize = sizeof(LZ4_stream_t_internal);
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
+    MEM_INIT(LZ4_stream, 0, stateSize);
+}
+
+/* LZ4_resetStream_fast() :
+ * Hands the stream's internal state to LZ4_prepareTable(),
+ * with an input size of 0 and the byU32 table type. */
+void LZ4_resetStream_fast(LZ4_stream_t* ctx)
+{
+    LZ4_stream_t_internal* const cctx = &ctx->internal_donotuse;
+    LZ4_prepareTable(cctx, 0, byU32);
+}
+
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+/* LZ4_freeStream() :
+ * Releases a stream with FREEMEM(). A NULL argument is accepted and ignored.
+ * @return : always 0. */
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    if (LZ4_stream != NULL) {   /* support free on NULL */
+        DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
+        FREEMEM(LZ4_stream);
+    }
+    return 0;
+}
+#endif
+
+
+#define HASH_UNIT sizeof(reg_t)
+/* LZ4_loadDict() :
+ * Resets the stream, then references (at most) the last 64 KB of `dictionary`
+ * and indexes it into the stream's hash table, one position every 3 bytes.
+ * @return : nb of dictionary bytes retained,
+ *           or 0 when `dictSize` is smaller than HASH_UNIT (nothing is indexed). */
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* const dctx = &LZ4_dict->internal_donotuse;
+    const tableType_t tableType = byU32;
+    const BYTE* dictStart = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = dictStart + dictSize;
+    const BYTE* base;
+    const BYTE* cursor;
+
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* A full reset is required here (prepareTable() would not be enough) :
+     * it removes any risk of generating an overflowing matchIndex
+     * when compressing with this dictionary */
+    LZ4_resetStream(LZ4_dict);
+
+    /* The offset always advances by a whole 64 KB window :
+     * a longer dictionary gets truncated to its last 64 KB,
+     * and a shorter one still moves forward by a full window length.
+     * This guarantees that only valid offsets exist within the window,
+     * which lets LZ4_compress_fast_continue() select noDictIssue
+     * even when the dictionary isn't a full 64 KB. */
+    dctx->currentOffset += 64 KB;
+
+    if (dictSize < (int)HASH_UNIT) return 0;
+
+    if ((dictEnd - dictStart) > 64 KB) dictStart = dictEnd - 64 KB;
+    base = dictEnd - dctx->currentOffset;
+    dctx->dictionary = dictStart;
+    dctx->dictSize = (U32)(dictEnd - dictStart);
+    dctx->tableType = (U32)tableType;
+
+    for (cursor = dictStart; cursor <= dictEnd-HASH_UNIT; cursor += 3) {
+        LZ4_putPosition(cursor, dctx->hashTable, tableType, base);
+    }
+
+    return (int)dctx->dictSize;
+}
+
+/* LZ4_attach_dictionary() :
+ * Records the internal state of `dictionaryStream` as the dictCtx of `workingStream`.
+ * A NULL `dictionaryStream`, or one whose dictSize is 0, results in dictCtx == NULL. */
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
+{
+    LZ4_stream_t_internal* const working = &workingStream->internal_donotuse;
+    const LZ4_stream_t_internal* dictCtx = NULL;
+    if (dictionaryStream != NULL) { dictCtx = &dictionaryStream->internal_donotuse; }
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             dictCtx != NULL ? dictCtx->dictSize : 0);
+
+    if (dictCtx != NULL) {
+        /* With a current offset of zero, the external dictionary context
+         * would never be consulted, since no value of a table entry
+         * can then indicate a miss.
+         * Bump the offset to something non-zero in that case. */
+        if (working->currentOffset == 0) { working->currentOffset = 64 KB; }
+
+        /* An empty dictionary is not actually attached */
+        if (dictCtx->dictSize == 0) { dictCtx = NULL; }
+    }
+    working->dictCtx = dictCtx;
+}
+
+
+/* LZ4_renormDictT() :
+ * When currentOffset + nextSize would exceed 0x80000000,
+ * rescales every hash table entry so that currentOffset restarts at 64 KB,
+ * and restricts the dictionary to (at most) its last 64 KB.
+ * Does nothing otherwise. */
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
+{
+    assert(nextSize >= 0);
+    if (LZ4_dict->currentOffset + (unsigned)nextSize <= 0x80000000) return;   /* no potential ptrdiff_t overflow (32-bits mode) */
+
+    /* rescale hash table */
+    {   U32 const delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* const dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int idx;
+        DEBUGLOG(4, "LZ4_renormDictT");
+        for (idx=0; idx<LZ4_HASH_SIZE_U32; idx++) {
+            U32 const entry = LZ4_dict->hashTable[idx];
+            LZ4_dict->hashTable[idx] = (entry < delta) ? 0 : entry - delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+ const char* source, char* dest,
+ int inputSize, int maxOutputSize,
+ int acceleration)
+{ /* Streaming compression entry point :
+ * compresses `source` into `dest` in limitedOutput mode (bounded by `maxOutputSize`),
+ * selecting between prefix mode, attached dictionary context (dictCtx) mode
+ * and plain external dictionary mode, based on the state held in `LZ4_stream`.
+ * @return : the value returned by the selected LZ4_compress_generic() instantiation.
+ */
+ const tableType_t tableType = byU32;
+ LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
+ const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;
+
+ DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);
+
+ LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; /* values below 1 are replaced by the default */
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; /* clamp to the maximum */
+
+ /* invalidate tiny dictionaries */
+ if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */
+ && (dictEnd != source) /* not in prefix mode : source does not start where the dictionary ends */
+ && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */
+ && (streamPtr->dictCtx == NULL) /* not usingDictCtx : no dictionary context attached */
+ ) {
+ DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
+ /* remove dictionary existence from history, to employ faster prefix mode */
+ streamPtr->dictSize = 0;
+ streamPtr->dictionary = (const BYTE*)source;
+ dictEnd = source;
+ }
+
+ /* Check overlapping input/dictionary space :
+ * when the end of the input falls inside the dictionary,
+ * only the dictionary bytes located after the input are kept */
+ { const char* const sourceEnd = source + inputSize;
+ if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+ streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+ if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+ if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+ streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
+ }
+ }
+
+ /* prefix mode : source data follows dictionary */
+ if (dictEnd == source) {
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+ return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
+ else
+ return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
+ }
+
+ /* external dictionary mode */
+ { int result;
+ if (streamPtr->dictCtx) {
+ /* We depend here on the fact that dictCtx'es (produced by
+ * LZ4_loadDict) guarantee that their tables contain no references
+ * to offsets between dictCtx->currentOffset - 64 KB and
+ * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
+ * to use noDictIssue even when the dict isn't a full 64 KB.
+ */
+ if (inputSize > 4 KB) {
+ /* For compressing large blobs, it is faster to pay the setup
+ * cost to copy the dictionary's tables into the active context,
+ * so that the compression loop is only looking into one table.
+ */
+ LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+ } else { /* small data <= 4 KB : keep referencing the attached dictCtx (usingDictCtx) */
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+ }
+ } else { /* no dictCtx attached : plain external dictionary */
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+ } else {
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+ }
+ }
+ streamPtr->dictionary = (const BYTE*)source; /* the input just processed is recorded as the dictionary */
+ streamPtr->dictSize = (U32)inputSize;
+ return result;
+ }
+}
+
+
+/* LZ4_compress_forceExtDict() :
+ * Hidden debug function, to force-test external dictionary mode.
+ * Output is not bounded (notLimited) and acceleration is fixed to 1.
+ * After compression, `source` is recorded as the stream's dictionary.
+ * @return : the value returned by LZ4_compress_generic(). */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
+{
+    LZ4_stream_t_internal* const sctx = &LZ4_dict->internal_donotuse;
+    int cSize;
+
+    LZ4_renormDictT(sctx, srcSize);
+
+    {   int const dictIsSmall = (sctx->dictSize < 64 KB) && (sctx->dictSize < sctx->currentOffset);
+        cSize = dictIsSmall ?
+            LZ4_compress_generic(sctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1) :
+            LZ4_compress_generic(sctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    }
+
+    sctx->dictionary = (const BYTE*)source;
+    sctx->dictSize = (U32)srcSize;
+
+    return cSize;
+}
+
+
+/*! LZ4_saveDict() :
+ *  Moves the tail of the current dictionary into `safeBuffer`,
+ *  for the case where previously compressed data is not guaranteed
+ *  to remain available at its memory location.
+ *  The requested size is capped to 64 KB and to the current dictionary size.
+ *  Note : the stream references `safeBuffer` on return,
+ *         so there is no need to call LZ4_loadDict() afterwards :
+ *         LZ4_compress_fast_continue() can be called right after.
+ * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* const dctx = &LZ4_dict->internal_donotuse;
+
+    DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
+
+    /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; }
+    /* cannot save more than what is currently referenced */
+    if ((U32)dictSize > dctx->dictSize) { dictSize = (int)dctx->dictSize; }
+
+    assert((safeBuffer != NULL) || (dictSize == 0));
+    if (dictSize > 0) {
+        const BYTE* const tailStart = dctx->dictionary + dctx->dictSize - dictSize;
+        assert(dctx->dictionary);
+        LZ4_memmove(safeBuffer, tailStart, (size_t)dictSize);
+    }
+
+    dctx->dictionary = (const BYTE*)safeBuffer;
+    dctx->dictSize = (U32)dictSize;
+
+    return dictSize;
+}
+
+
+
+/*-*******************************
+ * Decompression functions
+ ********************************/
+
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; /* type of the `partialDecoding` argument of LZ4_decompress_generic() */
+
+#undef MIN /* drop any earlier definition before redefining it */
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) /* caution : the selected argument is evaluated twice */
+
+
+/* read_long_length_no_check() :
+ * variant for decompress_unsafe()
+ * - does not know end of input
+ * - presumes input is well formed
+ * - will consume at least one byte
+ * Accumulates successive input bytes into a length,
+ * stopping after the first byte different from 255.
+ * @pp : address of the input pointer; it is advanced past every byte consumed.
+ * @return : sum of all bytes consumed.
+ * Note : this helper is only meant to be used within this file,
+ *        hence `static`, which keeps its un-prefixed name out of the exported symbols. */
+static size_t read_long_length_no_check(const BYTE** pp)
+{
+    size_t b, l = 0;
+    do { b = **pp; (*pp)++; l += b; } while (b==255);
+    DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1);
+    return l;
+}
+
+/* core decoder variant for LZ4_decompress_fast*()
+ * for legacy support only : these entry points are deprecated.
+ * - Presumes input is correctly formed (no defense vs malformed inputs)
+ * - Does not know input size (presume input buffer is "large enough")
+ * - Decompress a full block (only)
+ * @istart : start of the compressed input.
+ * @ostart : start of the output buffer.
+ * @decompressedSize : exact nb of bytes to produce; decoding ends when the output reaches ostart+decompressedSize.
+ * @prefixSize : nb of bytes located immediately before @ostart that matches may reference.
+ * @return : nb of bytes read from input,
+ * or -1 when a sequence would overflow the output, an offset is out of range,
+ * or the end-of-block conditions (MFLIMIT, LASTLITERALS) are not respected.
+ * Note : this variant is not optimized for speed, just for maintenance.
+ * the goal is to remove support of decompress_fast*() variants by v2.0
+**/
+LZ4_FORCE_INLINE int
+LZ4_decompress_unsafe_generic(
+ const BYTE* const istart,
+ BYTE* const ostart,
+ int decompressedSize,
+
+ size_t prefixSize,
+ const BYTE* const dictStart, /* only if dict==usingExtDict */
+ const size_t dictSize /* note: =0 if dictStart==NULL */
+ )
+{
+ const BYTE* ip = istart;
+ BYTE* op = (BYTE*)ostart;
+ BYTE* const oend = ostart + decompressedSize;
+ const BYTE* const prefixStart = ostart - prefixSize;
+
+ DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
+ if (dictStart == NULL) assert(dictSize == 0);
+
+ while (1) {
+ /* start new sequence */
+ unsigned token = *ip++;
+
+ /* literals */
+ { size_t ll = token >> ML_BITS;
+ if (ll==15) {
+ /* long literal length */
+ ll += read_long_length_no_check(&ip);
+ }
+ if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
+ LZ4_memmove(op, ip, ll); /* support in-place decompression */
+ op += ll;
+ ip += ll;
+ if ((size_t)(oend-op) < MFLIMIT) {
+ if (op==oend) break; /* end of block */
+ DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
+ /* incorrect end of block :
+ * last match must start at least MFLIMIT==12 bytes before end of output block */
+ return -1;
+ } }
+
+ /* match */
+ { size_t ml = token & 15;
+ size_t const offset = LZ4_readLE16(ip);
+ ip+=2;
+
+ if (ml==15) {
+ /* long match length */
+ ml += read_long_length_no_check(&ip);
+ }
+ ml += MINMATCH;
+
+ if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
+
+ { const BYTE* match = op - offset;
+
+ /* out of range : offset reaches before the start of prefix + dictionary */
+ if (offset > (size_t)(op - prefixStart) + dictSize) {
+ DEBUGLOG(6, "offset out of range");
+ return -1;
+ }
+
+ /* check special case : extDict */
+ if (offset > (size_t)(op - prefixStart)) {
+ /* extDict scenario */
+ const BYTE* const dictEnd = dictStart + dictSize;
+ const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
+ size_t const extml = (size_t)(dictEnd - extMatch);
+ if (extml > ml) {
+ /* match entirely within extDict */
+ LZ4_memmove(op, extMatch, ml);
+ op += ml;
+ ml = 0;
+ } else {
+ /* match split between extDict & prefix */
+ LZ4_memmove(op, extMatch, extml);
+ op += extml;
+ ml -= extml;
+ }
+ match = prefixStart; /* the remainder of the match (if any) continues from the start of the prefix */
+ }
+
+ /* match copy - slow variant, supporting overlap copy */
+ { size_t u;
+ for (u=0; u<ml; u++) {
+ op[u] = match[u];
+ } } }
+ op += ml;
+ if ((size_t)(oend-op) < LASTLITERALS) {
+ DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
+ /* incorrect end of block :
+ * last match must stop at least LASTLITERALS==5 bytes before end of output block */
+ return -1;
+ }
+ } /* match */
+ } /* main loop */
+ return (int)(ip - istart);
+}
+
+
+/* Read the variable-length literal or match length.
+ *
+ * @ip : address of the input pointer; it is advanced past every byte consumed.
+ * @ilimit : position after which if length is not decoded, the input is necessarily corrupted.
+ * @initial_check : when non-zero, check *ip >= ilimit before the start of the loop,
+ * and return rvl_error if so.
+ * @return : sum of the bytes read (reading stops after the first byte != 255),
+ * or rvl_error when the read limit is passed,
+ * or when the accumulator exceeds half its range (only tested if sizeof(Rvl_t) < 8).
+**/
+typedef size_t Rvl_t;
+static const Rvl_t rvl_error = (Rvl_t)(-1);
+LZ4_FORCE_INLINE Rvl_t
+read_variable_length(const BYTE** ip, const BYTE* ilimit,
+ int initial_check)
+{
+ Rvl_t s, length = 0;
+ assert(ip != NULL);
+ assert(*ip != NULL);
+ assert(ilimit != NULL);
+ if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */
+ return rvl_error;
+ }
+ do {
+ s = **ip;
+ (*ip)++;
+ length += s;
+ if (unlikely((*ip) > ilimit)) { /* read limit reached */
+ return rvl_error;
+ }
+ /* accumulator overflow detection (32-bit mode only) */
+ if ((sizeof(length)<8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
+ return rvl_error;
+ }
+ } while (s==255);
+
+ return length;
+}
+
+/*! LZ4_decompress_generic() :
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important for performance that this function really get inlined,
+ * in order to remove useless branches during compilation optimization.
+ * @return : nb of bytes written into `dst` on success.
+ * -1 if `src` is NULL or `outputSize` is negative.
+ * With an empty output buffer (outputSize==0) : 0 when partialDecoding,
+ * or when the input consists of the single byte 0; -1 otherwise.
+ * -1 if `srcSize` is 0 while the output buffer is not empty.
+ * On any other error : a negative value, equal to -(nb of input bytes consumed) - 1.
+ */
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
+ const char* const src,
+ char* const dst,
+ int srcSize,
+ int outputSize, /* maximum nb of bytes that can be written into `dst` */
+
+ earlyEnd_directive partialDecoding, /* full, partial */
+ dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
+ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
+ const BYTE* const dictStart, /* only if dict==usingExtDict */
+ const size_t dictSize /* note : = 0 if noDict */
+ )
+{
+ if ((src == NULL) || (outputSize < 0)) { return -1; }
+
+ { const BYTE* ip = (const BYTE*) src;
+ const BYTE* const iend = ip + srcSize;
+
+ BYTE* op = (BYTE*) dst;
+ BYTE* const oend = op + outputSize;
+ BYTE* cpy;
+
+ const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
+
+ const int checkOffset = (dictSize < (int)(64 KB)); /* offsets are validated only when the dictionary is smaller than 64 KB; presumably because a 16-bit offset cannot reach past a full 64 KB of history */
+
+
+ /* Set up the "end" pointers for the shortcut. */
+ const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
+ const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
+
+ const BYTE* match;
+ size_t offset;
+ unsigned token;
+ size_t length;
+
+
+ DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+ /* Special cases */
+ assert(lowPrefix <= op);
+ if (unlikely(outputSize==0)) {
+ /* Empty output buffer */
+ if (partialDecoding) return 0;
+ return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+ }
+ if (unlikely(srcSize==0)) { return -1; }
+
+ /* LZ4_FAST_DEC_LOOP:
+ * designed for modern OoO performance cpus,
+ * where copying reliably 32-bytes is preferable to an unpredictable branch.
+ * note : fast loop may show a regression for some client arm chips. */
+#if LZ4_FAST_DEC_LOOP
+ if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+ DEBUGLOG(6, "skip fast decode loop");
+ goto safe_decode;
+ }
+
+ /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
+ while (1) {
+ /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+ assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+ assert(ip < iend);
+ token = *ip++;
+ length = token >> ML_BITS; /* literal length */
+
+ /* decode literal length */
+ if (length == RUN_MASK) {
+ size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
+ if (addl == rvl_error) { goto _output_error; }
+ length += addl;
+ if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+ if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+
+ /* copy literals */
+ cpy = op+length;
+ LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+ if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
+ LZ4_wildCopy32(op, ip, cpy);
+ ip += length; op = cpy;
+ } else {
+ cpy = op+length;
+ DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+ /* We don't need to check oend, since we check it once for each loop below */
+ if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
+ /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */
+ LZ4_memcpy(op, ip, 16);
+ ip += length; op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip); ip+=2;
+ match = op - offset;
+ assert(match <= op); /* overflow check */
+
+ /* get matchlength */
+ length = token & ML_MASK;
+
+ if (length == ML_MASK) {
+ size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
+ if (addl == rvl_error) { goto _output_error; }
+ length += addl;
+ length += MINMATCH;
+ if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+ goto safe_match_copy;
+ }
+ } else {
+ length += MINMATCH;
+ if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+ goto safe_match_copy;
+ }
+
+ /* Fastpath check: skip LZ4_wildCopy32 when true */
+ if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+ if (offset >= 8) {
+ assert(match >= lowPrefix);
+ assert(match <= op);
+ assert(op + 18 <= oend);
+
+ LZ4_memcpy(op, match, 8);
+ LZ4_memcpy(op+8, match+8, 8);
+ LZ4_memcpy(op+16, match+16, 2);
+ op += length;
+ continue;
+ } } }
+
+ if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ /* match starting within external dictionary */
+ if ((dict==usingExtDict) && (match < lowPrefix)) {
+ assert(dictEnd != NULL);
+ if (unlikely(op+length > oend-LASTLITERALS)) {
+ if (partialDecoding) {
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+ length = MIN(length, (size_t)(oend-op));
+ } else {
+ goto _output_error; /* end-of-block condition violated */
+ } }
+
+ if (length <= (size_t)(lowPrefix-match)) {
+ /* match fits entirely within external dictionary : just copy */
+ LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
+ op += length;
+ } else {
+ /* match stretches into both external dictionary and current block */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
+ BYTE* const endOfMatch = op + restSize;
+ const BYTE* copyFrom = lowPrefix;
+ while (op < endOfMatch) { *op++ = *copyFrom++; }
+ } else {
+ LZ4_memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ } }
+ continue;
+ }
+
+ /* copy match within block */
+ cpy = op + length;
+
+ assert((op <= oend) && (oend-op >= 32));
+ if (unlikely(offset<16)) {
+ LZ4_memcpy_using_offset(op, match, cpy, offset);
+ } else {
+ LZ4_wildCopy32(op, match, cpy);
+ }
+
+ op = cpy; /* wildcopy correction */
+ }
+ safe_decode:
+#endif
+
+ /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+ while (1) {
+ assert(ip < iend);
+ token = *ip++;
+ length = token >> ML_BITS; /* literal length */
+
+ /* A two-stage shortcut for the most common case:
+ * 1) If the literal length is 0..14, and there is enough space,
+ * enter the shortcut and copy 16 bytes on behalf of the literals
+ * (in the fast mode, only 8 bytes can be safely copied this way).
+ * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+ * manner; but we ensure that there's enough space in the output for
+ * those 18 bytes earlier, upon entering the shortcut (in other words,
+ * there is a combined check for both stages).
+ */
+ if ( (length != RUN_MASK)
+ /* strictly "less than" on input, to re-enter the loop with at least one byte */
+ && likely((ip < shortiend) & (op <= shortoend)) ) {
+ /* Copy the literals */
+ LZ4_memcpy(op, ip, 16);
+ op += length; ip += length;
+
+ /* The second stage: prepare for match copying, decode full info.
+ * If it doesn't work out, the info won't be wasted. */
+ length = token & ML_MASK; /* match length */
+ offset = LZ4_readLE16(ip); ip += 2;
+ match = op - offset;
+ assert(match <= op); /* check overflow */
+
+ /* Do not deal with overlapping matches. */
+ if ( (length != ML_MASK)
+ && (offset >= 8)
+ && (dict==withPrefix64k || match >= lowPrefix) ) {
+ /* Copy the match. */
+ LZ4_memcpy(op + 0, match + 0, 8);
+ LZ4_memcpy(op + 8, match + 8, 8);
+ LZ4_memcpy(op +16, match +16, 2);
+ op += length + MINMATCH;
+ /* Both stages worked, load the next token. */
+ continue;
+ }
+
+ /* The second stage didn't work out, but the info is ready.
+ * Propel it right to the point of match copying. */
+ goto _copy_match;
+ }
+
+ /* decode literal length */
+ if (length == RUN_MASK) {
+ size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
+ if (addl == rvl_error) { goto _output_error; }
+ length += addl;
+ if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+ if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+ }
+
+ /* copy literals */
+ cpy = op+length;
+#if LZ4_FAST_DEC_LOOP
+ safe_literal_copy:
+#endif
+ LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+ if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
+ /* We've either hit the input parsing restriction or the output parsing restriction.
+ * In the normal scenario, decoding a full block, it must be the last sequence,
+ * otherwise it's an error (invalid input or dimensions).
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
+ */
+ if (partialDecoding) {
+ /* Since we are partial decoding we may be in this block because of the output parsing
+ * restriction, which is not valid since the output buffer is allowed to be undersized.
+ */
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+ /* Finishing in the middle of a literals segment,
+ * due to lack of input.
+ */
+ if (ip+length > iend) {
+ length = (size_t)(iend-ip);
+ cpy = op + length;
+ }
+ /* Finishing in the middle of a literals segment,
+ * due to lack of output space.
+ */
+ if (cpy > oend) {
+ cpy = oend;
+ assert(op<=oend);
+ length = (size_t)(oend-op);
+ }
+ } else {
+ /* We must be on the last sequence (or invalid) because of the parsing limitations
+ * so check that we exactly consume the input and don't overrun the output buffer.
+ */
+ if ((ip+length != iend) || (cpy > oend)) {
+ DEBUGLOG(6, "should have been last run of literals")
+ DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+ DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+ goto _output_error;
+ }
+ }
+ LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */
+ ip += length;
+ op += length;
+ /* Necessarily EOF when !partialDecoding.
+ * When partialDecoding, it is EOF if we've either
+ * filled the output buffer or
+ * can't proceed with reading an offset for following match.
+ */
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
+ break;
+ }
+ } else {
+ LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */
+ ip += length; op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip); ip+=2;
+ match = op - offset;
+
+ /* get matchlength */
+ length = token & ML_MASK;
+
+ _copy_match:
+ if (length == ML_MASK) {
+ size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
+ if (addl == rvl_error) { goto _output_error; }
+ length += addl;
+ if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
+ }
+ length += MINMATCH;
+
+#if LZ4_FAST_DEC_LOOP
+ safe_match_copy:
+#endif
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
+ /* match starting within external dictionary */
+ if ((dict==usingExtDict) && (match < lowPrefix)) {
+ assert(dictEnd != NULL);
+ if (unlikely(op+length > oend-LASTLITERALS)) {
+ if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+ else goto _output_error; /* doesn't respect parsing restriction */
+ }
+
+ if (length <= (size_t)(lowPrefix-match)) {
+ /* match fits entirely within external dictionary : just copy */
+ LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
+ op += length;
+ } else {
+ /* match stretches into both external dictionary and current block */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
+ BYTE* const endOfMatch = op + restSize;
+ const BYTE* copyFrom = lowPrefix;
+ while (op < endOfMatch) *op++ = *copyFrom++;
+ } else {
+ LZ4_memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ } }
+ continue;
+ }
+ assert(match >= lowPrefix);
+
+ /* copy match within block */
+ cpy = op + length;
+
+ /* partialDecoding : may end anywhere within the block */
+ assert(op<=oend);
+ if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+ size_t const mlen = MIN(length, (size_t)(oend-op));
+ const BYTE* const matchEnd = match + mlen;
+ BYTE* const copyEnd = op + mlen;
+ if (matchEnd > op) { /* overlap copy */
+ while (op < copyEnd) { *op++ = *match++; }
+ } else {
+ LZ4_memcpy(op, match, mlen);
+ }
+ op = copyEnd;
+ if (op == oend) { break; }
+ continue;
+ }
+
+ if (unlikely(offset<8)) {
+ LZ4_write32(op, 0); /* silence msan warning when offset==0 */
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += inc32table[offset];
+ LZ4_memcpy(op+4, match, 4);
+ match -= dec64table[offset];
+ } else {
+ LZ4_memcpy(op, match, 8);
+ match += 8;
+ }
+ op += 8;
+
+ if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+ BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+ if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+ if (op < oCopyLimit) {
+ LZ4_wildCopy8(op, match, oCopyLimit);
+ match += oCopyLimit - op;
+ op = oCopyLimit;
+ }
+ while (op < cpy) { *op++ = *match++; }
+ } else {
+ LZ4_memcpy(op, match, 8);
+ if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
+ }
+ op = cpy; /* wildcopy correction */
+ }
+
+ /* end of decoding */
+ DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
+ return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
+
+ /* Overflow error detected */
+ _output_error:
+ return (int) (-(((const char*)ip)-src))-1; /* negative : -(input bytes consumed so far) - 1 */
+ }
+}
+
+
+/*===== Instantiate the API decoding functions. =====*/
+
+/* LZ4_decompress_safe() :
+ * Full-block decoding, with neither prefix nor external dictionary :
+ * the low prefix limit is `dest` itself. */
+LZ4_FORCE_O2
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    const BYTE* const lowPrefix = (BYTE*)dest;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  decode_full_block, noDict,
+                                  lowPrefix, NULL, 0);
+}
+
+/* LZ4_decompress_safe_partial() :
+ * Partial decoding, with neither prefix nor external dictionary.
+ * The output limit is the smaller of `targetOutputSize` and `dstCapacity`. */
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+    int const outputLimit = (targetOutputSize < dstCapacity) ? targetOutputSize : dstCapacity;
+    const BYTE* const lowPrefix = (BYTE*)dst;
+    return LZ4_decompress_generic(src, dst, compressedSize, outputLimit,
+                                  partial_decode, noDict,
+                                  lowPrefix, NULL, 0);
+}
+
+/* LZ4_decompress_fast() :
+ * Forwards to LZ4_decompress_unsafe_generic(),
+ * with no prefix (prefixSize 0) and no external dictionary. */
+LZ4_FORCE_O2
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    const BYTE* const istart = (const BYTE*)source;
+    BYTE* const ostart = (BYTE*)dest;
+    DEBUGLOG(5, "LZ4_decompress_fast");
+    return LZ4_decompress_unsafe_generic(istart, ostart, originalSize,
+                                         0, NULL, 0);
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+/* LZ4_decompress_safe_withPrefix64k() :
+ * Exported, an obsolete API function.
+ * Full-block decoding in withPrefix64k mode :
+ * the low prefix limit is set 64 KB before `dest`. */
+LZ4_FORCE_O2
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    const BYTE* const lowPrefix = (BYTE*)dest - 64 KB;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  decode_full_block, withPrefix64k,
+                                  lowPrefix, NULL, 0);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+    /* Partial-decode variant of the 64 KB prefix case: the output limit is the
+     * smaller of the requested target size and the destination capacity. */
+    int const outputLimit = MIN(targetOutputSize, dstCapacity);
+    BYTE* const prefixStart = (BYTE*)dest - 64 KB;
+    return LZ4_decompress_generic(source, dest, compressedSize, outputLimit,
+                                  partial_decode, withPrefix64k,
+                                  prefixStart, NULL, 0);
+}
+
+/* Obsolete API function, the "fast" counterpart of LZ4_decompress_safe_withPrefix64k(). */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    const BYTE* const in = (const BYTE*)source;
+    BYTE* const out = (BYTE*)dest;
+    /* prefix size of 64 KB, no external dictionary (NULL, 0) */
+    return LZ4_decompress_unsafe_generic(in, out, originalSize, 64 KB, NULL, 0);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    /* The prefix argument is the address prefixSize bytes before the output
+     * position; no external dictionary is supplied (NULL, 0). */
+    BYTE* const prefixStart = (BYTE*)dest - prefixSize;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  decode_full_block, noDict,
+                                  prefixStart, NULL, 0);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
+                                                       size_t prefixSize)
+{
+    /* Partial-decode variant of the small-prefix case: the output limit is the
+     * smaller of the requested target size and the destination capacity. */
+    int const outputLimit = MIN(targetOutputSize, dstCapacity);
+    BYTE* const prefixStart = (BYTE*)dest - prefixSize;
+    return LZ4_decompress_generic(source, dest, compressedSize, outputLimit,
+                                  partial_decode, noDict,
+                                  prefixStart, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)
+{
+    /* Full-block decode in usingExtDict mode: the prefix argument is the output
+     * buffer itself, and the dictionary is passed as (dictStart, dictSize). */
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    BYTE* const outStart = (BYTE*)dest;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  decode_full_block, usingExtDict,
+                                  outStart, extDict, dictSize);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
+                                             int compressedSize, int targetOutputSize, int dstCapacity,
+                                             const void* dictStart, size_t dictSize)
+{
+    /* Partial-decode variant of the usingExtDict case: the output limit is the
+     * smaller of the requested target size and the destination capacity. */
+    int const outputLimit = MIN(targetOutputSize, dstCapacity);
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    BYTE* const outStart = (BYTE*)dest;
+    return LZ4_decompress_generic(source, dest, compressedSize, outputLimit,
+                                  partial_decode, usingExtDict,
+                                  outStart, extDict, dictSize);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    const BYTE* const in = (const BYTE*)source;
+    BYTE* const out = (BYTE*)dest;
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    /* prefix size 0; the dictionary is passed as (dictStart, dictSize) */
+    return LZ4_decompress_unsafe_generic(in, out, originalSize, 0, extDict, dictSize);
+}
+
+/* "Double dictionary" mode, intended for e.g. ring buffers:
+ * one part of the dictionary is given as a prefix of prefixSize bytes located
+ * just before dest, the other part is given through dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest - prefixSize;
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  decode_full_block, usingExtDict,
+                                  prefixStart, extDict, dictSize);
+}
+
+/*===== streaming decompression functions =====*/
+
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{
+    void* ctx;
+    /* compile-time check: the public type is large enough to hold the internal state */
+    LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
+    ctx = ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
+    return (LZ4_streamDecode_t*)ctx;
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+    /* Releasing a NULL handle is supported and does nothing. Always returns 0. */
+    if (LZ4_stream != NULL) {
+        FREEMEM(LZ4_stream);
+    }
+    return 0;
+}
+#endif
+
+/*! LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * On success, the prefix is set to the `dictSize` bytes ending at
+ * `dictionary + dictSize`, and any external dictionary reference is cleared.
+ * @return : 1 if OK,
+ * 0 if error : NULL tracking context, negative dictSize,
+ * or NULL dictionary with a non-zero dictSize.
+ * On error, the tracking context is left unmodified.
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd;
+
+    /* Validate at runtime rather than through assert() only,
+     * so that release (NDEBUG) builds report the documented error code. */
+    if (LZ4_streamDecode == NULL) return 0;
+    if (dictSize < 0) return 0; /* would otherwise wrap into a huge size_t prefix size */
+    if ((dictSize > 0) && (dictionary == NULL)) return 0;
+
+    lz4sd = &LZ4_streamDecode->internal_donotuse;
+    lz4sd->prefixSize = (size_t)dictSize;
+    if (dictSize > 0) {
+        lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+    } else {
+        /* no arithmetic on `dictionary` here : it is allowed to be NULL when dictSize == 0 */
+        lz4sd->prefixEnd = (const BYTE*) dictionary;
+    }
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize = 0;
+    return 1;
+}
+
+/*! LZ4_decoderRingBufferSize() :
+ * For the (optional) scenario where streaming decompression writes into a ring buffer,
+ * gives the minimum size this ring buffer must have
+ * to be compatible with any source respecting the maxBlockSize condition.
+ * Note : in a ring buffer scenario,
+ * blocks are presumed decompressed next to each other.
+ * When not enough space remains for the next block (remainingSize < maxBlockSize),
+ * decoding resumes from the beginning of the ring buffer.
+ * Values of maxBlockSize below 16 are treated as 16.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize : negative, or > LZ4_MAX_INPUT_SIZE).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    if ((maxBlockSize < 0) || (maxBlockSize > LZ4_MAX_INPUT_SIZE)) {
+        return 0;
+    }
+    {   int const effectiveBlockSize = (maxBlockSize < 16) ? 16 : maxBlockSize;
+        return LZ4_DECODER_RING_BUFFER_SIZE(effectiveBlockSize);
+    }
+}
+
+/*
+*_continue() :
+    Streaming decoders : each call decompresses one block of a "streaming" sequence.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    When that is not possible, save the relevant part of the decoded data into a safe buffer,
+    and declare where it stands using LZ4_setStreamDecode()
+*/
+LZ4_FORCE_O2
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* const sd = &LZ4_streamDecode->internal_donotuse;
+    BYTE* const out = (BYTE*)dest;
+    int decoded;
+
+    if (sd->prefixSize == 0) {
+        /* First call : no dictionary yet. */
+        assert(sd->extDictSize == 0);
+        decoded = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (decoded > 0) {
+            sd->prefixSize = (size_t)decoded;
+            sd->prefixEnd = out + decoded;
+        }
+        return decoded;
+    }
+
+    if (sd->prefixEnd != out) {
+        /* Output is not contiguous with the previous data (buffer wrapped around,
+         * or another buffer is used) : the former prefix becomes the external dictionary.
+         * Note : the extDict fields are updated before decoding, whatever its outcome. */
+        sd->extDictSize = sd->prefixSize;
+        sd->externalDict = sd->prefixEnd - sd->extDictSize;
+        decoded = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                                   sd->externalDict, sd->extDictSize);
+        if (decoded > 0) {
+            sd->prefixSize = (size_t)decoded;
+            sd->prefixEnd = out + decoded;
+        }
+        return decoded;
+    }
+
+    /* Output directly follows the previous data : keep extending the current segment. */
+    if (sd->prefixSize >= 64 KB - 1) {
+        decoded = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+    } else if (sd->extDictSize == 0) {
+        decoded = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                      sd->prefixSize);
+    } else {
+        decoded = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                 sd->prefixSize, sd->externalDict, sd->extDictSize);
+    }
+    if (decoded > 0) {
+        sd->prefixSize += (size_t)decoded;
+        sd->prefixEnd += decoded;
+    }
+    return decoded;
+}
+
+/* Streaming variant of LZ4_decompress_fast() :
+ * same three cases as LZ4_decompress_safe_continue()
+ * (first call / output contiguous with previous data / output elsewhere).
+ * On success, the tracked prefix is updated using originalSize.
+ */
+LZ4_FORCE_O2 int
+LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
+                              const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* const sd =
+        (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
+    BYTE* const out = (BYTE*)dest;
+    int status;
+
+    DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
+    assert(originalSize >= 0);
+
+    if (sd->prefixSize == 0) {
+        DEBUGLOG(5, "first invocation : no prefix nor extDict");
+        assert(sd->extDictSize == 0);
+        status = LZ4_decompress_fast(source, dest, originalSize);
+        if (status > 0) {
+            sd->prefixSize = (size_t)originalSize;
+            sd->prefixEnd = out + originalSize;
+        }
+        return status;
+    }
+
+    if (sd->prefixEnd != out) {
+        DEBUGLOG(5, "prefix becomes extDict");
+        /* the extDict fields are updated before decoding, whatever its outcome */
+        sd->extDictSize = sd->prefixSize;
+        sd->externalDict = sd->prefixEnd - sd->extDictSize;
+        status = LZ4_decompress_fast_extDict(source, dest, originalSize,
+                                             sd->externalDict, sd->extDictSize);
+        if (status > 0) {
+            sd->prefixSize = (size_t)originalSize;
+            sd->prefixEnd = out + originalSize;
+        }
+        return status;
+    }
+
+    DEBUGLOG(5, "continue using existing prefix");
+    status = LZ4_decompress_unsafe_generic(
+                (const BYTE*)source, out, originalSize,
+                sd->prefixSize,
+                sd->externalDict, sd->extDictSize);
+    if (status > 0) {
+        sd->prefixSize += (size_t)originalSize;
+        sd->prefixEnd += originalSize;
+    }
+    return status;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+ These decoding functions work the same as the "_continue" ones,
+ except that the dictionary must be explicitly provided as parameters.
+*/
+
+/* Picks the decoding variant matching the dictionary layout :
+ * - no dictionary (dictSize == 0) ;
+ * - dictionary ending exactly at dest : prefix mode (64 KB variant when dictSize >= 64 KB - 1) ;
+ * - dictionary located anywhere else : external dictionary mode.
+ */
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    if (dictSize==0) {
+        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+    }
+
+    if (dictStart+dictSize != dest) {
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
+    }
+
+    if (dictSize >= 64 KB - 1) {
+        return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+    }
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+}
+
+/* Partial-decode counterpart of LZ4_decompress_safe_usingDict() :
+ * - no dictionary (dictSize == 0) ;
+ * - dictionary ending exactly at dest : prefix mode (64 KB variant when dictSize >= 64 KB - 1) ;
+ * - dictionary located anywhere else : external dictionary mode.
+ */
+int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
+{
+    if (dictSize==0) {
+        return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
+    }
+
+    if (dictStart+dictSize != dest) {
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
+    }
+
+    if (dictSize >= 64 KB - 1) {
+        return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
+    }
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
+}
+
+/* "fast" decoding with an explicitly provided dictionary :
+ * - no dictionary, or dictionary ending exactly at dest : decoded with a prefix of dictSize bytes ;
+ * - dictionary located anywhere else : decoded in external dictionary mode.
+ * dictSize must be >= 0.
+ */
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+    /* Checked up front : both paths below convert dictSize to size_t,
+     * so a negative value must be caught before either of them runs. */
+    assert(dictSize >= 0);
+    if (dictSize==0 || dictStart+dictSize == dest) {
+        return LZ4_decompress_unsafe_generic(
+                    (const BYTE*)source, (BYTE*)dest, originalSize,
+                    (size_t)dictSize, NULL, 0);
+    }
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
+}
+
+
+/*=*************************************************
+* Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    /* obsolete name : forwards its arguments unchanged to LZ4_compress_default() */
+    int const written = LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+    return written;
+}
+int LZ4_compress(const char* src, char* dest, int srcSize)
+{
+    /* this legacy signature carries no capacity argument :
+     * the destination capacity passed on is LZ4_compressBound(srcSize) */
+    int const dstCapacity = LZ4_compressBound(srcSize);
+    return LZ4_compress_default(src, dest, srcSize, dstCapacity);
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{
+    /* obsolete name : LZ4_compress_fast_extState() with acceleration 1 */
+    int const acceleration = 1;
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, acceleration);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{
+    /* obsolete name : LZ4_compress_fast_extState() with acceleration 1,
+     * the destination capacity passed on is LZ4_compressBound(srcSize) */
+    int const dstCapacity = LZ4_compressBound(srcSize);
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    /* obsolete name : LZ4_compress_fast_continue() with acceleration 1 */
+    int const acceleration = 1;
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, acceleration);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+    /* obsolete name : LZ4_compress_fast_continue() with acceleration 1,
+     * the destination capacity passed on is LZ4_compressBound(inputSize) */
+    int const dstCapacity = LZ4_compressBound(inputSize);
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, dstCapacity, 1);
+}
+
+/*
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{
+    /* deprecated alias of LZ4_decompress_fast() */
+    int const status = LZ4_decompress_fast(source, dest, outputSize);
+    return status;
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{
+    /* deprecated alias of LZ4_decompress_safe() */
+    int const status = LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+    return status;
+}
+
+/* Obsolete Streaming functions */
+
+/* Obsolete : size in bytes of LZ4_stream_t, returned as an int. */
+int LZ4_sizeofStreamState(void)
+{
+    return (int)sizeof(LZ4_stream_t);
+}
+
+/* Obsolete : resets `state`, interpreted as an LZ4_stream_t, via LZ4_resetStream().
+ * `inputBuffer` is ignored. Always returns 0. */
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)state;
+    (void)inputBuffer;
+    LZ4_resetStream(stream);
+    return 0;
+}
+
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+/* Obsolete : returns the result of LZ4_createStream(); `inputBuffer` is ignored. */
+void* LZ4_create (char* inputBuffer)
+{
+    LZ4_stream_t* const stream = LZ4_createStream();
+    (void)inputBuffer;
+    return stream;
+}
+#endif
+
+/* Obsolete : returns the `dictionary` pointer stored in the stream state, as a non-const char*. */
+char* LZ4_slideInputBuffer (void* state)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)state;
+    /* go through an integer type to avoid const char * -> char * conversion warning */
+    uptrval const dictAddr = (uptrval)stream->internal_donotuse.dictionary;
+    return (char *)dictAddr;
+}
+
+#endif /* LZ4_COMMONDEFS_ONLY */
diff --git a/mfbt/lz4/lz4.h b/mfbt/lz4/lz4.h
new file mode 100644
index 0000000000..491c6087c4
--- /dev/null
+++ b/mfbt/lz4/lz4.h
@@ -0,0 +1,842 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Header File
+ * Copyright (C) 2011-2020, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h> /* size_t */
+
+
+/**
+ Introduction
+
+ LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
+ scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
+ multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+ The LZ4 compression library provides in-memory compression and decompression functions.
+ It gives full buffer control to user.
+ Compression can be done in:
+ - a single step (described as Simple Functions)
+ - a single step, reusing a context (described in Advanced Functions)
+ - unbounded multiple steps (described as Streaming compression)
+
+ lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+ Decompressing such a compressed block requires additional metadata.
+ Exact metadata depends on exact decompression function.
+ For the typical case of LZ4_decompress_safe(),
+ metadata includes block's compressed size, and maximum bound of decompressed size.
+ Each application is free to encode and pass such metadata in whichever way it wants.
+
+ lz4.h only handles blocks; it cannot generate Frames.
+
+ Blocks are different from Frames (doc/lz4_Frame_format.md).
+ Frames bundle both blocks and metadata in a specified manner.
+ Embedding metadata is required for compressed data to be self-contained and portable.
+ Frame format is delivered through a companion API, declared in lz4frame.h.
+ The `lz4` CLI can only manage frames.
+*/
+
+/*^***************************************************************
+* Export parameters
+*****************************************************************/
+/*
+* LZ4_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+* LZ4LIB_VISIBILITY :
+* Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+# else
+# define LZ4LIB_VISIBILITY
+# endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+# define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*! LZ4_FREESTANDING :
+ * When this macro is set to 1, it enables "freestanding mode" that is
+ * suitable for typical freestanding environment which doesn't support
+ * standard C library.
+ *
+ * - LZ4_FREESTANDING is a compile-time switch.
+ * - It requires the following macros to be defined:
+ * LZ4_memcpy, LZ4_memmove, LZ4_memset.
+ * - It only enables LZ4/HC functions which don't use heap.
+ * All LZ4F_* functions are not supported.
+ * - See tests/freestanding.c to check its basic setup.
+ */
+#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1)
+# define LZ4_HEAPMODE 0
+# define LZ4HC_HEAPMODE 0
+# define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1
+# if !defined(LZ4_memcpy)
+# error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'."
+# endif
+# if !defined(LZ4_memset)
+# error "LZ4_FREESTANDING requires macro 'LZ4_memset'."
+# endif
+# if !defined(LZ4_memmove)
+# error "LZ4_FREESTANDING requires macro 'LZ4_memmove'."
+# endif
+#elif ! defined(LZ4_FREESTANDING)
+# define LZ4_FREESTANDING 0
+#endif
+
+
+/*------ Version ------*/
+#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
+#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE 4 /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */
+
+LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version; requires v1.3.0+ */
+LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version; requires v1.7.5+ */
+
+
+/*-************************************
+* Tuning parameter
+**************************************/
+#define LZ4_MEMORY_USAGE_MIN 10
+#define LZ4_MEMORY_USAGE_DEFAULT 14
+#define LZ4_MEMORY_USAGE_MAX 20
+
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; )
+ * Increasing memory usage improves compression ratio, at the cost of speed.
+ * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality.
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
+#endif
+
+#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN)
+# error "LZ4_MEMORY_USAGE is too small !"
+#endif
+
+#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX)
+# error "LZ4_MEMORY_USAGE is too large !"
+#endif
+
+/*-************************************
+* Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+ * Compresses 'srcSize' bytes from buffer 'src'
+ * into already allocated 'dst' buffer of size 'dstCapacity'.
+ * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'src' into a more limited 'dst' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * In which case, 'dst' content is undefined (invalid).
+ * srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ * dstCapacity : size of buffer 'dst' (which must be already allocated)
+ * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ * or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+
+/*! LZ4_decompress_safe() :
+ * compressedSize : is the exact complete size of the compressed block.
+ * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ * If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ * If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ * it will never write outside the 'dst' buffer, nor read outside the 'source' buffer,
+ * even if the compressed block is maliciously modified to order the decoder to do these actions.
+ * In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ * The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+* Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*! LZ4_compressBound() :
+ Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+ This function is primarily useful for memory allocation purposes (destination buffer size).
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+ Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+ inputSize : max supported value is LZ4_MAX_INPUT_SIZE
+ return : maximum output size in a "worst case" scenario
+ or 0, if input size is incorrect (too large or negative)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*! LZ4_compress_fast() :
+ Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
+ The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+ It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+ An acceleration value of "1" is the same as regular LZ4_compress_default()
+ Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+ Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_fast_extState() :
+ * Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ * Use LZ4_sizeofState() to know how much memory must be allocated,
+ * and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ * Then, provide this buffer as `void* state` to compression function.
+ */
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_destSize() :
+ * Reverse the logic : compresses as much data as possible from 'src' buffer
+ * into already allocated buffer 'dst', of size >= 'targetDestSize'.
+ * This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ * or fill 'dst' buffer completely with as much data as possible from 'src'.
+ * note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ * New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
+ * or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ * the produced compressed content could, in specific circumstances,
+ * require to be decompressed into a destination buffer larger
+ * by at least 1 byte than the content to decompress.
+ * If an application uses `LZ4_compress_destSize()`,
+ * it's highly recommended to update liblz4 to v1.9.2 or better.
+ * If this can't be done or ensured,
+ * the receiving decompression function should provide
+ * a dstCapacity which is > decompressedSize, by at least 1 byte.
+ * See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ * into destination buffer 'dst' of size 'dstCapacity'.
+ * Up to 'targetOutputSize' bytes will be decoded.
+ * The function stops decoding on reaching this objective.
+ * This can be useful to boost performance
+ * whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ * If source stream is detected malformed, function returns a negative result.
+ *
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ * Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ * so dstCapacity is kind of redundant.
+ * This is because in older versions of this function,
+ * decoding operation would still write complete sequences.
+ * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ * it could write more bytes, though only up to dstCapacity.
+ * Some "margin" used to be required for this operation to work properly.
+ * Thankfully, this is no longer necessary.
+ * The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ * Note 4 : If srcSize is the exact size of the block,
+ * then targetOutputSize can be any value,
+ * including larger than the block's decompressed size.
+ * The function will, at most, generate block's decompressed size.
+ *
+ * Note 5 : If srcSize is _larger_ than block's compressed size,
+ * then targetOutputSize **MUST** be <= block's decompressed size.
+ * Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+* Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
+
+/**
+ Note about RC_INVOKED
+
+ - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio).
+ https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros
+
+ - Since rc.exe is a legacy compiler, it truncates long symbol (> 30 chars)
+ and reports warning "RC4011: identifier truncated".
+
+ - To eliminate the warning, we surround long preprocessor symbol with
+ "#if !defined(RC_INVOKED) ... #endif" block that means
+ "skip this block when rc.exe is trying to read it".
+*/
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ * (e.g., LZ4_compress_fast_continue()).
+ *
+ * An LZ4_stream_t must be initialized once before usage.
+ * This is automatically done when created by LZ4_createStream().
+ * However, should the LZ4_stream_t be simply declared on stack (for example),
+ * it's necessary to initialize it first, using LZ4_initStream().
+ *
+ * After init, start any new stream with LZ4_resetStream_fast().
+ * A same LZ4_stream_t can be re-used multiple times consecutively
+ * and compress multiple streams,
+ * provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ * LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ * but is not compatible with memory regions containing garbage data.
+ *
+ * Note: it's only useful to call LZ4_resetStream_fast()
+ * in the context of streaming compression.
+ * The *extState* functions perform their own resets.
+ * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ * Use this function to reference a static dictionary into LZ4_stream_t.
+ * The dictionary must remain available during compression.
+ * LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ * The same dictionary will have to be loaded on decompression side for successful decoding.
+ * Dictionaries are useful for better compression of small data (KB range).
+ * While LZ4 accepts any input as a dictionary,
+ * results are generally better when using Zstandard's Dictionary Builder.
+ * Loading a size of 0 is allowed, and is the same as reset.
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ * Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * 'dst' buffer must be already allocated.
+ * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ * or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ * Each block has precise boundaries.
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
+ *
+ * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ * Make sure that buffers are separated, by at least one byte.
+ * This construction ensures that each block only depends on previous block.
+ *
+ * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ * If last 64KB data cannot be guaranteed to remain available at its current memory location,
+ * save it into a safer place (char* safeBuffer).
+ * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+
+
+/*-**********************************************
+* Streaming Decompression Functions
+* Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ * creation / destruction of streaming decompression tracking context.
+ * A tracking context can be re-used multiple times.
+ */
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
+
+/*! LZ4_setStreamDecode() :
+ * An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ * Use this function to start decompression of a new stream of blocks.
+ * A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ * Note : in a ring buffer scenario (optional),
+ * blocks are presumed decompressed next to each other
+ * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ * at which stage it resumes from beginning of ring buffer.
+ * When setting such a ring buffer for streaming decompression,
+ * provides the minimum size of this ring buffer
+ * to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ * These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ * A block is an unsplittable entity, it must be presented entirely to a decompression function.
+ * Decompression functions only accept one block at a time.
+ * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ * If less than 64KB of data has been decoded, all the data must be present.
+ *
+ * Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized.
+ * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ * - Synchronized mode :
+ * Decompression buffer size is _exactly_ the same as compression buffer size,
+ * and follows exactly same update rule (block boundaries at same positions),
+ * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized,
+ * and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ * Whenever these conditions are not possible,
+ * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
+*/
+LZ4LIB_API int
+LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode,
+ const char* src, char* dst,
+ int srcSize, int dstCapacity);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ * These decoding functions work the same as
+ * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ * They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ * Performance tip : Decompression speed can be substantially increased
+ * when dst == dictStart + dictSize.
+ */
+LZ4LIB_API int
+LZ4_decompress_safe_usingDict(const char* src, char* dst,
+ int srcSize, int dstCapacity,
+ const char* dictStart, int dictSize);
+
+LZ4LIB_API int
+LZ4_decompress_safe_partial_usingDict(const char* src, char* dst,
+ int compressedSize,
+ int targetOutputSize, int maxOutputSize,
+ const char* dictStart, int dictSize);
+
+#endif /* LZ4_H_2983827168210 */
+
+
+/*^*************************************
+ * !!!!!! STATIC LINKING ONLY !!!!!!
+ ***************************************/
+
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
+/* LZ4LIB_STATIC_API : qualifier placed on the experimental declarations below.
+ * It expands to LZ4LIB_API only when LZ4_PUBLISH_STATIC_FUNCTIONS is defined,
+ * and to nothing otherwise. */
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else
+#define LZ4LIB_STATIC_API
+#endif
+
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step.
+ * It is only safe to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ * From a high level, the difference is that
+ * this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ * This is an experimental API that allows
+ * efficient use of a static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ * working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
+ *
+ * Several assumptions are made about the state of the dictionary stream.
+ * Currently, only streams which have been prepared by LZ4_loadDict() should
+ * be expected to work.
+ *
+ * Alternatively, the provided dictionaryStream may be NULL,
+ * in which case any existing dictionary stream is unset.
+ *
+ * If a dictionary is provided, it replaces any pre-existing stream history.
+ * The dictionary contents are the only history that can be referenced and
+ * logically immediately precede the data compressed in the first subsequent
+ * compression call.
+ *
+ * The dictionary will only remain attached to the working stream through the
+ * first compression call, at the end of which it is cleared. The dictionary
+ * stream (and source buffer) must remain in-place / accessible / unchanged
+ * through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void
+LZ4_attach_dictionary(LZ4_stream_t* workingStream,
+ const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly constrained memory environments.
+ * In both cases, it requires input to lie at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ * |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ * |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * whose size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ * Note that it is a compile-time constant, so all compressions will apply this limit.
+ * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ * so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ * This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ * in which case, the return code will be 0 (zero).
+ * The caller must be ready for these cases to happen,
+ * and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * whose size is >= LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
+# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+#endif /* LZ4_STATIC_3504398509 */
+#endif /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ * Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
+
+/* Integer aliases used by the private definitions below.
+ * Under C++ or C99-and-later they map onto the <stdint.h> fixed-width types;
+ * otherwise they fall back to plain C types.
+ * NOTE(review): the fallback presumes 8-bit char, 16-bit short and 32-bit int;
+ * confirm for any pre-C99 target. */
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef int8_t LZ4_i8;
+ typedef uint8_t LZ4_byte;
+ typedef uint16_t LZ4_u16;
+ typedef uint32_t LZ4_u32;
+#else
+ typedef signed char LZ4_i8;
+ typedef unsigned char LZ4_byte;
+ typedef unsigned short LZ4_u16;
+ typedef unsigned int LZ4_u32;
+#endif
+
+/*! LZ4_stream_t :
+ * Never ever use below internal definitions directly !
+ * These definitions are not API/ABI safe, and may change in future versions.
+ * If you need static allocation, declare or allocate an LZ4_stream_t object.
+**/
+
+/* Private compression state stored inside an LZ4_stream_t.
+ * Field meanings are defined by lz4.c, which is not part of this header;
+ * the notes below marked NOTE(review) are assumptions to confirm there. */
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+ LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; /* LZ4_HASH_SIZE_U32 == 1 << (LZ4_MEMORY_USAGE-2) entries */
+ const LZ4_byte* dictionary; /* NOTE(review): presumably the dictionary / history buffer in use - confirm in lz4.c */
+ const LZ4_stream_t_internal* dictCtx; /* NOTE(review): presumably the stream set by LZ4_attach_dictionary() - confirm in lz4.c */
+ LZ4_u32 currentOffset;
+ LZ4_u32 tableType;
+ LZ4_u32 dictSize; /* NOTE(review): presumably the size in bytes of `dictionary` - confirm in lz4.c */
+ /* Implicit padding to ensure structure is aligned */
+};
+
+/* Storage type behind LZ4_stream_t.
+ * The `minStateSize` member makes the union at least LZ4_STREAM_MINSIZE bytes,
+ * whatever the size of LZ4_stream_t_internal; `internal_donotuse` is the
+ * private state itself and, as its name says, is not for user code. */
+#define LZ4_STREAM_MINSIZE ((1UL << LZ4_MEMORY_USAGE) + 32) /* static size, for inter-version compatibility */
+union LZ4_stream_u {
+ char minStateSize[LZ4_STREAM_MINSIZE];
+ LZ4_stream_t_internal internal_donotuse;
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is automatically done when invoking LZ4_createStream(),
+ * but it's not when the structure is simply declared on stack (for example).
+ *
+ * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ * It can also initialize any arbitrary buffer of sufficient size,
+ * and will @return a pointer of proper type upon initialization.
+ *
+ * Note : initialization fails if size and alignment conditions are not respected.
+ * In which case, the function will @return NULL.
+ * Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ * Note3: Before v1.9.0, use LZ4_resetStream() instead
+**/
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*! LZ4_streamDecode_t :
+ * Never ever use below internal definitions directly !
+ * These definitions are not API/ABI safe, and may change in future versions.
+ * If you need static allocation, declare or allocate an LZ4_streamDecode_t object.
+**/
+/* Private decompression tracking state stored inside an LZ4_streamDecode_t.
+ * Field meanings are defined by lz4.c, which is not part of this header;
+ * the notes below are assumptions to confirm there. */
+typedef struct {
+ const LZ4_byte* externalDict; /* NOTE(review): presumably the start of an external dictionary - confirm in lz4.c */
+ const LZ4_byte* prefixEnd; /* NOTE(review): presumably the end of the previously decoded data - confirm in lz4.c */
+ size_t extDictSize; /* NOTE(review): presumably the size in bytes of `externalDict` - confirm in lz4.c */
+ size_t prefixSize; /* NOTE(review): presumably the size in bytes of the data ending at `prefixEnd` - confirm in lz4.c */
+} LZ4_streamDecode_t_internal;
+
+/* Storage type behind LZ4_streamDecode_t.
+ * The `minStateSize` member makes the union at least LZ4_STREAMDECODE_MINSIZE
+ * bytes; `internal_donotuse` is the private state and is not for user code. */
+#define LZ4_STREAMDECODE_MINSIZE 32
+union LZ4_streamDecode_u {
+ char minStateSize[LZ4_STREAMDECODE_MINSIZE];
+ LZ4_streamDecode_t_internal internal_donotuse;
+} ; /* previously typedef'd to LZ4_streamDecode_t */
+
+
+
+/*-************************************
+* Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+ *
+ * Deprecated functions make the compiler generate a warning when invoked.
+ * This is meant to invite users to update their source code.
+ * Should deprecation warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ * before including the header file.
+ */
+/* LZ4_DEPRECATED(message) : expands to the compiler's deprecation marker,
+ * or to nothing when warnings are disabled or no known syntax is available. */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define LZ4_DEPRECATED(message) [[deprecated(message)]]
+# elif defined(_MSC_VER)
+# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+ /* clang, or GCC 4.5 and later : attribute form that carries the message */
+# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+ /* GCC 3.1 up to (but excluding) 4.5 : attribute form without the message */
+# define LZ4_DEPRECATED(message) __attribute__((deprecated))
+# else
+ /* unrecognized compiler : emit a build-time note and mark nothing */
+# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+# define LZ4_DEPRECATED(message) /* disabled */
+# endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/*! Obsolete compression functions (since v1.7.3) */
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*! Obsolete decompression functions (since v1.8.0) */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
+
+/*! Obsolete streaming decoding functions (since v1.7.0) */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ * These functions used to be faster than LZ4_decompress_safe(),
+ * but this is no longer the case. They are now slower.
+ * This is because LZ4_decompress_fast() doesn't know the input size,
+ * and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
+ * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
+ * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *
+ * The last remaining LZ4_decompress_fast() specificity is that
+ * it can decompress a block without knowing its compressed size.
+ * Such functionality can be achieved in a more secure manner
+ * by employing LZ4_decompress_safe_partial().
+ *
+ * Parameters:
+ * originalSize : is the uncompressed size to regenerate.
+ * `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ * The function expects to finish at block's end exactly.
+ * If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ * Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ * These issues never happen if input (compressed) data is correct.
+ * But they may happen if input data is invalid (error or intentional tampering).
+ * As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+/*! LZ4_resetStream() :
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is done with LZ4_initStream(), or LZ4_resetStream().
+ * Consider switching to LZ4_initStream(),
+ * invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+
+#endif /* LZ4_H_98237428734687 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/mfbt/lz4/lz4file.c b/mfbt/lz4/lz4file.c
new file mode 100644
index 0000000000..eaf9b1704d
--- /dev/null
+++ b/mfbt/lz4/lz4file.c
@@ -0,0 +1,311 @@
+/*
+ * LZ4 file library
+ * Copyright (C) 2022, Xiaomi Inc.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "lz4.h"
+#include "lz4file.h"
+
+/* Read handle: state shared by LZ4F_readOpen(), LZ4F_read() and LZ4F_readClose(). */
+struct LZ4_readFile_s {
+ LZ4F_dctx* dctxPtr; /* decompression context, created in LZ4F_readOpen() */
+ FILE* fp; /* stream the compressed data is read from */
+ LZ4_byte* srcBuf; /* buffer of compressed bytes, srcBufMaxSize bytes long */
+ size_t srcBufNext; /* offset in srcBuf of the first byte not yet consumed by the decoder */
+ size_t srcBufSize; /* number of valid bytes currently held in srcBuf */
+ size_t srcBufMaxSize; /* capacity of srcBuf, chosen from the frame's block size ID */
+};
+
+/* Write handle: state shared by LZ4F_writeOpen(), LZ4F_write() and LZ4F_writeClose(). */
+struct LZ4_writeFile_s {
+ LZ4F_cctx* cctxPtr; /* compression context, created in LZ4F_writeOpen() */
+ FILE* fp; /* stream the compressed data is written to */
+ LZ4_byte* dstBuf; /* buffer receiving compressed output, dstBufMaxSize bytes long */
+ size_t maxWriteSize; /* largest input chunk handed to LZ4F_compressUpdate() in one call */
+ size_t dstBufMaxSize; /* capacity of dstBuf : LZ4F_compressBound(maxWriteSize, prefs) */
+ LZ4F_errorCode_t errCode; /* first error recorded by LZ4F_write(); LZ4F_OK_NoError if none */
+};
+
+/*! LZ4F_readOpen() :
+ *  Allocate a read handle on `fp` and decode the LZ4 frame header.
+ *
+ *  The function reads exactly LZ4F_HEADER_SIZE_MAX bytes from `fp`, decodes the
+ *  frame info from them, sizes the source buffer from the frame's block size ID,
+ *  and copies the bytes of that first read which the header did not consume
+ *  into the source buffer, so that LZ4F_read() starts with them.
+ *  A stream shorter than LZ4F_HEADER_SIZE_MAX bytes is rejected.
+ *
+ * @param lz4fRead : receives the new handle on success.
+ *                   On failure it is set to NULL (it is left untouched only
+ *                   when `lz4fRead` or `fp` is NULL).
+ * @param fp       : stream to read compressed data from; never closed here.
+ * @return : the value returned by LZ4F_getFrameInfo() on success,
+ *           or an error code, testable with LZ4F_isError().
+ *
+ *  Every failure path releases everything acquired so far (the decompression
+ *  context and the handle), so the caller has nothing to clean up on error.
+ */
+LZ4F_errorCode_t LZ4F_readOpen(LZ4_readFile_t** lz4fRead, FILE* fp)
+{
+  char buf[LZ4F_HEADER_SIZE_MAX];
+  size_t consumedSize;
+  LZ4F_errorCode_t ret;
+  LZ4F_frameInfo_t info;
+  LZ4_readFile_t* handle;
+
+  if (fp == NULL || lz4fRead == NULL) {
+    return -LZ4F_ERROR_GENERIC;
+  }
+
+  /* calloc : srcBuf, srcBufNext and srcBufSize all start at zero */
+  handle = (LZ4_readFile_t*)calloc(1, sizeof(LZ4_readFile_t));
+  if (handle == NULL) {
+    *lz4fRead = NULL;
+    return -LZ4F_ERROR_allocation_failed;
+  }
+
+  ret = LZ4F_createDecompressionContext(&handle->dctxPtr, LZ4F_getVersion());
+  if (LZ4F_isError(ret)) {
+    goto fail_handle;   /* no context was created : only the handle to release */
+  }
+
+  handle->fp = fp;
+  consumedSize = fread(buf, 1, sizeof(buf), fp);
+  if (consumedSize != sizeof(buf)) {
+    ret = -LZ4F_ERROR_GENERIC;
+    goto fail_dctx;     /* the context exists by now and must be released too */
+  }
+
+  /* in : bytes available in buf ; out : bytes consumed by the frame header */
+  ret = LZ4F_getFrameInfo(handle->dctxPtr, &info, buf, &consumedSize);
+  if (LZ4F_isError(ret)) {
+    goto fail_dctx;
+  }
+
+  switch (info.blockSizeID) {
+    case LZ4F_default :
+    case LZ4F_max64KB :
+      handle->srcBufMaxSize = 64 * 1024;
+      break;
+    case LZ4F_max256KB:
+      handle->srcBufMaxSize = 256 * 1024;
+      break;
+    case LZ4F_max1MB:
+      handle->srcBufMaxSize = 1 * 1024 * 1024;
+      break;
+    case LZ4F_max4MB:
+      handle->srcBufMaxSize = 4 * 1024 * 1024;
+      break;
+    default:
+      ret = -LZ4F_ERROR_maxBlockSize_invalid;
+      goto fail_dctx;
+  }
+
+  handle->srcBuf = (LZ4_byte*)malloc(handle->srcBufMaxSize);
+  if (handle->srcBuf == NULL) {
+    ret = -LZ4F_ERROR_allocation_failed;
+    goto fail_dctx;
+  }
+
+  /* keep the tail of the first read that lies beyond the frame header */
+  handle->srcBufSize = sizeof(buf) - consumedSize;
+  memcpy(handle->srcBuf, buf + consumedSize, handle->srcBufSize);
+
+  *lz4fRead = handle;
+  return ret;
+
+fail_dctx:
+  LZ4F_freeDecompressionContext(handle->dctxPtr);
+fail_handle:
+  free(handle);
+  *lz4fRead = NULL;     /* never hand a dangling pointer back to the caller */
+  return ret;
+}
+
+/*! LZ4F_read() :
+ *  Decompress up to `size` bytes from the handle into `buf`.
+ *
+ *  Compressed data is pulled from the underlying stream in chunks of at most
+ *  srcBufMaxSize bytes whenever the source buffer has been fully consumed,
+ *  then fed to LZ4F_decompress() until `size` bytes have been produced or the
+ *  stream has no more data.
+ *
+ * @param lz4fRead : handle obtained from LZ4F_readOpen().
+ * @param buf      : destination buffer, at least `size` bytes.
+ * @param size     : maximum number of decompressed bytes wanted.
+ * @return : number of bytes written into `buf` (less than `size` once the
+ *           stream is exhausted), or an error code testable with LZ4F_isError().
+ */
+size_t LZ4F_read(LZ4_readFile_t* lz4fRead, void* buf, size_t size)
+{
+  LZ4_byte* p = (LZ4_byte*)buf;
+  size_t next = 0;
+
+  if (lz4fRead == NULL || buf == NULL)
+    return -LZ4F_ERROR_GENERIC;
+
+  while (next < size) {
+    size_t srcsize = lz4fRead->srcBufSize - lz4fRead->srcBufNext;
+    size_t dstsize = size - next;
+    size_t ret;
+
+    if (srcsize == 0) {
+      /* source buffer drained : refill it from the stream */
+      ret = fread(lz4fRead->srcBuf, 1, lz4fRead->srcBufMaxSize, lz4fRead->fp);
+      if (ret == 0) {
+        /* fread() returns an unsigned count, so a failure cannot show up as
+         * a negative value : tell a read error apart from end of file. */
+        if (ferror(lz4fRead->fp)) {
+          return -LZ4F_ERROR_GENERIC;
+        }
+        break;
+      }
+      lz4fRead->srcBufSize = ret;
+      srcsize = ret;
+      lz4fRead->srcBufNext = 0;
+    }
+
+    /* in : capacities ; out : dstsize = bytes produced, srcsize = bytes consumed */
+    ret = LZ4F_decompress(lz4fRead->dctxPtr,
+                          p, &dstsize,
+                          lz4fRead->srcBuf + lz4fRead->srcBufNext,
+                          &srcsize,
+                          NULL);
+    if (LZ4F_isError(ret)) {
+      return ret;
+    }
+
+    lz4fRead->srcBufNext += srcsize;
+    next += dstsize;
+    p += dstsize;
+  }
+
+  return next;
+}
+
+/*! LZ4F_readClose() :
+ *  Release a read handle : its decompression context, its source buffer and
+ *  the handle itself. The underlying FILE* is not closed.
+ * @return : LZ4F_OK_NoError, or an error code when `lz4fRead` is NULL.
+ */
+LZ4F_errorCode_t LZ4F_readClose(LZ4_readFile_t* lz4fRead)
+{
+  if (lz4fRead != NULL) {
+    LZ4F_freeDecompressionContext(lz4fRead->dctxPtr);
+    free(lz4fRead->srcBuf);
+    free(lz4fRead);
+    return LZ4F_OK_NoError;
+  }
+
+  return -LZ4F_ERROR_GENERIC;
+}
+
+/*! LZ4F_writeOpen() :
+ *  Allocate a write handle on `fp` and emit the LZ4 frame header.
+ *
+ *  The per-call input chunk size is taken from the block size ID found in
+ *  `prefsPtr` (64 KB when `prefsPtr` is NULL), the output buffer is sized with
+ *  LZ4F_compressBound() for that chunk size, and the frame header produced by
+ *  LZ4F_compressBegin() is written to `fp`.
+ *
+ * @param lz4fWrite : receives the new handle on success.
+ *                    On failure it is set to NULL (it is left untouched only
+ *                    when `lz4fWrite` or `fp` is NULL).
+ * @param fp        : stream to write compressed data to; never closed here.
+ * @param prefsPtr  : frame preferences, or NULL for defaults.
+ * @return : LZ4F_OK_NoError on success,
+ *           or an error code, testable with LZ4F_isError().
+ *
+ *  Every failure path releases everything acquired so far (compression
+ *  context, output buffer, handle), so the caller has nothing to clean up.
+ */
+LZ4F_errorCode_t LZ4F_writeOpen(LZ4_writeFile_t** lz4fWrite, FILE* fp, const LZ4F_preferences_t* prefsPtr)
+{
+  LZ4_byte buf[LZ4F_HEADER_SIZE_MAX];
+  size_t ret;
+  LZ4_writeFile_t* handle;
+
+  if (fp == NULL || lz4fWrite == NULL)
+    return -LZ4F_ERROR_GENERIC;
+
+  handle = (LZ4_writeFile_t*)malloc(sizeof(LZ4_writeFile_t));
+  if (handle == NULL) {
+    *lz4fWrite = NULL;
+    return -LZ4F_ERROR_allocation_failed;
+  }
+
+  if (prefsPtr != NULL) {
+    switch (prefsPtr->frameInfo.blockSizeID) {
+      case LZ4F_default :
+      case LZ4F_max64KB :
+        handle->maxWriteSize = 64 * 1024;
+        break;
+      case LZ4F_max256KB:
+        handle->maxWriteSize = 256 * 1024;
+        break;
+      case LZ4F_max1MB:
+        handle->maxWriteSize = 1 * 1024 * 1024;
+        break;
+      case LZ4F_max4MB:
+        handle->maxWriteSize = 4 * 1024 * 1024;
+        break;
+      default:
+        ret = -LZ4F_ERROR_maxBlockSize_invalid;
+        goto fail_handle;   /* release the handle, not the caller's pointer slot */
+    }
+  } else {
+    handle->maxWriteSize = 64 * 1024;
+  }
+
+  handle->dstBufMaxSize = LZ4F_compressBound(handle->maxWriteSize, prefsPtr);
+  handle->dstBuf = (LZ4_byte*)malloc(handle->dstBufMaxSize);
+  if (handle->dstBuf == NULL) {
+    ret = -LZ4F_ERROR_allocation_failed;
+    goto fail_handle;
+  }
+
+  ret = LZ4F_createCompressionContext(&handle->cctxPtr, LZ4F_getVersion());
+  if (LZ4F_isError(ret)) {
+    goto fail_dstBuf;       /* no context was created */
+  }
+
+  /* on success, ret is the size of the frame header stored in buf */
+  ret = LZ4F_compressBegin(handle->cctxPtr, buf, LZ4F_HEADER_SIZE_MAX, prefsPtr);
+  if (LZ4F_isError(ret)) {
+    goto fail_cctx;
+  }
+
+  if (ret != fwrite(buf, 1, ret, fp)) {
+    ret = -LZ4F_ERROR_GENERIC;
+    goto fail_cctx;
+  }
+
+  handle->fp = fp;
+  handle->errCode = LZ4F_OK_NoError;
+  *lz4fWrite = handle;
+  return LZ4F_OK_NoError;
+
+fail_cctx:
+  LZ4F_freeCompressionContext(handle->cctxPtr);
+fail_dstBuf:
+  free(handle->dstBuf);
+fail_handle:
+  free(handle);
+  *lz4fWrite = NULL;        /* never hand a dangling pointer back to the caller */
+  return ret;
+}
+
+/*! LZ4F_write() :
+ *  Compress `size` bytes from `buf` and write the result to the handle's stream.
+ *
+ *  The input is fed to LZ4F_compressUpdate() in pieces of at most maxWriteSize
+ *  bytes; whatever each call produces is written out immediately.
+ *  The first failure is recorded in the handle's errCode before returning.
+ *
+ * @return : `size` when everything was accepted,
+ *           or an error code, testable with LZ4F_isError().
+ */
+size_t LZ4F_write(LZ4_writeFile_t* lz4fWrite, void* buf, size_t size)
+{
+  LZ4_byte* cursor = (LZ4_byte*)buf;
+  size_t left;
+
+  if (lz4fWrite == NULL || buf == NULL)
+    return -LZ4F_ERROR_GENERIC;
+
+  for (left = size; left != 0; ) {
+    size_t const piece = (left > lz4fWrite->maxWriteSize) ? lz4fWrite->maxWriteSize : left;
+    size_t const produced = LZ4F_compressUpdate(lz4fWrite->cctxPtr,
+                                                lz4fWrite->dstBuf, lz4fWrite->dstBufMaxSize,
+                                                cursor, piece,
+                                                NULL);
+
+    if (LZ4F_isError(produced)) {
+      lz4fWrite->errCode = produced;
+      return produced;
+    }
+
+    /* a short write is reported as a generic error */
+    if (fwrite(lz4fWrite->dstBuf, 1, produced, lz4fWrite->fp) != produced) {
+      lz4fWrite->errCode = -LZ4F_ERROR_GENERIC;
+      return -LZ4F_ERROR_GENERIC;
+    }
+
+    cursor += piece;
+    left -= piece;
+  }
+
+  return size;
+}
+
+/*! LZ4F_writeClose() :
+ *  Finish the frame and release a write handle.
+ *
+ *  When no earlier LZ4F_write() recorded an error, the frame is terminated with
+ *  LZ4F_compressEnd() and the resulting bytes are written to the stream.
+ *  The compression context, the output buffer and the handle are released in
+ *  every case. The underlying FILE* is not closed.
+ *
+ * @return : an error code when `lz4fWrite` is NULL, when LZ4F_compressEnd()
+ *           fails or when the final write is short; otherwise the value
+ *           returned by LZ4F_compressEnd(), or LZ4F_OK_NoError when the
+ *           handle already carried an error and nothing was flushed.
+ */
+LZ4F_errorCode_t LZ4F_writeClose(LZ4_writeFile_t* lz4fWrite)
+{
+  LZ4F_errorCode_t status = LZ4F_OK_NoError;
+
+  if (lz4fWrite == NULL)
+    return -LZ4F_ERROR_GENERIC;
+
+  if (lz4fWrite->errCode == LZ4F_OK_NoError) {
+    status = LZ4F_compressEnd(lz4fWrite->cctxPtr,
+                              lz4fWrite->dstBuf, lz4fWrite->dstBufMaxSize,
+                              NULL);
+    /* only flush when the frame was terminated successfully */
+    if (!LZ4F_isError(status)
+        && fwrite(lz4fWrite->dstBuf, 1, status, lz4fWrite->fp) != status) {
+      status = -LZ4F_ERROR_GENERIC;
+    }
+  }
+
+  LZ4F_freeCompressionContext(lz4fWrite->cctxPtr);
+  free(lz4fWrite->dstBuf);
+  free(lz4fWrite);
+  return status;
+}
diff --git a/mfbt/lz4/lz4file.h b/mfbt/lz4/lz4file.h
new file mode 100644
index 0000000000..5527130720
--- /dev/null
+++ b/mfbt/lz4/lz4file.h
@@ -0,0 +1,93 @@
+/*
+ LZ4 file library
+ Header File
+ Copyright (C) 2022, Xiaomi Inc.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4FILE_H
+#define LZ4FILE_H
+
+#include <stdio.h>
+#include "lz4frame_static.h"
+
+/* Handle types : only forward-declared in this header. */
+typedef struct LZ4_readFile_s LZ4_readFile_t;
+typedef struct LZ4_writeFile_s LZ4_writeFile_t;
+
+/*! LZ4F_readOpen() :
+ * Set read lz4file handle.
+ * `lz4fRead` will receive the lz4file read handle.
+ * `fp` must be the return value of the lz4 file opened by fopen.
+ */
+LZ4FLIB_STATIC_API LZ4F_errorCode_t LZ4F_readOpen(LZ4_readFile_t** lz4fRead, FILE* fp);
+
+/*! LZ4F_read() :
+ * Read lz4file content to buffer.
+ * `lz4fRead` must have been set by LZ4F_readOpen() first.
+ * `buf` read data buffer.
+ * `size` read data buffer size.
+ */
+LZ4FLIB_STATIC_API size_t LZ4F_read(LZ4_readFile_t* lz4fRead, void* buf, size_t size);
+
+/*! LZ4F_readClose() :
+ * Close lz4file handle.
+ * `lz4fRead` must have been set by LZ4F_readOpen() first.
+ */
+LZ4FLIB_STATIC_API LZ4F_errorCode_t LZ4F_readClose(LZ4_readFile_t* lz4fRead);
+
+/*! LZ4F_writeOpen() :
+ * Set write lz4file handle.
+ * `lz4fWrite` will receive the lz4file write handle.
+ * `fp` must be the return value of the lz4 file opened by fopen.
+ * `prefsPtr` is forwarded to LZ4F_compressBegin().
+ */
+LZ4FLIB_STATIC_API LZ4F_errorCode_t LZ4F_writeOpen(LZ4_writeFile_t** lz4fWrite, FILE* fp, const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_write() :
+ * Write buffer to lz4file.
+ * `lz4fWrite` must have been set by LZ4F_writeOpen() first.
+ * `buf` write data buffer.
+ * `size` write data buffer size.
+ * @return : `size` on success, or an error code (test with LZ4F_isError()).
+ */
+LZ4FLIB_STATIC_API size_t LZ4F_write(LZ4_writeFile_t* lz4fWrite, void* buf, size_t size);
+
+/*! LZ4F_writeClose() :
+ * Close lz4file handle.
+ * `lz4fWrite` must have been set by LZ4F_writeOpen() first.
+ * The handle is freed by this call; `fp` itself is not closed.
+ * @return : test with LZ4F_isError(); a successful return is not necessarily 0.
+ */
+LZ4FLIB_STATIC_API LZ4F_errorCode_t LZ4F_writeClose(LZ4_writeFile_t* lz4fWrite);
+
+#endif /* LZ4FILE_H */
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/mfbt/lz4/lz4frame.c b/mfbt/lz4/lz4frame.c
new file mode 100644
index 0000000000..174f9ae4f2
--- /dev/null
+++ b/mfbt/lz4/lz4frame.c
@@ -0,0 +1,2078 @@
+/*
+ * LZ4 auto-framing library
+ * Copyright (C) 2011-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+/* LZ4F is a stand-alone API to create LZ4-compressed Frames
+ * in full conformance with specification v1.6.1 .
+ * This library relies upon memory management capabilities (malloc, free)
+ * provided either by <stdlib.h>,
+ * or redirected towards another library of user's choice
+ * (see Memory Routines below).
+ */
+
+
+/*-************************************
+* Compiler Options
+**************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/*-************************************
+* Tuning parameters
+**************************************/
+/*
+ * LZ4F_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4F_HEAPMODE
+# define LZ4F_HEAPMODE 0
+#endif
+
+
+/*-************************************
+* Library declarations
+**************************************/
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#define LZ4_STATIC_LINKING_ONLY
+#include "lz4.h"
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/*-************************************
+* Memory routines
+**************************************/
+/*
+ * User may redirect invocations of
+ * malloc(), calloc() and free()
+ * towards another library or solution of their choice
+ * by modifying below section.
+**/
+
+#include <string.h> /* memset, memcpy, memmove */
+#ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */
+# define MEM_INIT(p,v,s) memset((p),(v),(s))
+#endif
+
+#ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */
+# include <stdlib.h> /* malloc, calloc, free */
+# define ALLOC(s) malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,(s))
+# define FREEMEM(p) free(p)
+#endif
+
+/* Zero-initialised allocation of @s bytes, routed through @cmem when it provides allocators. */
+static void* LZ4F_calloc(size_t s, LZ4F_CustomMem cmem)
+{
+    void* zeroed;
+
+    /* a dedicated custom calloc takes precedence */
+    if (cmem.customCalloc != NULL)
+        return cmem.customCalloc(cmem.opaqueState, s);
+
+    /* no custom allocator at all : fall back on <stdlib.h>'s calloc() */
+    if (cmem.customAlloc == NULL)
+        return ALLOC_AND_ZERO(s);
+
+    /* only a custom malloc is available : clear the memory ourselves */
+    zeroed = cmem.customAlloc(cmem.opaqueState, s);
+    if (zeroed != NULL)
+        MEM_INIT(zeroed, 0, s);
+    return zeroed;
+}
+
+/* Allocation of @s bytes : custom allocator when one is set, <stdlib.h>'s malloc() otherwise. */
+static void* LZ4F_malloc(size_t s, LZ4F_CustomMem cmem)
+{
+    return (cmem.customAlloc != NULL) ? cmem.customAlloc(cmem.opaqueState, s)
+                                      : ALLOC(s);
+}
+
+/* Release @p : custom free when one is set, <stdlib.h>'s free() otherwise. */
+static void LZ4F_free(void* p, LZ4F_CustomMem cmem)
+{
+    if (cmem.customFree == NULL) {
+        /* nothing defined : use default <stdlib.h>'s free() */
+        FREEMEM(p);
+    } else {
+        /* custom free defined : use it */
+        cmem.customFree(cmem.opaqueState, p);
+    }
+}
+
+
+/*-************************************
+* Debug
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+# include <assert.h>
+#else
+# ifndef assert
+# define assert(condition) ((void)0)
+# endif
+#endif
+
+#define LZ4F_STATIC_ASSERT(c) { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+# include <stdio.h>
+static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+
+/*-************************************
+* Basic Types
+**************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+#endif
+
+
+/* unoptimized version; solves endianness & alignment issues */
+static U32 LZ4F_readLE32 (const void* src)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    U32 result = 0;
+    int idx;
+    /* fold from the most significant byte (offset 3) down to offset 0 */
+    for (idx = 3; idx >= 0; idx--) {
+        result = (result << 8) | (U32)bytes[idx];
+    }
+    return result;
+}
+
+/* Store @value32 at @dst as 4 bytes, least significant byte first. */
+static void LZ4F_writeLE32 (void* dst, U32 value32)
+{
+    BYTE* const out = (BYTE*)dst;
+    unsigned idx;
+    for (idx = 0; idx < 4; idx++) {
+        out[idx] = (BYTE)(value32 >> (8 * idx));
+    }
+}
+
+/* Read 8 bytes at @src as a little-endian 64-bit value, one byte at a time. */
+static U64 LZ4F_readLE64 (const void* src)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    U64 result = 0;
+    int idx;
+    /* fold from the most significant byte (offset 7) down to offset 0 */
+    for (idx = 7; idx >= 0; idx--) {
+        result = (result << 8) | (U64)bytes[idx];
+    }
+    return result;
+}
+
+/* Store @value64 at @dst as 8 bytes, least significant byte first. */
+static void LZ4F_writeLE64 (void* dst, U64 value64)
+{
+    BYTE* const out = (BYTE*)dst;
+    unsigned idx;
+    for (idx = 0; idx < 8; idx++) {
+        out[idx] = (BYTE)(value64 >> (8 * idx));
+    }
+}
+
+
+/*-************************************
+* Constants
+**************************************/
+#ifndef LZ4_SRC_INCLUDED /* avoid double definition */
+# define KB *(1<<10)
+# define MB *(1<<20)
+# define GB *(1<<30)
+#endif
+
+#define _1BIT 0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U
+#define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB
+
+static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */
+static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */
+static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */
+static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */
+
+
+/*-************************************
+* Structures and local types
+**************************************/
+
+typedef enum { LZ4B_COMPRESSED, LZ4B_UNCOMPRESSED} LZ4F_blockCompression_t;
+
+/* Compression context : state carried across the LZ4F_compress*() calls of a frame. */
+typedef struct LZ4F_cctx_s
+{
+ LZ4F_CustomMem cmem; /* allocation functions used for this context and its buffers */
+ LZ4F_preferences_t prefs; /* copy of the preferences, taken by LZ4F_compressBegin_usingCDict() */
+ U32 version; /* version value supplied when the context was created */
+ U32 cStage; /* 0 : uninitialized ; set to 1 once the frame header has been written */
+ const LZ4F_CDict* cdict; /* dictionary given to LZ4F_compressBegin_usingCDict(), can be NULL */
+ size_t maxBlockSize; /* block size derived from prefs.frameInfo.blockSizeID */
+ size_t maxBufferSize; /* size recorded for tmpBuff */
+ BYTE* tmpBuff; /* internal buffer, for streaming */
+ BYTE* tmpIn; /* starting position of the data to compress within internal buffer (>= tmpBuff) */
+ size_t tmpInSize; /* amount of data to compress after tmpIn */
+ U64 totalInSize; /* reset to 0 at frame start when a content size is declared */
+ XXH32_state_t xxh; /* XXH32 state, reset at frame start */
+ void* lz4CtxPtr; /* LZ4_stream_t or LZ4_streamHC_t, depending on lz4CtxState */
+ U16 lz4CtxAlloc; /* sized for: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+ U16 lz4CtxState; /* in use as: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+ LZ4F_blockCompression_t blockCompression;
+} LZ4F_cctx_t;
+
+
+/*-************************************
+* Error management
+**************************************/
+#define LZ4F_GENERATE_STRING(STRING) #STRING,
+static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) };
+
+
+/* Error codes are the negated LZ4F_errorCodes values, i.e. the topmost values of the unsigned range. */
+unsigned LZ4F_isError(LZ4F_errorCode_t code)
+{
+    LZ4F_errorCode_t const threshold = (LZ4F_errorCode_t)(-LZ4F_ERROR_maxCode);
+    return (code > threshold);
+}
+
+/* Name of the error carried by @code, or a fixed fallback string when @code is not an error. */
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code)
+{
+    static const char* fallback = "Unspecified error code";
+    if (!LZ4F_isError(code))
+        return fallback;
+    /* negating the code gives back the index within the error list */
+    return LZ4F_errorStrings[-(int)(code)];
+}
+
+/* Convert a function result back into its LZ4F_errorCodes value (LZ4F_OK_NoError if not an error). */
+LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult)
+{
+    if (LZ4F_isError(functionResult)) {
+        return (LZ4F_errorCodes)(-(ptrdiff_t)functionResult);
+    }
+    return LZ4F_OK_NoError;
+}
+
+/* Encode an LZ4F_errorCodes value as a function result, by negating it. */
+static LZ4F_errorCode_t LZ4F_returnErrorCode(LZ4F_errorCodes code)
+{
+    ptrdiff_t const negated = -(ptrdiff_t)code;
+    /* A compilation error here means sizeof(ptrdiff_t) is not large enough */
+    LZ4F_STATIC_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));
+    return (LZ4F_errorCode_t)negated;
+}
+
+#define RETURN_ERROR(e) return LZ4F_returnErrorCode(LZ4F_ERROR_ ## e)
+
+#define RETURN_ERROR_IF(c,e) if (c) RETURN_ERROR(e)
+
+#define FORWARD_IF_ERROR(r) if (LZ4F_isError(r)) return (r)
+
+/* Value of LZ4F_VERSION this file was compiled with. */
+unsigned LZ4F_getVersion(void)
+{
+    return LZ4F_VERSION;
+}
+
+/* Highest compression level, as defined by LZ4HC_CLEVEL_MAX. */
+int LZ4F_compressionLevel_max(void)
+{
+    return LZ4HC_CLEVEL_MAX;
+}
+
+/* Block size in bytes for @blockSizeID (0 selects the default ID),
+ * or an error code when the ID is outside [LZ4F_max64KB, LZ4F_max4MB]. */
+size_t LZ4F_getBlockSize(LZ4F_blockSizeID_t blockSizeID)
+{
+    static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
+    LZ4F_blockSizeID_t const bsid =
+        (blockSizeID == 0) ? LZ4F_BLOCKSIZEID_DEFAULT : blockSizeID;
+
+    if (bsid >= LZ4F_max64KB && bsid <= LZ4F_max4MB) {
+        return blockSizes[(int)bsid - (int)LZ4F_max64KB];
+    }
+    RETURN_ERROR(maxBlockSize_invalid);
+}
+
+/*-************************************
+* Private functions
+**************************************/
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
+
+/* One-byte header checksum : second byte of XXH32(header, length, seed 0). */
+static BYTE LZ4F_headerChecksum (const void* header, size_t length)
+{
+    return (BYTE)(XXH32(header, length, 0) >> 8);
+}
+
+
+/*-************************************
+* Simple-pass compression functions
+**************************************/
+/* Smallest block size ID, not exceeding @requestedBSID, whose block can hold @srcSize.
+ * Falls back on @requestedBSID when no smaller ID is large enough. */
+static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID,
+                                           const size_t srcSize)
+{
+    LZ4F_blockSizeID_t candidate;
+    size_t capacity = 64 KB;
+
+    for (candidate = LZ4F_max64KB;
+         candidate < requestedBSID;
+         candidate = (LZ4F_blockSizeID_t)((int)candidate + 1)) {
+        if (srcSize <= capacity) {
+            return candidate;
+        }
+        capacity <<= 2;   /* each ID step multiplies the block size by 4 */
+    }
+    return requestedBSID;
+}
+
+/*! LZ4F_compressBound_internal() :
+ * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
+ * preferencesPtr is optional : if NULL is provided, preferences will be set to cover worst case scenario
+ * (content checksum and block checksum both enabled).
+ * alreadyBuffered is the amount of data assumed to be waiting in the internal buffer;
+ * it is clamped to (blockSize - 1).
+ * @return is always the same for a given set of arguments, so it can be relied upon to size reusable buffers.
+ * When srcSize==0, the result is an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ */
+static size_t LZ4F_compressBound_internal(size_t srcSize,
+ const LZ4F_preferences_t* preferencesPtr,
+ size_t alreadyBuffered)
+{
+ LZ4F_preferences_t prefsNull = LZ4F_INIT_PREFERENCES;
+ prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* worst case */
+ prefsNull.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; /* worst case */
+ { const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
+ /* srcSize==0 is treated as a flush request */
+ U32 const flush = prefsPtr->autoFlush | (srcSize==0);
+ LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
+ size_t const blockSize = LZ4F_getBlockSize(blockID);
+ size_t const maxBuffered = blockSize - 1;
+ size_t const bufferedSize = MIN(alreadyBuffered, maxBuffered);
+ size_t const maxSrcSize = srcSize + bufferedSize;
+ unsigned const nbFullBlocks = (unsigned)(maxSrcSize / blockSize);
+ /* remainder via mask : valid because every block size is a power of 2 */
+ size_t const partialBlockSize = maxSrcSize & (blockSize-1);
+ /* the trailing partial block only produces output when flushing */
+ size_t const lastBlockSize = flush ? partialBlockSize : 0;
+ unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0);
+
+ size_t const blockCRCSize = BFSize * prefsPtr->frameInfo.blockChecksumFlag;
+ /* frame end : one block header, plus the optional content checksum */
+ size_t const frameEnd = BHSize + (prefsPtr->frameInfo.contentChecksumFlag*BFSize);
+
+ /* per-block overhead + payload of full blocks + partial block + frame end */
+ return ((BHSize + blockCRCSize) * nbBlocks) +
+ (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;
+ }
+}
+
+/* Worst-case size of a complete frame holding @srcSize bytes :
+ * largest possible header + block bound computed with autoFlush forced to 1.
+ * @preferencesPtr can be NULL, in which case zeroed preferences are used. */
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefs;
+
+    if (preferencesPtr == NULL) {
+        MEM_INIT(&prefs, 0, sizeof(prefs));
+    } else {
+        prefs = *preferencesPtr;
+    }
+    prefs.autoFlush = 1;
+
+    /* maxFHSize : max header size, including optional fields */
+    return maxFHSize + LZ4F_compressBound_internal(srcSize, &prefs, 0);
+}
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ * Compress srcBuffer using a dictionary, in a single step.
+ * cdict can be NULL, in which case, no dictionary is used.
+ * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ * however, it's the only way to provide a dictID, so it's not recommended.
+ * @return : number of bytes written into dstBuffer,
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr)
+{
+ LZ4F_preferences_t prefs;
+ LZ4F_compressOptions_t options;
+ BYTE* const dstStart = (BYTE*) dstBuffer;
+ BYTE* dstPtr = dstStart;
+ BYTE* const dstEnd = dstStart + dstCapacity;
+
+ /* work on a private copy of the preferences (zeroed when none is provided) */
+ if (preferencesPtr!=NULL)
+ prefs = *preferencesPtr;
+ else
+ MEM_INIT(&prefs, 0, sizeof(prefs));
+ if (prefs.frameInfo.contentSize != 0)
+ prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */
+
+ /* shrink the block size when the input is small, and always flush */
+ prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);
+ prefs.autoFlush = 1;
+ if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
+ prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* only one block => no need for inter-block link */
+
+ MEM_INIT(&options, 0, sizeof(options));
+ options.stableSrc = 1;
+
+ /* the capacity check uses the adjusted preferences, not the caller's */
+ RETURN_ERROR_IF(dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs), dstMaxSize_tooSmall);
+
+ /* step 1 : frame header */
+ { size_t const headerSize = LZ4F_compressBegin_usingCDict(cctx, dstBuffer, dstCapacity, cdict, &prefs); /* write header */
+ FORWARD_IF_ERROR(headerSize);
+ dstPtr += headerSize; /* header size */ }
+
+ /* step 2 : the whole input, in a single update */
+ assert(dstEnd >= dstPtr);
+ { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, (size_t)(dstEnd-dstPtr), srcBuffer, srcSize, &options);
+ FORWARD_IF_ERROR(cSize);
+ dstPtr += cSize; }
+
+ /* step 3 : frame end */
+ assert(dstEnd >= dstPtr);
+ { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, (size_t)(dstEnd-dstPtr), &options); /* flush last block, and generate suffix */
+ FORWARD_IF_ERROR(tailSize);
+ dstPtr += tailSize; }
+
+ assert(dstEnd >= dstStart);
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressFrame() :
+ * Compress an entire srcBuffer into a valid LZ4 frame, in a single step.
+ * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_preferences_t* preferencesPtr)
+{
+ size_t result;
+#if (LZ4F_HEAPMODE)
+ /* heap mode : the context is created and released through the regular API */
+ LZ4F_cctx_t* cctxPtr;
+ result = LZ4F_createCompressionContext(&cctxPtr, LZ4F_VERSION);
+ FORWARD_IF_ERROR(result);
+#else
+ /* stack mode : the context, and the fast lz4 state, are local variables */
+ LZ4F_cctx_t cctx;
+ LZ4_stream_t lz4ctx;
+ LZ4F_cctx_t* const cctxPtr = &cctx;
+
+ MEM_INIT(&cctx, 0, sizeof(cctx));
+ cctx.version = LZ4F_VERSION;
+ cctx.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
+ /* fast levels : plug the on-stack state, marked as already allocated */
+ if ( preferencesPtr == NULL
+ || preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN ) {
+ LZ4_initStream(&lz4ctx, sizeof(lz4ctx));
+ cctxPtr->lz4CtxPtr = &lz4ctx;
+ cctxPtr->lz4CtxAlloc = 1;
+ cctxPtr->lz4CtxState = 1;
+ }
+#endif
+ DEBUGLOG(4, "LZ4F_compressFrame");
+
+ result = LZ4F_compressFrame_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+ srcBuffer, srcSize,
+ NULL, preferencesPtr);
+
+#if (LZ4F_HEAPMODE)
+ LZ4F_freeCompressionContext(cctxPtr);
+#else
+ /* HC levels : lz4CtxPtr was not set above, so it is released here;
+ * in the fast case it points to the local lz4ctx and must not be freed */
+ if ( preferencesPtr != NULL
+ && preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN ) {
+ LZ4F_free(cctxPtr->lz4CtxPtr, cctxPtr->cmem);
+ }
+#endif
+ return result;
+}
+
+
+/*-***************************************************
+* Dictionary compression
+*****************************************************/
+
+/* Digested dictionary : a private copy of the dictionary content,
+ * plus one fast and one HC stream with that content loaded. */
+struct LZ4F_CDict_s {
+ LZ4F_CustomMem cmem; /* allocation functions used to create, and later free, the members below */
+ void* dictContent; /* copy of (at most the last 64 KB of) the dictionary */
+ LZ4_stream_t* fastCtx; /* fast stream, dictionary loaded with LZ4_loadDict() */
+ LZ4_streamHC_t* HCCtx; /* HC stream, dictionary loaded with LZ4_loadDictHC() */
+}; /* typedef'd to LZ4F_CDict within lz4frame_static.h */
+
+/*! LZ4F_createCDict_advanced() :
+ * Create a digested dictionary, using the allocation functions of @cmem.
+ * Only the last 64 KB of @dictBuffer are used. That content is copied,
+ * then loaded into both a fast stream and an HC stream.
+ * @return : the new LZ4F_CDict, or NULL if any allocation failed
+ *           (everything allocated so far is released in that case).
+ */
+LZ4F_CDict*
+LZ4F_createCDict_advanced(LZ4F_CustomMem cmem, const void* dictBuffer, size_t dictSize)
+{
+    const char* dictStart = (const char*)dictBuffer;
+    LZ4F_CDict* const cdict = (LZ4F_CDict*)LZ4F_malloc(sizeof(*cdict), cmem);
+    DEBUGLOG(4, "LZ4F_createCDict_advanced");
+    if (!cdict) return NULL;
+    cdict->cmem = cmem;
+    /* keep only the tail of oversized dictionaries */
+    if (dictSize > 64 KB) {
+        dictStart += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    /* all three members are assigned before the failure check,
+     * so that LZ4F_freeCDict() never reads an uninitialized pointer */
+    cdict->dictContent = LZ4F_malloc(dictSize, cmem);
+    cdict->fastCtx = (LZ4_stream_t*)LZ4F_malloc(sizeof(LZ4_stream_t), cmem);
+    if (cdict->fastCtx)
+        LZ4_initStream(cdict->fastCtx, sizeof(LZ4_stream_t));
+    cdict->HCCtx = (LZ4_streamHC_t*)LZ4F_malloc(sizeof(LZ4_streamHC_t), cmem);
+    if (cdict->HCCtx)
+        /* the HC state must go through the HC initializer, not LZ4_initStream() */
+        LZ4_initStreamHC(cdict->HCCtx, sizeof(LZ4_streamHC_t));
+    if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) {
+        LZ4F_freeCDict(cdict);
+        return NULL;
+    }
+    memcpy(cdict->dictContent, dictStart, dictSize);
+    LZ4_loadDict (cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize);
+    LZ4_setCompressionLevel(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
+    LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize);
+    return cdict;
+}
+
+/*! LZ4F_createCDict() :
+ * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after LZ4F_CDict creation, since its content is copied within CDict
+ * @return : digested dictionary for compression, or NULL if failed */
+LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize)
+{
+    LZ4F_CDict* cdict;
+    DEBUGLOG(4, "LZ4F_createCDict");
+    /* same as the advanced variant, with the default memory functions */
+    cdict = LZ4F_createCDict_advanced(LZ4F_defaultCMem, dictBuffer, dictSize);
+    return cdict;
+}
+
+/* Release a CDict and everything it owns. NULL is accepted and ignored. */
+void LZ4F_freeCDict(LZ4F_CDict* cdict)
+{
+    if (cdict != NULL) {
+        /* copy the allocator out first : it is needed to free cdict itself */
+        LZ4F_CustomMem const cmem = cdict->cmem;
+        LZ4F_free(cdict->dictContent, cmem);
+        LZ4F_free(cdict->fastCtx, cmem);
+        LZ4F_free(cdict->HCCtx, cmem);
+        LZ4F_free(cdict, cmem);
+    }
+}
+
+
+/*-*********************************
+* Advanced compression functions
+***********************************/
+
+/* Allocate a zeroed compression context with @customMem and record @version in it.
+ * @return : the new context, or NULL if the allocation failed. */
+LZ4F_cctx*
+LZ4F_createCompressionContext_advanced(LZ4F_CustomMem customMem, unsigned version)
+{
+    LZ4F_cctx* const ctx = (LZ4F_cctx*)LZ4F_calloc(sizeof(LZ4F_cctx), customMem);
+
+    if (ctx != NULL) {
+        ctx->cmem = customMem;
+        ctx->version = version;
+        ctx->cStage = 0;   /* Uninitialized. Next stage : init cctx */
+    }
+    return ctx;
+}
+
+/*! LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential incompatible differences between different binaries.
+ * The function will provide a pointer to an allocated LZ4F_compressionContext_t object.
+ * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+**/
+LZ4F_errorCode_t
+LZ4F_createCompressionContext(LZ4F_cctx** LZ4F_compressionContextPtr, unsigned version)
+{
+    LZ4F_cctx* created;
+
+    /* a NULL destination violates the contract : caught in debug builds,
+     * and still reported as an error if it happens in production */
+    assert(LZ4F_compressionContextPtr != NULL);
+    RETURN_ERROR_IF(LZ4F_compressionContextPtr == NULL, parameter_null);
+
+    created = LZ4F_createCompressionContext_advanced(LZ4F_defaultCMem, version);
+    *LZ4F_compressionContextPtr = created;
+    RETURN_ERROR_IF(created == NULL, allocation_failed);
+    return LZ4F_OK_NoError;
+}
+
+
+/* Release a compression context together with its lz4 state and internal buffer.
+ * NULL is accepted. Always returns LZ4F_OK_NoError. */
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctxPtr)
+{
+    if (cctxPtr != NULL) {
+        /* copy the allocator out first : it is needed to free cctxPtr itself */
+        LZ4F_CustomMem const cmem = cctxPtr->cmem;
+        LZ4F_free(cctxPtr->lz4CtxPtr, cmem);  /* note: LZ4_streamHC_t and LZ4_stream_t are simple POD types */
+        LZ4F_free(cctxPtr->tmpBuff, cmem);
+        LZ4F_free(cctxPtr, cmem);
+    }
+    return LZ4F_OK_NoError;
+}
+
+
+/**
+ * This function prepares the internal LZ4(HC) stream for a new compression,
+ * resetting the context and attaching the dictionary, if there is one.
+ *
+ * It needs to be called at the beginning of each independent compression
+ * stream (i.e., at the beginning of a frame in blockLinked mode, or at the
+ * beginning of each block in blockIndependent mode).
+ */
+static void LZ4F_initStream(void* ctx,
+                            const LZ4F_CDict* cdict,
+                            int level,
+                            LZ4F_blockMode_t blockMode) {
+    if (level >= LZ4HC_CLEVEL_MIN) {
+        /* HC levels : always reset, then attach the HC dictionary (or none) */
+        LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ctx;
+        LZ4_resetStreamHC_fast(hcStream, level);
+        LZ4_attach_HC_dictionary(hcStream, cdict ? cdict->HCCtx : NULL);
+        return;
+    }
+
+    {   LZ4_stream_t* const fastStream = (LZ4_stream_t*)ctx;
+        /* A reset is only needed when LZ4_compress_fast_continue() will be
+         * called, i.e. with a dictionary or in linked-block mode. The
+         * one-shot APIs perform their own reset, knowing which tableType
+         * they need, so resetting here would be wasted work. */
+        if (cdict != NULL || blockMode == LZ4F_blockLinked) {
+            LZ4_resetStream_fast(fastStream);
+        }
+        LZ4_attach_dictionary(fastStream, cdict ? cdict->fastCtx : NULL);
+    }
+}
+
+/* Size of the lz4 state matching a context type ID : 1 = lz4, 2 = lz4hc, anything else = 0. */
+static int ctxTypeID_to_size(int ctxTypeID) {
+    if (ctxTypeID == 1) {
+        return LZ4_sizeofState();
+    }
+    if (ctxTypeID == 2) {
+        return LZ4_sizeofStateHC();
+    }
+    return 0;
+}
+
+/*! LZ4F_compressBegin_usingCDict() :
+ * init streaming compression AND writes frame header into @dstBuffer.
+ * @dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * @return : number of bytes written into @dstBuffer for the header
+ * or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr)
+{
+ LZ4F_preferences_t const prefNull = LZ4F_INIT_PREFERENCES;
+ BYTE* const dstStart = (BYTE*)dstBuffer;
+ BYTE* dstPtr = dstStart;
+
+ /* the largest possible header is required, whatever the actual header size */
+ RETURN_ERROR_IF(dstCapacity < maxFHSize, dstMaxSize_tooSmall);
+ if (preferencesPtr == NULL) preferencesPtr = &prefNull;
+ cctxPtr->prefs = *preferencesPtr;
+
+ /* cctx Management */
+ /* ctxTypeID : 1 = lz4 (fast) state, 2 = lz4hc state */
+ { U16 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;
+ int requiredSize = ctxTypeID_to_size(ctxTypeID);
+ int allocatedSize = ctxTypeID_to_size(cctxPtr->lz4CtxAlloc);
+ if (allocatedSize < requiredSize) {
+ /* not enough space allocated */
+ LZ4F_free(cctxPtr->lz4CtxPtr, cctxPtr->cmem);
+ if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+ /* must take ownership of memory allocation,
+ * in order to respect custom allocator contract */
+ cctxPtr->lz4CtxPtr = LZ4F_malloc(sizeof(LZ4_stream_t), cctxPtr->cmem);
+ if (cctxPtr->lz4CtxPtr)
+ LZ4_initStream(cctxPtr->lz4CtxPtr, sizeof(LZ4_stream_t));
+ } else {
+ cctxPtr->lz4CtxPtr = LZ4F_malloc(sizeof(LZ4_streamHC_t), cctxPtr->cmem);
+ if (cctxPtr->lz4CtxPtr)
+ LZ4_initStreamHC(cctxPtr->lz4CtxPtr, sizeof(LZ4_streamHC_t));
+ }
+ RETURN_ERROR_IF(cctxPtr->lz4CtxPtr == NULL, allocation_failed);
+ cctxPtr->lz4CtxAlloc = ctxTypeID;
+ cctxPtr->lz4CtxState = ctxTypeID;
+ } else if (cctxPtr->lz4CtxState != ctxTypeID) {
+ /* otherwise, a sufficient buffer is already allocated,
+ * but we need to reset it to the correct context type */
+ if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+ LZ4_initStream((LZ4_stream_t*)cctxPtr->lz4CtxPtr, sizeof(LZ4_stream_t));
+ } else {
+ LZ4_initStreamHC((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, sizeof(LZ4_streamHC_t));
+ LZ4_setCompressionLevel((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+ }
+ cctxPtr->lz4CtxState = ctxTypeID;
+ } }
+
+ /* Buffer Management */
+ if (cctxPtr->prefs.frameInfo.blockSizeID == 0)
+ cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+ cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
+
+ /* with autoFlush, no input is buffered : only linked blocks need room for past data;
+ * without it, a full block must fit, plus extra room in linked mode */
+ { size_t const requiredBuffSize = preferencesPtr->autoFlush ?
+ ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 64 KB : 0) : /* only needs past data up to window size */
+ cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 128 KB : 0);
+
+ /* the existing buffer is kept whenever it is already large enough */
+ if (cctxPtr->maxBufferSize < requiredBuffSize) {
+ /* size is zeroed first, so that a failed allocation leaves a consistent state */
+ cctxPtr->maxBufferSize = 0;
+ LZ4F_free(cctxPtr->tmpBuff, cctxPtr->cmem);
+ cctxPtr->tmpBuff = (BYTE*)LZ4F_calloc(requiredBuffSize, cctxPtr->cmem);
+ RETURN_ERROR_IF(cctxPtr->tmpBuff == NULL, allocation_failed);
+ cctxPtr->maxBufferSize = requiredBuffSize;
+ } }
+ cctxPtr->tmpIn = cctxPtr->tmpBuff;
+ cctxPtr->tmpInSize = 0;
+ (void)XXH32_reset(&(cctxPtr->xxh), 0);
+
+ /* context init */
+ cctxPtr->cdict = cdict;
+ if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
+ /* frame init only for blockLinked : blockIndependent will be init at each block */
+ LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked);
+ }
+ if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) {
+ LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
+ }
+
+ /* Magic Number */
+ LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
+ dstPtr += 4;
+ /* headerStart : the header checksum covers the bytes after the magic number */
+ { BYTE* const headerStart = dstPtr;
+
+ /* FLG Byte */
+ *dstPtr++ = (BYTE)(((1 & _2BITS) << 6) /* Version('01') */
+ + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
+ + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
+ + ((unsigned)(cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
+ + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
+ + (cctxPtr->prefs.frameInfo.dictID > 0) );
+ /* BD Byte */
+ *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
+ /* Optional Frame content size field */
+ if (cctxPtr->prefs.frameInfo.contentSize) {
+ LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize);
+ dstPtr += 8;
+ cctxPtr->totalInSize = 0;
+ }
+ /* Optional dictionary ID field */
+ if (cctxPtr->prefs.frameInfo.dictID) {
+ LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID);
+ dstPtr += 4;
+ }
+ /* Header CRC Byte */
+ *dstPtr = LZ4F_headerChecksum(headerStart, (size_t)(dstPtr - headerStart));
+ dstPtr++;
+ }
+
+ cctxPtr->cStage = 1; /* header written, now request input data block */
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressBegin() :
+ *  Starts a new frame without any dictionary :
+ *  this is a plain forward to LZ4F_compressBegin_usingCDict() with a NULL cdict.
+ * @dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * @preferencesPtr may be NULL, in which case default parameters are selected.
+ * @return : number of bytes written into @dstBuffer for the frame header,
+ *           or an error code (testable with LZ4F_isError())
+ */
+size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    const LZ4F_CDict* const noDict = NULL;
+    return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                         noDict, preferencesPtr);
+}
+
+
+/* LZ4F_compressBound() :
+ * @return : minimum capacity of dstBuffer needed to handle the worst case for @srcSize.
+ * @preferencesPtr is optional : when NULL, the bound is computed for the worst case scenario.
+ * With autoFlush enabled, no data is assumed to be already buffered;
+ * otherwise the largest possible amount of buffered data is assumed.
+ * This function cannot fail.
+ */
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    size_t const alreadyBuffered =
+        (preferencesPtr != NULL && preferencesPtr->autoFlush) ? 0 : (size_t)-1;
+    return LZ4F_compressBound_internal(srcSize, preferencesPtr, alreadyBuffered);
+}
+
+
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level, const LZ4F_CDict* cdict); /* common signature of the block compressors returned by LZ4F_selectCompression() */
+
+
+/*! LZ4F_makeBlock():
+ *  Emits one block into @dst : block header, payload, and optional block checksum.
+ *  The payload is the output of @compress when it is non-empty and strictly smaller
+ *  than @srcSize; otherwise @src is copied verbatim and the header carries
+ *  LZ4F_BLOCKUNCOMPRESSED_FLAG.
+ *  assumption : dst buffer capacity is >= BHSize + srcSize + crcSize
+ * @return : total nb of bytes written into @dst
+ */
+static size_t LZ4F_makeBlock(void* dst,
+                             const void* src, size_t srcSize,
+                             compressFunc_t compress, void* lz4ctx, int level,
+                             const LZ4F_CDict* cdict,
+                             LZ4F_blockChecksum_t crcFlag)
+{
+    BYTE* const blockHeader = (BYTE*)dst;
+    BYTE* const payload = blockHeader + BHSize;
+    U32 payloadSize;
+    assert(compress != NULL);
+    /* compressor gets one byte less than srcSize as budget : output must be a net gain */
+    payloadSize = (U32)compress(lz4ctx, (const char*)src, (char*)payload,
+                                (int)(srcSize), (int)(srcSize-1),
+                                level, cdict);
+
+    if (payloadSize != 0 && payloadSize < srcSize) {
+        /* compressed block */
+        LZ4F_writeLE32(blockHeader, payloadSize);
+    } else {
+        /* compression failed or did not help : store raw data */
+        payloadSize = (U32)srcSize;
+        LZ4F_writeLE32(blockHeader, payloadSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);
+        memcpy(payload, src, srcSize);
+    }
+    if (crcFlag) {
+        /* checksum covers the payload as stored (compressed or raw) */
+        U32 const crc32 = XXH32(payload, payloadSize, 0);
+        LZ4F_writeLE32(payload + payloadSize, crc32);
+    }
+    return BHSize + payloadSize + ((U32)crcFlag)*BFSize;
+}
+
+
+/* Fast-mode compressor for independent blocks :
+ * the stream is (re)initialized for every block.
+ * Negative levels are translated into an acceleration factor. */
+static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    int accel = 1;
+    if (level < 0) accel = -level + 1;
+    DEBUGLOG(5, "LZ4F_compressBlock (srcSize=%i)", srcSize);
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict == NULL) {
+        return LZ4_compress_fast_extState_fastReset(ctx, src, dst, srcSize, dstCapacity, accel);
+    }
+    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, accel);
+}
+
+/* Fast-mode compressor for linked blocks :
+ * continues the existing stream, which is initialized once at the beginning of the frame. */
+static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)ctx;
+    int const accel = (level >= 0) ? 1 : -level + 1;
+    (void)cdict;   /* dictionary was attached at frame start */
+    DEBUGLOG(5, "LZ4F_compressBlock_continue (srcSize=%i)", srcSize);
+    return LZ4_compress_fast_continue(stream, src, dst, srcSize, dstCapacity, accel);
+}
+
+/* HC compressor for independent blocks :
+ * the stream is (re)initialized for every block. */
+static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    return (cdict != NULL)
+         ? LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity)
+         : LZ4_compress_HC_extStateHC_fastReset(ctx, src, dst, srcSize, dstCapacity, level);
+}
+
+/* HC compressor for linked blocks :
+ * continues the existing HC stream, which is initialized once at the beginning of the frame. */
+static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ctx;
+    (void)cdict;
+    (void)level;
+    return LZ4_compress_HC_continue(hcStream, src, dst, srcSize, dstCapacity);
+}
+
+/* Placeholder "compressor" used for uncompressed blocks :
+ * ignores every argument and reports 0 compressed bytes,
+ * which LZ4F_makeBlock() handles by storing the source verbatim. */
+static int LZ4F_doNotCompressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    (void)ctx;
+    (void)src;
+    (void)dst;
+    (void)srcSize;
+    (void)dstCapacity;
+    (void)level;
+    (void)cdict;
+    return 0;
+}
+
+/* Picks the block compressor matching the requested mode :
+ * - uncompressed blocks   => LZ4F_doNotCompressBlock
+ * - level < LZ4HC_CLEVEL_MIN => fast variants
+ * - otherwise             => HC variants
+ * Within each family, independent blocks use the self-initializing variant,
+ * any other block mode uses the "_continue" variant. */
+static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level, LZ4F_blockCompression_t compressMode)
+{
+    int const independent = (blockMode == LZ4F_blockIndependent);
+
+    if (compressMode == LZ4B_UNCOMPRESSED) return LZ4F_doNotCompressBlock;
+
+    if (level >= LZ4HC_CLEVEL_MIN) {
+        return independent ? LZ4F_compressBlockHC : LZ4F_compressBlockHC_continue;
+    }
+    return independent ? LZ4F_compressBlock : LZ4F_compressBlock_continue;
+}
+
+/* Save history (up to 64KB) into @tmpBuff,
+ * using the fast or HC variant depending on the compression level.
+ * @return : value reported by LZ4_saveDict() / LZ4_saveDictHC() */
+static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr)
+{
+    char* const saveArea = (char*)(cctxPtr->tmpBuff);
+    int const useHC = !(cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN);
+
+    if (useHC) {
+        return LZ4_saveDictHC ((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), saveArea, 64 KB);
+    }
+    return LZ4_saveDict ((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), saveArea, 64 KB);
+}
+
+typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus; /* records whether the last block emitted by an update call came from the internal tmp buffer or directly from the caller's src buffer */
+
+static const LZ4F_compressOptions_t k_cOptionsNull = { 0, { 0, 0, 0 } }; /* all-zero options, substituted when the caller passes a NULL compressOptionsPtr */
+
+
+/*! LZ4F_compressUpdateImpl() :
+ *  Shared implementation behind LZ4F_compressUpdate() and LZ4F_uncompressedUpdate().
+ *  Can be called repetitively to compress as much data as necessary.
+ *  When successful, the function always entirely consumes @srcBuffer :
+ *  src data is either buffered or compressed into @dstBuffer.
+ *  If @blockCompression does not match the mode of the previous call, buffered data is
+ *  flushed first (using the previous mode), and operations continue with the new mode.
+ *  If that flush fails, its error code is returned and no further data is processed.
+ * @dstCapacity MUST be >= LZ4F_compressBound(srcSize, preferencesPtr) when block compression is turned on.
+ * @compressOptionsPtr is optional : provide NULL to mean "default".
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  After an error, the state is left in a UB state, and must be re-initialized.
+ */
+static size_t LZ4F_compressUpdateImpl(LZ4F_cctx* cctxPtr,
+                     void* dstBuffer, size_t dstCapacity,
+                     const void* srcBuffer, size_t srcSize,
+                     const LZ4F_compressOptions_t* compressOptionsPtr,
+                     LZ4F_blockCompression_t blockCompression)
+{
+    size_t const blockSize = cctxPtr->maxBlockSize;
+    const BYTE* srcPtr = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcPtr + srcSize;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    LZ4F_lastBlockStatus lastBlockCompressed = notDone;
+    compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel, blockCompression);
+    size_t bytesWritten;
+    DEBUGLOG(4, "LZ4F_compressUpdate (srcSize=%zu)", srcSize);
+
+    RETURN_ERROR_IF(cctxPtr->cStage != 1, compressionState_uninitialized);   /* state must be initialized and waiting for next block */
+    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize))
+        RETURN_ERROR(dstMaxSize_tooSmall);
+
+    if (blockCompression == LZ4B_UNCOMPRESSED && dstCapacity < srcSize)
+        RETURN_ERROR(dstMaxSize_tooSmall);
+
+    /* flush currently written block, to continue with new block compression */
+    if (cctxPtr->blockCompression != blockCompression) {
+        bytesWritten = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr);
+        /* an error code is a very large size_t : it must never be added to dstPtr */
+        if (LZ4F_isError(bytesWritten)) return bytesWritten;
+        dstPtr += bytesWritten;
+        cctxPtr->blockCompression = blockCompression;
+    }
+
+    if (compressOptionsPtr == NULL) compressOptionsPtr = &k_cOptionsNull;
+
+    /* complete tmp buffer */
+    if (cctxPtr->tmpInSize > 0) {   /* some data already within tmp buffer */
+        size_t const sizeToCopy = blockSize - cctxPtr->tmpInSize;
+        assert(blockSize > cctxPtr->tmpInSize);
+        if (sizeToCopy > srcSize) {
+            /* src does not complete a block : just append it to tmpIn */
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize);
+            srcPtr = srcEnd;
+            cctxPtr->tmpInSize += srcSize;
+            /* still needs some CRC */
+        } else {
+            /* complete tmpIn block and then compress it */
+            lastBlockCompressed = fromTmpBuffer;
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
+            srcPtr += sizeToCopy;
+
+            dstPtr += LZ4F_makeBlock(dstPtr,
+                                     cctxPtr->tmpIn, blockSize,
+                                     compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                     cctxPtr->cdict,
+                                     cctxPtr->prefs.frameInfo.blockChecksumFlag);
+            /* linked blocks : keep the block just compressed in place, it serves as history */
+            if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize;
+            cctxPtr->tmpInSize = 0;
+    }   }
+
+    while ((size_t)(srcEnd - srcPtr) >= blockSize) {
+        /* compress full blocks, directly from src */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_makeBlock(dstPtr,
+                                 srcPtr, blockSize,
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict,
+                                 cctxPtr->prefs.frameInfo.blockChecksumFlag);
+        srcPtr += blockSize;
+    }
+
+    if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) {
+        /* autoFlush : remaining input (< blockSize) is compressed */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_makeBlock(dstPtr,
+                                 srcPtr, (size_t)(srcEnd - srcPtr),
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict,
+                                 cctxPtr->prefs.frameInfo.blockChecksumFlag);
+        srcPtr = srcEnd;
+    }
+
+    /* preserve dictionary within @tmpBuff whenever necessary */
+    if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) {
+        /* linked blocks are only supported in compressed mode, see LZ4F_uncompressedUpdate */
+        assert(blockCompression == LZ4B_COMPRESSED);
+        if (compressOptionsPtr->stableSrc) {
+            cctxPtr->tmpIn = cctxPtr->tmpBuff;   /* src is stable : dictionary remains in src across invocations */
+        } else {
+            int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+            assert(0 <= realDictSize && realDictSize <= 64 KB);
+            cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+        }
+    }
+
+    /* keep tmpIn within limits */
+    if (!(cctxPtr->prefs.autoFlush)  /* no autoflush : there may be some data left within internal buffer */
+      && (cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize) )  /* not enough room to store next block */
+    {
+        /* only preserve 64KB within internal buffer. Ensures there is enough room for next block.
+         * note: this situation necessarily implies lastBlockCompressed==fromTmpBuffer */
+        int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+        assert((cctxPtr->tmpIn + blockSize) <= (cctxPtr->tmpBuff + cctxPtr->maxBufferSize));
+    }
+
+    /* some input data left, necessarily < blockSize */
+    if (srcPtr < srcEnd) {
+        /* fill tmp buffer */
+        size_t const sizeToCopy = (size_t)(srcEnd - srcPtr);
+        memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
+        cctxPtr->tmpInSize = sizeToCopy;
+    }
+
+    /* content checksum is computed over the whole input, whether buffered or compressed */
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
+        (void)XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize);
+
+    cctxPtr->totalInSize += srcSize;
+    return (size_t)(dstPtr - dstStart);
+}
+
+/*! LZ4F_compressUpdate() :
+ *  Public entry point for compressed blocks : forwards to LZ4F_compressUpdateImpl()
+ *  with LZ4B_COMPRESSED. Can be called repetitively to compress as much data as necessary.
+ *  When successful, the function always entirely consumes @srcBuffer :
+ *  src data is either buffered or compressed into @dstBuffer.
+ *  If the previous call wrote uncompressed blocks, buffered data is flushed
+ *  before compression resumes.
+ * @dstCapacity MUST be >= LZ4F_compressBound(srcSize, preferencesPtr).
+ * @compressOptionsPtr is optional : provide NULL to mean "default".
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  After an error, the state is left in a UB state, and must be re-initialized.
+ */
+size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
+                           void* dstBuffer, size_t dstCapacity,
+                           const void* srcBuffer, size_t srcSize,
+                           const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_blockCompression_t const mode = LZ4B_COMPRESSED;
+    return LZ4F_compressUpdateImpl(cctxPtr,
+                                   dstBuffer, dstCapacity,
+                                   srcBuffer, srcSize,
+                                   compressOptionsPtr, mode);
+}
+
+/*! LZ4F_uncompressedUpdate() :
+ *  Same calling convention as LZ4F_compressUpdate(), but requests blocks to be stored
+ *  uncompressed : forwards to LZ4F_compressUpdateImpl() with LZ4B_UNCOMPRESSED.
+ *  When successful, the function always entirely consumes @srcBuffer :
+ *  src data is either buffered or written into @dstBuffer.
+ *  If the previous call wrote compressed blocks, buffered data is flushed
+ *  before uncompressed output starts.
+ *  This is only supported when LZ4F_blockIndependent is used :
+ *  any other block mode results in a blockMode_invalid error.
+ * @dstCapacity MUST be >= LZ4F_compressBound(srcSize, preferencesPtr).
+ * @compressOptionsPtr is optional : provide NULL to mean "default".
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  After an error, the state is left in a UB state, and must be re-initialized.
+ */
+size_t LZ4F_uncompressedUpdate(LZ4F_cctx* cctxPtr,
+                               void* dstBuffer, size_t dstCapacity,
+                               const void* srcBuffer, size_t srcSize,
+                               const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_blockCompression_t const mode = LZ4B_UNCOMPRESSED;
+    RETURN_ERROR_IF(cctxPtr->prefs.frameInfo.blockMode != LZ4F_blockIndependent, blockMode_invalid);
+    return LZ4F_compressUpdateImpl(cctxPtr,
+                                   dstBuffer, dstCapacity,
+                                   srcBuffer, srcSize,
+                                   compressOptionsPtr, mode);
+}
+
+
+/*! LZ4F_flush() :
+ *  When compressed data must be sent immediately, without waiting for a block to be filled,
+ *  invoke LZ4F_flush(), which immediately turns any data buffered within LZ4F_cctx into a block.
+ *  The block is produced with the compression mode currently recorded in the context.
+ * @return : number of bytes written into dstBuffer.
+ *           It can be zero, this means there was no data left within LZ4F_cctx.
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ *  LZ4F_compressOptions_t* is optional. NULL is a valid argument.
+ */
+size_t LZ4F_flush(LZ4F_cctx* cctxPtr,
+                  void* dstBuffer, size_t dstCapacity,
+                  const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    compressFunc_t compress;
+
+    /* nothing buffered : nothing to flush (checked before any state validation) */
+    if (cctxPtr->tmpInSize == 0) return 0;
+    RETURN_ERROR_IF(cctxPtr->cStage != 1, compressionState_uninitialized);
+    RETURN_ERROR_IF(dstCapacity < (cctxPtr->tmpInSize + BHSize + BFSize), dstMaxSize_tooSmall);
+    (void)compressOptionsPtr;   /* not useful (yet) */
+
+    /* buffered data is compressed with the mode it was buffered under */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode,
+                                      cctxPtr->prefs.compressionLevel,
+                                      cctxPtr->blockCompression);
+
+    /* turn the tmp buffer content into one block */
+    dstPtr += LZ4F_makeBlock(dstPtr,
+                             cctxPtr->tmpIn, cctxPtr->tmpInSize,
+                             compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                             cctxPtr->cdict,
+                             cctxPtr->prefs.frameInfo.blockChecksumFlag);
+    assert(((void)"flush overflows dstBuffer!", (size_t)(dstPtr - dstStart) <= dstCapacity));
+
+    /* linked blocks : flushed data stays in place as history, next input goes after it */
+    if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
+        cctxPtr->tmpIn += cctxPtr->tmpInSize;
+    }
+    cctxPtr->tmpInSize = 0;
+
+    /* keep tmpIn within limits */
+    {   BYTE* const bufferEnd = cctxPtr->tmpBuff + cctxPtr->maxBufferSize;
+        if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > bufferEnd) {   /* necessarily LZ4F_blockLinked */
+            int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+            cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+        }
+    }
+
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressEnd() :
+ *  Properly finishes the compressed frame :
+ *  flushes whatever data remained within the context (like LZ4F_flush()),
+ *  then writes the endMark and the (optional) content checksum.
+ *  When a content size was declared in the frame header, it is compared with the
+ *  total amount of input received; a mismatch results in a frameSize_wrong error.
+ *  LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * @return: the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ *          or an error code if it fails (can be tested using LZ4F_isError())
+ *  The context can then be used again to compress a new frame, starting with LZ4F_compressBegin().
+ */
+size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr,
+                        void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+
+    size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr);
+    DEBUGLOG(5,"LZ4F_compressEnd: dstCapacity=%u", (unsigned)dstCapacity);
+    FORWARD_IF_ERROR(flushSize);
+    dstPtr += flushSize;
+
+    /* from here on, dstCapacity is what remains after the flushed block */
+    assert(flushSize <= dstCapacity);
+    dstCapacity -= flushSize;
+
+    /* endMark : a zero-valued block header */
+    RETURN_ERROR_IF(dstCapacity < 4, dstMaxSize_tooSmall);
+    LZ4F_writeLE32(dstPtr, 0);
+    dstPtr += 4;
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) {
+        U32 const xxh = XXH32_digest(&(cctxPtr->xxh));
+        /* endMark (4) + content checksum (4) */
+        RETURN_ERROR_IF(dstCapacity < 8, dstMaxSize_tooSmall);
+        DEBUGLOG(5,"Writing 32-bit content checksum");
+        LZ4F_writeLE32(dstPtr, xxh);
+        dstPtr += 4;
+    }
+
+    cctxPtr->cStage = 0;          /* state is now re-usable (with identical preferences) */
+    cctxPtr->maxBufferSize = 0;   /* reuse HC context */
+
+    /* declared content size, when present, must match what was actually consumed */
+    if (cctxPtr->prefs.frameInfo.contentSize
+     && cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize) {
+        RETURN_ERROR(frameSize_wrong);
+    }
+
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*-***************************************************
+* Frame Decompression
+*****************************************************/
+
+/* Stages of the frame decoding state machine.
+ * Order matters : the code compares stages numerically. */
+typedef enum {
+    dstage_getFrameHeader = 0,   /* initial stage, also set by LZ4F_resetDecompressionContext() */
+    dstage_storeFrameHeader,     /* frame header is being accumulated into dctx->header */
+    dstage_init,                 /* frame header decoded */
+    dstage_getBlockHeader,
+    dstage_storeBlockHeader,
+    dstage_copyDirect,
+    dstage_getBlockChecksum,
+    dstage_getCBlock,
+    dstage_storeCBlock,
+    dstage_flushOut,
+    dstage_getSuffix,
+    dstage_storeSuffix,
+    dstage_getSFrameSize,        /* skippable frame detected */
+    dstage_storeSFrameSize,      /* skippable frame detected while reading from dctx->header */
+    dstage_skipSkippable
+} dStage_t;
+
+struct LZ4F_dctx_s {
+ LZ4F_CustomMem cmem; /* allocator used for the context itself and for its internal buffers */
+ LZ4F_frameInfo_t frameInfo; /* parameters decoded from the frame header */
+ U32 version; /* value supplied at context creation */
+ dStage_t dStage; /* current stage of the decoding state machine */
+ U64 frameRemainingSize; /* initialised from the header's content size field, when present */
+ size_t maxBlockSize; /* derived from the header's blockSizeID */
+ size_t maxBufferSize;
+ BYTE* tmpIn; /* internal input buffer, released with cmem */
+ size_t tmpInSize; /* nb of bytes accumulated so far (also used while staging the frame header) */
+ size_t tmpInTarget; /* nb of bytes to accumulate before the current stage can proceed */
+ BYTE* tmpOutBuffer; /* internal output buffer, released with cmem; may also hold decoding history */
+ const BYTE* dict; /* start of decoding history; NULL after reset */
+ size_t dictSize; /* size of decoding history; 0 after reset */
+ BYTE* tmpOut; /* points within tmpOutBuffer */
+ size_t tmpOutSize;
+ size_t tmpOutStart;
+ XXH32_state_t xxh; /* content checksum state */
+ XXH32_state_t blockChecksum;
+ int skipChecksum; /* cleared on reset; once set through decompress options, stays set */
+ BYTE header[LZ4F_HEADER_SIZE_MAX]; /* staging area for a frame header received in several pieces */
+}; /* typedef'd to LZ4F_dctx in lz4frame.h */
+
+
+/* Allocates a zero-initialized decompression context using @customMem,
+ * and records both the allocator and @version inside it.
+ * @return : the new context, or NULL if allocation failed */
+LZ4F_dctx* LZ4F_createDecompressionContext_advanced(LZ4F_CustomMem customMem, unsigned version)
+{
+    LZ4F_dctx* const newCtx = (LZ4F_dctx*)LZ4F_calloc(sizeof(LZ4F_dctx), customMem);
+    if (newCtx != NULL) {
+        newCtx->cmem = customMem;
+        newCtx->version = version;
+    }
+    return newCtx;
+}
+
+/*! LZ4F_createDecompressionContext() :
+ *  Create a decompressionContext object, which will track all decompression operations.
+ *  The context is allocated with the default allocator and stored into
+ *  *LZ4F_decompressionContextPtr (which receives NULL when allocation fails).
+ *  Object can later be released using LZ4F_freeDecompressionContext().
+ * @return : if != 0, there was an error during context creation.
+ */
+LZ4F_errorCode_t
+LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
+{
+    LZ4F_dctx* created;
+
+    assert(LZ4F_decompressionContextPtr != NULL);  /* violation of narrow contract */
+    RETURN_ERROR_IF(LZ4F_decompressionContextPtr == NULL, parameter_null);  /* in case it nonetheless happen in production */
+
+    created = LZ4F_createDecompressionContext_advanced(LZ4F_defaultCMem, versionNumber);
+    *LZ4F_decompressionContextPtr = created;
+    if (created == NULL) {  /* failed allocation */
+        RETURN_ERROR(allocation_failed);
+    }
+    return LZ4F_OK_NoError;
+}
+
+/* Releases a decompression context and its internal buffers.
+ * Accepts NULL, like free().
+ * @return : the decoding stage the context was in when released
+ *           (LZ4F_OK_NoError for a NULL context) */
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx)
+{
+    LZ4F_errorCode_t lastStage;
+
+    if (dctx == NULL) return LZ4F_OK_NoError;
+
+    lastStage = (LZ4F_errorCode_t)dctx->dStage;
+    LZ4F_free(dctx->tmpIn, dctx->cmem);
+    LZ4F_free(dctx->tmpOutBuffer, dctx->cmem);
+    LZ4F_free(dctx, dctx->cmem);
+    return lastStage;
+}
+
+
+/*==--- Streaming Decompression operations ---==*/
+
+/* Puts @dctx back into its initial decoding state :
+ * expects a new frame header, no history, checksums verified.
+ * Internal buffers are left untouched. */
+void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
+{
+    /* forget decoding history */
+    dctx->dictSize = 0;
+    dctx->dict = NULL;
+    /* checksum verification is enabled again */
+    dctx->skipChecksum = 0;
+    /* next input must start with a frame header */
+    dctx->dStage = dstage_getFrameHeader;
+}
+
+
+/*! LZ4F_decodeHeader() :
+ * input : `src` points at the **beginning of the frame**
+ * `srcSize` must be at least minFHSize, otherwise frameHeader_incomplete is returned.
+ * output : set internal values of dctx, such as
+ * dctx->frameInfo and dctx->dStage.
+ * Three outcomes are possible on success :
+ * - skippable frame : dStage moves to a skippable-frame stage
+ * - header longer than `srcSize` : available bytes are stored into dctx->header,
+ * and dStage becomes dstage_storeFrameHeader
+ * - complete header : fields are validated and saved, dStage becomes dstage_init
+ * @return : nb Bytes read from src (necessarily <= srcSize)
+ * or an error code (testable with LZ4F_isError())
+ */
+static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize)
+{
+ unsigned blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictIDFlag, blockSizeID;
+ size_t frameHeaderSize;
+ const BYTE* srcPtr = (const BYTE*)src;
+
+ DEBUGLOG(5, "LZ4F_decodeHeader");
+ /* need to decode header to get frameInfo */
+ RETURN_ERROR_IF(srcSize < minFHSize, frameHeader_incomplete); /* minimal frame header size */
+ MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
+
+ /* special case : skippable frames */
+ if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) {
+ dctx->frameInfo.frameType = LZ4F_skippableFrame;
+ if (src == (void*)(dctx->header)) { /* input is the internal header buffer : everything provided is already stored */
+ dctx->tmpInSize = srcSize;
+ dctx->tmpInTarget = 8;
+ dctx->dStage = dstage_storeSFrameSize;
+ return srcSize;
+ } else { /* input is caller's buffer : only the 4-byte magic number is consumed */
+ dctx->dStage = dstage_getSFrameSize;
+ return 4;
+ } }
+
+ /* control magic number */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) {
+ DEBUGLOG(4, "frame header error : unknown magic number");
+ RETURN_ERROR(frameType_unknown);
+ }
+#endif
+ dctx->frameInfo.frameType = LZ4F_frame;
+
+ /* Flags */
+ { U32 const FLG = srcPtr[4];
+ U32 const version = (FLG>>6) & _2BITS;
+ blockChecksumFlag = (FLG>>4) & _1BIT;
+ blockMode = (FLG>>5) & _1BIT;
+ contentSizeFlag = (FLG>>3) & _1BIT;
+ contentChecksumFlag = (FLG>>2) & _1BIT;
+ dictIDFlag = FLG & _1BIT;
+ /* validate */
+ if (((FLG>>1)&_1BIT) != 0) RETURN_ERROR(reservedFlag_set); /* Reserved bit */
+ if (version != 1) RETURN_ERROR(headerVersion_wrong); /* Version Number, only supported value */
+ }
+
+ /* Frame Header Size */
+ frameHeaderSize = minFHSize + (contentSizeFlag?8:0) + (dictIDFlag?4:0); /* optional fields : 8-byte content size, 4-byte dictID */
+
+ if (srcSize < frameHeaderSize) {
+ /* not enough input to fully decode frame header */
+ if (srcPtr != dctx->header) /* skip the copy when input already is the internal header buffer */
+ memcpy(dctx->header, srcPtr, srcSize);
+ dctx->tmpInSize = srcSize;
+ dctx->tmpInTarget = frameHeaderSize;
+ dctx->dStage = dstage_storeFrameHeader;
+ return srcSize;
+ }
+
+ { U32 const BD = srcPtr[5];
+ blockSizeID = (BD>>4) & _3BITS;
+ /* validate */
+ if (((BD>>7)&_1BIT) != 0) RETURN_ERROR(reservedFlag_set); /* Reserved bit */
+ if (blockSizeID < 4) RETURN_ERROR(maxBlockSize_invalid); /* 4-7 only supported values for the time being */
+ if (((BD>>0)&_4BITS) != 0) RETURN_ERROR(reservedFlag_set); /* Reserved bits */
+ }
+
+ /* check header */
+ assert(frameHeaderSize > 5);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ { BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5); /* covers everything between the magic number and the checksum byte itself */
+ RETURN_ERROR_IF(HC != srcPtr[frameHeaderSize-1], headerChecksum_invalid);
+ }
+#endif
+
+ /* save */
+ dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
+ dctx->frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)blockChecksumFlag;
+ dctx->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag;
+ dctx->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID;
+ dctx->maxBlockSize = LZ4F_getBlockSize((LZ4F_blockSizeID_t)blockSizeID);
+ if (contentSizeFlag)
+ dctx->frameRemainingSize = dctx->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6); /* content size directly follows the FLG and BD bytes */
+ if (dictIDFlag)
+ dctx->frameInfo.dictID = LZ4F_readLE32(srcPtr + frameHeaderSize - 5); /* dictID is the last 4 bytes before the checksum byte */
+
+ dctx->dStage = dstage_init;
+
+ return frameHeaderSize;
+}
+
+
+/*! LZ4F_headerSize() :
+ *  Determines the header size of the frame starting at @src,
+ *  from its magic number and FLG byte.
+ *  Requires at least LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH bytes of input.
+ * @return : size of frame header (8 for a skippable frame)
+ *           or an error code, which can be tested using LZ4F_isError()
+ */
+size_t LZ4F_headerSize(const void* src, size_t srcSize)
+{
+    RETURN_ERROR_IF(src == NULL, srcPtr_wrong);
+
+    /* minimal srcSize to determine header size */
+    if (srcSize < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH)
+        RETURN_ERROR(frameHeader_incomplete);
+
+    {   U32 const magic = LZ4F_readLE32(src);
+
+        /* special case : skippable frames */
+        if ((magic & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START)
+            return 8;
+
+        /* control magic number */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (magic != LZ4F_MAGICNUMBER)
+            RETURN_ERROR(frameType_unknown);
+#endif
+    }
+
+    /* Frame Header Size : minimal header + optional fields announced by FLG */
+    {   BYTE const FLG = ((const BYTE*)src)[4];
+        U32 const hasContentSize = (FLG>>3) & _1BIT;
+        U32 const hasDictID = FLG & _1BIT;
+        size_t headerSize = minFHSize;
+        if (hasContentSize) headerSize += 8;
+        if (hasDictID) headerSize += 4;
+        return headerSize;
+    }
+}
+
+/*! LZ4F_getFrameInfo() :
+ *  This function extracts frame parameters (max blockSize, frame checksum, etc.).
+ *  Usage is optional. Objective is to provide relevant information for allocation purposes.
+ *  Behavior depends on the decoding stage of @dctx :
+ *  - At the beginning of a new frame : the header is decoded from `srcBuffer`, which starts the decoding process.
+ *    Amount of input data provided must be large enough to successfully decode the frame header.
+ *    A header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes. It's possible to provide more input data than this minimum.
+ *  - In the middle of a frame header : automatic failure (frameDecoding_alreadyStarted).
+ *  - After the header has been decoded : no input is read, frame parameters are extracted from dctx.
+ *  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+ *  Decompression must resume from (srcBuffer + *srcSizePtr).
+ * @return : an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError()
+ *  note : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                   LZ4F_frameInfo_t* frameInfoPtr,
+                                   const void* srcBuffer, size_t* srcSizePtr)
+{
+    LZ4F_STATIC_ASSERT(dstage_getFrameHeader < dstage_storeFrameHeader);
+
+    if (dctx->dStage == dstage_storeFrameHeader) {
+        /* frame decoding already started, in the middle of header => automatic fail */
+        *srcSizePtr = 0;
+        RETURN_ERROR(frameDecoding_alreadyStarted);
+    }
+
+    if (dctx->dStage > dstage_storeFrameHeader) {
+        /* frameInfo already decoded : report it without consuming any input */
+        size_t noOutput = 0, noInput = 0;
+        *srcSizePtr = 0;
+        *frameInfoPtr = dctx->frameInfo;
+        /* returns : recommended nb of bytes for LZ4F_decompress() */
+        return LZ4F_decompress(dctx, NULL, &noOutput, NULL, &noInput, NULL);
+    }
+
+    /* beginning of a new frame : decode header from srcBuffer */
+    {   size_t const inputSize = *srcSizePtr;
+        size_t const hSize = LZ4F_headerSize(srcBuffer, inputSize);
+        size_t decodeResult;
+
+        if (LZ4F_isError(hSize)) { *srcSizePtr = 0; return hSize; }
+        if (inputSize < hSize) {
+            *srcSizePtr = 0;
+            RETURN_ERROR(frameHeader_incomplete);
+        }
+
+        decodeResult = LZ4F_decodeHeader(dctx, srcBuffer, hSize);
+        if (LZ4F_isError(decodeResult)) {
+            *srcSizePtr = 0;
+            *frameInfoPtr = dctx->frameInfo;
+            return decodeResult;
+        }
+        *srcSizePtr = decodeResult;
+        *frameInfoPtr = dctx->frameInfo;
+        return BHSize;   /* block header size */
+    }
+}
+
+
+/* LZ4F_updateDict() :
+ * only used for LZ4F_blockLinked mode
+ * Registers the @dstSize bytes just decoded at @dstPtr as part of the decoding history
+ * (dctx->dict / dctx->dictSize).
+ * Depending on where the data and the current history are located, the history is either
+ * extended in place, re-anchored onto @dstBufferStart, or consolidated into dctx->tmpOutBuffer.
+ * @withinTmp : non-zero when the decoded data lies within dctx->tmpOutBuffer (at tmpOut + tmpOutStart)
+ * Condition : @dstPtr != NULL
+ */
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+ const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+ unsigned withinTmp)
+{
+ assert(dstPtr != NULL);
+ if (dctx->dictSize==0) dctx->dict = (const BYTE*)dstPtr; /* will lead to prefix mode */
+ assert(dctx->dict != NULL);
+
+ if (dctx->dict + dctx->dictSize == dstPtr) { /* prefix mode, everything within dstBuffer */
+ dctx->dictSize += dstSize;
+ return;
+ }
+
+ assert(dstPtr >= dstBufferStart);
+ if ((size_t)(dstPtr - dstBufferStart) + dstSize >= 64 KB) { /* history in dstBuffer becomes large enough to become dictionary */
+ dctx->dict = (const BYTE*)dstBufferStart;
+ dctx->dictSize = (size_t)(dstPtr - dstBufferStart) + dstSize;
+ return;
+ }
+
+ assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+ /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOutBuffer */
+ assert(dctx->tmpOutBuffer != NULL);
+
+ if (withinTmp && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */
+ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
+ dctx->dictSize += dstSize;
+ return;
+ }
+
+ if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
+ size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer); /* room available in front of tmpOut */
+ size_t copySize = 64 KB - dctx->tmpOutSize;
+ const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+ if (dctx->tmpOutSize > 64 KB) copySize = 0; /* guards against wrap-around of the unsigned subtraction above */
+ if (copySize > preserveSize) copySize = preserveSize;
+
+ memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize); /* old history ends right where tmpOut begins */
+
+ dctx->dict = dctx->tmpOutBuffer;
+ dctx->dictSize = preserveSize + dctx->tmpOutStart + dstSize;
+ return;
+ }
+
+ if (dctx->dict == dctx->tmpOutBuffer) { /* copy dst into tmp to complete dict */
+ if (dctx->dictSize + dstSize > dctx->maxBufferSize) { /* tmp buffer not large enough */
+ size_t const preserveSize = 64 KB - dstSize; /* keep only what is needed to reach 64 KB once dst is appended */
+ memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+ dctx->dictSize = preserveSize;
+ }
+ memcpy(dctx->tmpOutBuffer + dctx->dictSize, dstPtr, dstSize);
+ dctx->dictSize += dstSize;
+ return;
+ }
+
+ /* join dict & dest into tmp */
+ { size_t preserveSize = 64 KB - dstSize;
+ if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize; /* cannot preserve more history than exists */
+ memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+ memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+ dctx->dict = dctx->tmpOutBuffer;
+ dctx->dictSize = preserveSize + dstSize;
+ }
+}
+
+
+/*! LZ4F_decompress() :
+ * Call this function repetitively to regenerate compressed data in srcBuffer.
+ * The function will attempt to decode up to *srcSizePtr bytes from srcBuffer
+ * into dstBuffer of capacity *dstSizePtr.
+ *
+ * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ * The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ * If number of bytes read is < number of bytes provided, then decompression operation is not complete.
+ * Remaining data will have to be presented again in a subsequent invocation.
+ *
+ * The function result is an hint of the better srcSize to use for next call to LZ4F_decompress.
+ * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ * Respecting the hint provides a small boost to performance, since it allows less buffer shuffling.
+ * Note that this is just a hint, and it's always possible to provide any srcSize value.
+ * When a frame is fully decoded, @return will be 0.
+ * If decompression failed, @return is an error code which can be tested using LZ4F_isError().
+ */
+size_t LZ4F_decompress(LZ4F_dctx* dctx,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{ /* streaming decoder : resumes from dctx->dStage on every call, consuming src and filling dst as far as both allow */
+ LZ4F_decompressOptions_t optionsNull; /* zeroed defaults, substituted when decompressOptionsPtr is NULL */
+ const BYTE* const srcStart = (const BYTE*)srcBuffer;
+ const BYTE* const srcEnd = srcStart + *srcSizePtr;
+ const BYTE* srcPtr = srcStart;
+ BYTE* const dstStart = (BYTE*)dstBuffer;
+ BYTE* const dstEnd = dstStart ? dstStart + *dstSizePtr : NULL; /* dstBuffer may be NULL, provided *dstSizePtr == 0 */
+ BYTE* dstPtr = dstStart;
+ const BYTE* selectedIn = NULL; /* start of the field or block being decoded : either within src, or within an internal buffer */
+ unsigned doAnotherStage = 1; /* set to 0 to leave the state machine loop */
+ size_t nextSrcSizeHint = 1; /* function result; stays at 1 when the exiting stage sets no hint */
+
+
+ DEBUGLOG(5, "LZ4F_decompress : %p,%u => %p,%u",
+ srcBuffer, (unsigned)*srcSizePtr, dstBuffer, (unsigned)*dstSizePtr);
+ if (dstBuffer == NULL) assert(*dstSizePtr == 0);
+ MEM_INIT(&optionsNull, 0, sizeof(optionsNull));
+ if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
+ *srcSizePtr = 0; /* consumed / produced sizes remain 0 unless the normal exit at the end of the function overwrites them */
+ *dstSizePtr = 0;
+ assert(dctx != NULL);
+ dctx->skipChecksum |= (decompressOptionsPtr->skipChecksums != 0); /* once set, disable for the remainder of the frame */
+
+ /* behaves as a state machine : each loop iteration runs the stage selected by dctx->dStage */
+
+ while (doAnotherStage) {
+
+ switch(dctx->dStage)
+ {
+
+ case dstage_getFrameHeader:
+ DEBUGLOG(6, "dstage_getFrameHeader");
+ if ((size_t)(srcEnd-srcPtr) >= maxFHSize) { /* enough to decode - shortcut */
+ size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr)); /* will update dStage appropriately */
+ FORWARD_IF_ERROR(hSize);
+ srcPtr += hSize;
+ break;
+ }
+ dctx->tmpInSize = 0;
+ if (srcEnd-srcPtr == 0) return minFHSize; /* 0-size input */
+ dctx->tmpInTarget = minFHSize; /* minimum size to decode header */
+ dctx->dStage = dstage_storeFrameHeader;
+ /* fall-through */
+
+ case dstage_storeFrameHeader:
+ DEBUGLOG(6, "dstage_storeFrameHeader");
+ { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr));
+ memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy); /* accumulate header bytes across calls */
+ dctx->tmpInSize += sizeToCopy;
+ srcPtr += sizeToCopy;
+ }
+ if (dctx->tmpInSize < dctx->tmpInTarget) {
+ nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize; /* rest of header + nextBlockHeader */
+ doAnotherStage = 0; /* not enough src data, ask for some more */
+ break;
+ }
+ FORWARD_IF_ERROR( LZ4F_decodeHeader(dctx, dctx->header, dctx->tmpInTarget) ); /* will update dStage appropriately */
+ break;
+
+ case dstage_init:
+ DEBUGLOG(6, "dstage_init");
+ if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0);
+ /* internal buffers allocation */
+ { size_t const bufferNeeded = dctx->maxBlockSize
+ + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) ? 128 KB : 0); /* linked blocks : extra room to keep history in tmpOutBuffer ahead of decoded data */
+ if (bufferNeeded > dctx->maxBufferSize) { /* tmp buffers too small */
+ dctx->maxBufferSize = 0; /* ensure allocation will be re-attempted on next entry */
+ LZ4F_free(dctx->tmpIn, dctx->cmem);
+ dctx->tmpIn = (BYTE*)LZ4F_malloc(dctx->maxBlockSize + BFSize /* block checksum */, dctx->cmem);
+ RETURN_ERROR_IF(dctx->tmpIn == NULL, allocation_failed);
+ LZ4F_free(dctx->tmpOutBuffer, dctx->cmem);
+ dctx->tmpOutBuffer= (BYTE*)LZ4F_malloc(bufferNeeded, dctx->cmem);
+ RETURN_ERROR_IF(dctx->tmpOutBuffer== NULL, allocation_failed);
+ dctx->maxBufferSize = bufferNeeded; /* recorded only once both allocations succeeded */
+ } }
+ dctx->tmpInSize = 0;
+ dctx->tmpInTarget = 0;
+ dctx->tmpOut = dctx->tmpOutBuffer;
+ dctx->tmpOutStart = 0;
+ dctx->tmpOutSize = 0;
+
+ dctx->dStage = dstage_getBlockHeader;
+ /* fall-through */
+
+ case dstage_getBlockHeader:
+ if ((size_t)(srcEnd - srcPtr) >= BHSize) { /* block header entirely available : read it in place */
+ selectedIn = srcPtr;
+ srcPtr += BHSize;
+ } else {
+ /* not enough input to read cBlockSize field */
+ dctx->tmpInSize = 0;
+ dctx->dStage = dstage_storeBlockHeader;
+ }
+
+ if (dctx->dStage == dstage_storeBlockHeader) /* can be skipped : the block below runs either from here (input too short), or by direct entry through the case label when resuming */
+ case dstage_storeBlockHeader:
+ { size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+ size_t const wantedData = BHSize - dctx->tmpInSize;
+ size_t const sizeToCopy = MIN(wantedData, remainingInput);
+ memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+ srcPtr += sizeToCopy;
+ dctx->tmpInSize += sizeToCopy;
+
+ if (dctx->tmpInSize < BHSize) { /* not enough input for cBlockSize */
+ nextSrcSizeHint = BHSize - dctx->tmpInSize;
+ doAnotherStage = 0;
+ break;
+ }
+ selectedIn = dctx->tmpIn;
+ } /* if (dctx->dStage == dstage_storeBlockHeader) */
+
+ /* decode block header */
+ { U32 const blockHeader = LZ4F_readLE32(selectedIn);
+ size_t const nextCBlockSize = blockHeader & 0x7FFFFFFFU; /* low 31 bits : block size (highest bit flags an uncompressed block) */
+ size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize; /* 0 when block checksums are disabled */
+ if (blockHeader==0) { /* frameEnd signal, no more block */
+ DEBUGLOG(5, "end of frame");
+ dctx->dStage = dstage_getSuffix;
+ break;
+ }
+ if (nextCBlockSize > dctx->maxBlockSize) { /* announced block size exceeds the frame's maximum block size */
+ RETURN_ERROR(maxBlockSize_invalid);
+ }
+ if (blockHeader & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
+ /* next block is uncompressed */
+ dctx->tmpInTarget = nextCBlockSize;
+ DEBUGLOG(5, "next block is uncompressed (size %u)", (U32)nextCBlockSize);
+ if (dctx->frameInfo.blockChecksumFlag) {
+ (void)XXH32_reset(&dctx->blockChecksum, 0);
+ }
+ dctx->dStage = dstage_copyDirect;
+ break;
+ }
+ /* next block is a compressed block */
+ dctx->tmpInTarget = nextCBlockSize + crcSize; /* block checksum, when present, is fetched together with the block */
+ dctx->dStage = dstage_getCBlock;
+ if (dstPtr==dstEnd || srcPtr==srcEnd) { /* output full or input exhausted : stop here, hint at whole block + next header */
+ nextSrcSizeHint = BHSize + nextCBlockSize + crcSize;
+ doAnotherStage = 0;
+ }
+ break;
+ }
+
+ case dstage_copyDirect: /* uncompressed block */
+ DEBUGLOG(6, "dstage_copyDirect");
+ { size_t sizeToCopy;
+ if (dstPtr == NULL) { /* no destination buffer : nothing can be copied */
+ sizeToCopy = 0;
+ } else {
+ size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
+ sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
+ memcpy(dstPtr, srcPtr, sizeToCopy);
+ if (!dctx->skipChecksum) {
+ if (dctx->frameInfo.blockChecksumFlag) {
+ (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+ }
+ if (dctx->frameInfo.contentChecksumFlag)
+ (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+ }
+ if (dctx->frameInfo.contentSize) /* non-zero content size : track how many bytes remain expected */
+ dctx->frameRemainingSize -= sizeToCopy;
+
+ /* history management (linked blocks only)*/
+ if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+ LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
+ } }
+
+ srcPtr += sizeToCopy;
+ dstPtr += sizeToCopy;
+ if (sizeToCopy == dctx->tmpInTarget) { /* all done */
+ if (dctx->frameInfo.blockChecksumFlag) {
+ dctx->tmpInSize = 0;
+ dctx->dStage = dstage_getBlockChecksum;
+ } else
+ dctx->dStage = dstage_getBlockHeader; /* new block */
+ break;
+ }
+ dctx->tmpInTarget -= sizeToCopy; /* need to copy more */
+ }
+ nextSrcSizeHint = dctx->tmpInTarget +
+ +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0) /* note : combined with the previous line, the leading '+' is a harmless unary plus */
+ + BHSize /* next header size */;
+ doAnotherStage = 0;
+ break;
+
+ /* check block checksum for recently transferred uncompressed block */
+ case dstage_getBlockChecksum:
+ DEBUGLOG(6, "dstage_getBlockChecksum");
+ { const void* crcSrc;
+ if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) { /* 4-byte checksum fully available and nothing stored yet : read in place */
+ crcSrc = srcPtr;
+ srcPtr += 4;
+ } else {
+ size_t const stillToCopy = 4 - dctx->tmpInSize;
+ size_t const sizeToCopy = MIN(stillToCopy, (size_t)(srcEnd-srcPtr));
+ memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy); /* dctx->header serves as scratch storage for the partial checksum */
+ dctx->tmpInSize += sizeToCopy;
+ srcPtr += sizeToCopy;
+ if (dctx->tmpInSize < 4) { /* all input consumed */
+ doAnotherStage = 0;
+ break;
+ }
+ crcSrc = dctx->header;
+ }
+ if (!dctx->skipChecksum) {
+ U32 const readCRC = LZ4F_readLE32(crcSrc);
+ U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ DEBUGLOG(6, "compare block checksum");
+ if (readCRC != calcCRC) {
+ DEBUGLOG(4, "incorrect block checksum: %08X != %08X",
+ readCRC, calcCRC);
+ RETURN_ERROR(blockChecksum_invalid);
+ }
+#else
+ (void)readCRC;
+ (void)calcCRC;
+#endif
+ } }
+ dctx->dStage = dstage_getBlockHeader; /* new block */
+ break;
+
+ case dstage_getCBlock:
+ DEBUGLOG(6, "dstage_getCBlock");
+ if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) { /* block not entirely present in src : it must be accumulated into tmpIn */
+ dctx->tmpInSize = 0;
+ dctx->dStage = dstage_storeCBlock;
+ break;
+ }
+ /* input large enough to read full block directly */
+ selectedIn = srcPtr;
+ srcPtr += dctx->tmpInTarget;
+
+ if (0) /* always jump over next block when coming from above : it is only entered through the case label */
+ case dstage_storeCBlock:
+ { size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize;
+ size_t const inputLeft = (size_t)(srcEnd-srcPtr);
+ size_t const sizeToCopy = MIN(wantedData, inputLeft);
+ memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+ dctx->tmpInSize += sizeToCopy;
+ srcPtr += sizeToCopy;
+ if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */
+ nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize)
+ + (dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
+ + BHSize /* next header size */;
+ doAnotherStage = 0;
+ break;
+ }
+ selectedIn = dctx->tmpIn;
+ }
+
+ /* At this stage, input is large enough to decode a block */
+
+ /* First, decode and control block checksum if it exists */
+ if (dctx->frameInfo.blockChecksumFlag) {
+ assert(dctx->tmpInTarget >= 4);
+ dctx->tmpInTarget -= 4; /* exclude the trailing checksum : tmpInTarget is now the compressed size only */
+ assert(selectedIn != NULL); /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */
+ { U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
+ U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ RETURN_ERROR_IF(readBlockCrc != calcBlockCrc, blockChecksum_invalid);
+#else
+ (void)readBlockCrc;
+ (void)calcBlockCrc;
+#endif
+ } }
+
+ /* decode directly into destination buffer if there is enough room */
+ if ( ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize)
+ /* unless the dictionary is stored in tmpOut:
+ * in which case it's faster to decode within tmpOut
+ * to benefit from prefix speedup */
+ && !(dctx->dict!= NULL && (const BYTE*)dctx->dict + dctx->dictSize == dctx->tmpOut) )
+ {
+ const char* dict = (const char*)dctx->dict;
+ size_t dictSize = dctx->dictSize;
+ int decodedSize;
+ assert(dstPtr != NULL);
+ if (dict && dictSize > 1 GB) {
+ /* overflow control : the dictSize parameter is an int, avoid truncation / sign issues */
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
+ }
+ decodedSize = LZ4_decompress_safe_usingDict(
+ (const char*)selectedIn, (char*)dstPtr,
+ (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+ dict, (int)dictSize);
+ RETURN_ERROR_IF(decodedSize < 0, decompressionFailed);
+ if ((dctx->frameInfo.contentChecksumFlag) && (!dctx->skipChecksum))
+ XXH32_update(&(dctx->xxh), dstPtr, (size_t)decodedSize);
+ if (dctx->frameInfo.contentSize)
+ dctx->frameRemainingSize -= (size_t)decodedSize;
+
+ /* dictionary management */
+ if (dctx->frameInfo.blockMode==LZ4F_blockLinked) {
+ LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0);
+ }
+
+ dstPtr += decodedSize;
+ dctx->dStage = dstage_getBlockHeader; /* end of block, let's get another one */
+ break;
+ }
+
+ /* not enough room in dst : decode into tmpOut */
+
+ /* manage dictionary */
+ if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+ if (dctx->dict == dctx->tmpOutBuffer) {
+ /* truncate dictionary to 64 KB if too big */
+ if (dctx->dictSize > 128 KB) {
+ memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - 64 KB, 64 KB);
+ dctx->dictSize = 64 KB;
+ }
+ dctx->tmpOut = dctx->tmpOutBuffer + dctx->dictSize; /* decode right after the history */
+ } else { /* dict not within tmpOut */
+ size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
+ dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace; /* leave room at the buffer start, filled later when history gets preserved */
+ } }
+
+ /* Decode block into tmpOut */
+ { const char* dict = (const char*)dctx->dict;
+ size_t dictSize = dctx->dictSize;
+ int decodedSize;
+ if (dict && dictSize > 1 GB) {
+ /* the dictSize param is an int, avoid truncation / sign issues */
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
+ }
+ decodedSize = LZ4_decompress_safe_usingDict(
+ (const char*)selectedIn, (char*)dctx->tmpOut,
+ (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+ dict, (int)dictSize);
+ RETURN_ERROR_IF(decodedSize < 0, decompressionFailed);
+ if (dctx->frameInfo.contentChecksumFlag && !dctx->skipChecksum)
+ XXH32_update(&(dctx->xxh), dctx->tmpOut, (size_t)decodedSize);
+ if (dctx->frameInfo.contentSize)
+ dctx->frameRemainingSize -= (size_t)decodedSize;
+ dctx->tmpOutSize = (size_t)decodedSize;
+ dctx->tmpOutStart = 0; /* nothing flushed yet */
+ dctx->dStage = dstage_flushOut;
+ }
+ /* fall-through */
+
+ case dstage_flushOut: /* flush decoded data from tmpOut to dstBuffer */
+ DEBUGLOG(6, "dstage_flushOut");
+ if (dstPtr != NULL) {
+ size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
+ memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
+
+ /* dictionary management */
+ if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+ LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
+
+ dctx->tmpOutStart += sizeToCopy;
+ dstPtr += sizeToCopy;
+ }
+ if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
+ dctx->dStage = dstage_getBlockHeader; /* get next block */
+ break;
+ }
+ /* could not flush everything : stop there, just request a block header */
+ doAnotherStage = 0;
+ nextSrcSizeHint = BHSize;
+ break;
+
+ case dstage_getSuffix:
+ RETURN_ERROR_IF(dctx->frameRemainingSize, frameSize_wrong); /* incorrect frame size decoded */
+ if (!dctx->frameInfo.contentChecksumFlag) { /* no checksum, frame is completed */
+ nextSrcSizeHint = 0;
+ LZ4F_resetDecompressionContext(dctx);
+ doAnotherStage = 0;
+ break;
+ }
+ if ((srcEnd - srcPtr) < 4) { /* not enough size for entire CRC */
+ dctx->tmpInSize = 0;
+ dctx->dStage = dstage_storeSuffix;
+ } else {
+ selectedIn = srcPtr;
+ srcPtr += 4;
+ }
+
+ if (dctx->dStage == dstage_storeSuffix) /* can be skipped : same label-inside-if construct as for the block header */
+ case dstage_storeSuffix:
+ { size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+ size_t const wantedData = 4 - dctx->tmpInSize;
+ size_t const sizeToCopy = MIN(wantedData, remainingInput);
+ memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+ srcPtr += sizeToCopy;
+ dctx->tmpInSize += sizeToCopy;
+ if (dctx->tmpInSize < 4) { /* not enough input to read complete suffix */
+ nextSrcSizeHint = 4 - dctx->tmpInSize;
+ doAnotherStage=0;
+ break;
+ }
+ selectedIn = dctx->tmpIn;
+ } /* if (dctx->dStage == dstage_storeSuffix) */
+
+ /* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */
+ if (!dctx->skipChecksum) {
+ U32 const readCRC = LZ4F_readLE32(selectedIn);
+ U32 const resultCRC = XXH32_digest(&(dctx->xxh));
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ RETURN_ERROR_IF(readCRC != resultCRC, contentChecksum_invalid);
+#else
+ (void)readCRC;
+ (void)resultCRC;
+#endif
+ }
+ nextSrcSizeHint = 0; /* frame fully decoded */
+ LZ4F_resetDecompressionContext(dctx);
+ doAnotherStage = 0;
+ break;
+
+ case dstage_getSFrameSize:
+ if ((srcEnd - srcPtr) >= 4) {
+ selectedIn = srcPtr;
+ srcPtr += 4;
+ } else {
+ /* not enough input to read the skippable frame size field */
+ dctx->tmpInSize = 4; /* the size field is accumulated into header[4..8) */
+ dctx->tmpInTarget = 8;
+ dctx->dStage = dstage_storeSFrameSize;
+ }
+
+ if (dctx->dStage == dstage_storeSFrameSize) /* skipped when the size field was read directly from src */
+ case dstage_storeSFrameSize:
+ { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+ (size_t)(srcEnd - srcPtr) );
+ memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+ srcPtr += sizeToCopy;
+ dctx->tmpInSize += sizeToCopy;
+ if (dctx->tmpInSize < dctx->tmpInTarget) {
+ /* not enough input to get full sBlockSize; wait for more */
+ nextSrcSizeHint = dctx->tmpInTarget - dctx->tmpInSize;
+ doAnotherStage = 0;
+ break;
+ }
+ selectedIn = dctx->header + 4;
+ } /* if (dctx->dStage == dstage_storeSFrameSize) */
+
+ /* case dstage_decodeSFrameSize: */ /* no direct entry */
+ { size_t const SFrameSize = LZ4F_readLE32(selectedIn);
+ dctx->frameInfo.contentSize = SFrameSize;
+ dctx->tmpInTarget = SFrameSize; /* number of bytes left to skip */
+ dctx->dStage = dstage_skipSkippable;
+ break;
+ }
+
+ case dstage_skipSkippable:
+ { size_t const skipSize = MIN(dctx->tmpInTarget, (size_t)(srcEnd-srcPtr));
+ srcPtr += skipSize;
+ dctx->tmpInTarget -= skipSize;
+ doAnotherStage = 0;
+ nextSrcSizeHint = dctx->tmpInTarget;
+ if (nextSrcSizeHint) break; /* still more to skip */
+ /* frame fully skipped : prepare context for a new frame */
+ LZ4F_resetDecompressionContext(dctx);
+ break;
+ }
+ } /* switch (dctx->dStage) */
+ } /* while (doAnotherStage) */
+
+ /* preserve history within tmpOut whenever necessary */
+ LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2);
+ if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked) /* next block will use up to 64KB from previous ones */
+ && (dctx->dict != dctx->tmpOutBuffer) /* dictionary is not already within tmp */
+ && (dctx->dict != NULL) /* dictionary exists */
+ && (!decompressOptionsPtr->stableDst) /* cannot rely on dst data to remain there for next call */
+ && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) ) /* valid stages : [init ... getSuffix[ */
+ {
+ if (dctx->dStage == dstage_flushOut) { /* a decoded block is still waiting in tmpOut : place history just before it */
+ size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer);
+ size_t copySize = 64 KB - dctx->tmpOutSize; /* wraps around when tmpOutSize > 64 KB : reset to 0 two lines below */
+ const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+ if (dctx->tmpOutSize > 64 KB) copySize = 0;
+ if (copySize > preserveSize) copySize = preserveSize;
+ assert(dctx->tmpOutBuffer != NULL);
+
+ memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+ dctx->dict = dctx->tmpOutBuffer;
+ dctx->dictSize = preserveSize + dctx->tmpOutStart;
+ } else { /* no pending output : copy the last (up to) 64 KB of history to the start of tmpOutBuffer */
+ const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize;
+ size_t const newDictSize = MIN(dctx->dictSize, 64 KB);
+
+ memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+
+ dctx->dict = dctx->tmpOutBuffer;
+ dctx->dictSize = newDictSize;
+ dctx->tmpOut = dctx->tmpOutBuffer + newDictSize;
+ }
+ }
+
+ *srcSizePtr = (size_t)(srcPtr - srcStart); /* report bytes actually consumed / produced */
+ *dstSizePtr = (size_t)(dstPtr - dstStart);
+ return nextSrcSizeHint;
+}
+
+/*! LZ4F_decompress_usingDict() :
+ * Variant of LZ4F_decompress() which decodes with a caller-supplied dictionary.
+ * The dictionary is referenced directly (no copy, no preprocessing),
+ * so it must stay valid and unchanged until the frame is fully decoded.
+ */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctx,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const void* dict, size_t dictSize,
+ const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+ /* the dictionary is only registered while the context is at or before dstage_init */
+ int const beforeFirstBlock = (dctx->dStage <= dstage_init);
+ if (beforeFirstBlock) {
+ dctx->dictSize = dictSize;
+ dctx->dict = (const BYTE*)dict;
+ }
+ return LZ4F_decompress(dctx, dstBuffer, dstSizePtr, srcBuffer, srcSizePtr, decompressOptionsPtr);
+}
diff --git a/mfbt/lz4/lz4frame.h b/mfbt/lz4/lz4frame.h
new file mode 100644
index 0000000000..1bdf6c4fcb
--- /dev/null
+++ b/mfbt/lz4/lz4frame.h
@@ -0,0 +1,692 @@
+/*
+ LZ4F - LZ4-Frame library
+ Header File
+ Copyright (C) 2011-2020, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API able to create and decode LZ4 frames
+ * conformant with specification v1.6.1 in doc/lz4_Frame_format.md .
+ * Generated frames are compatible with `lz4` CLI.
+ *
+ * LZ4F also offers streaming capabilities.
+ *
+ * lz4.h is not required when using lz4frame.h,
+ * except to extract common constants such as LZ4_VERSION_NUMBER.
+ * */
+
+#ifndef LZ4F_H_09782039843
+#define LZ4F_H_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* --- Dependency --- */
+#include <stddef.h> /* size_t */
+
+
+/**
+ * Introduction
+ *
+ * lz4frame.h implements LZ4 frame specification: see doc/lz4_Frame_format.md .
+ * LZ4 Frames are compatible with `lz4` CLI,
+ * and designed to be interoperable with any system.
+**/
+
+/*-***************************************************************
+ * Compiler specifics
+ *****************************************************************/
+/* LZ4_DLL_EXPORT :
+ * Enable exporting of functions when building a Windows DLL
+ * LZ4FLIB_VISIBILITY :
+ * Control library symbols visibility.
+ */
+#ifndef LZ4FLIB_VISIBILITY
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4FLIB_VISIBILITY __attribute__ ((visibility ("default")))
+# else
+# define LZ4FLIB_VISIBILITY
+# endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+# define LZ4FLIB_API __declspec(dllexport) LZ4FLIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+# define LZ4FLIB_API __declspec(dllimport) LZ4FLIB_VISIBILITY
+#else
+# define LZ4FLIB_API LZ4FLIB_VISIBILITY
+#endif
+
+#ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS
+# define LZ4F_DEPRECATE(x) x
+#else
+# if defined(_MSC_VER)
+# define LZ4F_DEPRECATE(x) x /* __declspec(deprecated) x - only works with C++ */
+# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
+# define LZ4F_DEPRECATE(x) x __attribute__((deprecated))
+# else
+# define LZ4F_DEPRECATE(x) x /* no deprecation warning for this compiler */
+# endif
+#endif
+
+
+/*-************************************
+ * Error management
+ **************************************/
+typedef size_t LZ4F_errorCode_t;
+
+LZ4FLIB_API unsigned LZ4F_isError(LZ4F_errorCode_t code); /**< tells when a function result is an error code */
+LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /**< return error code string; for debugging */
+
+
+/*-************************************
+ * Frame compression types
+ ************************************* */
+/* #define LZ4F_ENABLE_OBSOLETE_ENUMS // uncomment to enable obsolete enums */
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+# define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x
+#else
+# define LZ4F_OBSOLETE_ENUM(x)
+#endif
+
+/* The larger the block size, the (slightly) better the compression ratio,
+ * though there are diminishing returns.
+ * Larger blocks also increase memory usage on both compression and decompression sides.
+ */
+typedef enum {
+ LZ4F_default=0, /* 0 == default */
+ LZ4F_max64KB=4,
+ LZ4F_max256KB=5,
+ LZ4F_max1MB=6,
+ LZ4F_max4MB=7 /* no trailing comma : LZ4F_OBSOLETE_ENUM() supplies its own leading comma when enabled */
+ LZ4F_OBSOLETE_ENUM(max64KB) /* deprecated unprefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined; expands to nothing otherwise */
+ LZ4F_OBSOLETE_ENUM(max256KB)
+ LZ4F_OBSOLETE_ENUM(max1MB)
+ LZ4F_OBSOLETE_ENUM(max4MB)
+} LZ4F_blockSizeID_t;
+
+/* Linked blocks sharply reduce inefficiencies when using small blocks,
+ * they compress better.
+ * However, some LZ4 decoders are only compatible with independent blocks */
+typedef enum {
+ LZ4F_blockLinked=0, /* 0 == default */
+ LZ4F_blockIndependent /* no trailing comma : LZ4F_OBSOLETE_ENUM() supplies its own leading comma when enabled */
+ LZ4F_OBSOLETE_ENUM(blockLinked) /* deprecated unprefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+ LZ4F_OBSOLETE_ENUM(blockIndependent)
+} LZ4F_blockMode_t;
+
+typedef enum {
+ LZ4F_noContentChecksum=0, /* 0 == disabled (default) */
+ LZ4F_contentChecksumEnabled /* no trailing comma : LZ4F_OBSOLETE_ENUM() supplies its own leading comma when enabled */
+ LZ4F_OBSOLETE_ENUM(noContentChecksum) /* deprecated unprefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+ LZ4F_OBSOLETE_ENUM(contentChecksumEnabled)
+} LZ4F_contentChecksum_t;
+
+typedef enum {
+ LZ4F_noBlockChecksum=0, /* 0 == disabled (default) */
+ LZ4F_blockChecksumEnabled /* this enum has no LZ4F_OBSOLETE_ENUM() aliases */
+} LZ4F_blockChecksum_t;
+
+typedef enum {
+ LZ4F_frame=0, /* value used by LZ4F_INIT_FRAMEINFO */
+ LZ4F_skippableFrame /* no trailing comma : LZ4F_OBSOLETE_ENUM() supplies its own leading comma when enabled */
+ LZ4F_OBSOLETE_ENUM(skippableFrame) /* deprecated unprefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+} LZ4F_frameType_t;
+
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+typedef LZ4F_blockSizeID_t blockSizeID_t;
+typedef LZ4F_blockMode_t blockMode_t;
+typedef LZ4F_frameType_t frameType_t;
+typedef LZ4F_contentChecksum_t contentChecksum_t;
+#endif
+
+/*! LZ4F_frameInfo_t :
+ * makes it possible to set or read frame parameters.
+ * Structure must first be initialized to 0, using memset() or LZ4F_INIT_FRAMEINFO,
+ * setting all parameters to default.
+ * It's then possible to update selectively some parameters */
+typedef struct {
+ LZ4F_blockSizeID_t blockSizeID; /* LZ4F_max64KB, LZ4F_max256KB, LZ4F_max1MB, LZ4F_max4MB; 0 == default */
+ LZ4F_blockMode_t blockMode; /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */
+ LZ4F_contentChecksum_t contentChecksumFlag; /* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */
+ LZ4F_frameType_t frameType; /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
+ unsigned long long contentSize; /* Size of uncompressed content ; 0 == unknown */
+ unsigned dictID; /* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */
+ LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */
+} LZ4F_frameInfo_t;
+
+#define LZ4F_INIT_FRAMEINFO { LZ4F_default, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0ULL, 0U, LZ4F_noBlockChecksum } /* v1.8.3+ */
+
+/*! LZ4F_preferences_t :
+ * makes it possible to supply advanced compression instructions to streaming interface.
+ * Structure must first be initialized to 0, using memset() or LZ4F_INIT_PREFERENCES,
+ * setting all parameters to default.
+ * All reserved fields must be set to zero. */
+typedef struct {
+ LZ4F_frameInfo_t frameInfo; /* frame parameters, see LZ4F_frameInfo_t */
+ int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */
+ unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */
+ unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* v1.8.2+ */
+ unsigned reserved[3]; /* must be zero for forward compatibility */
+} LZ4F_preferences_t;
+
+#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0u, 0u, { 0u, 0u, 0u } } /* v1.8.3+ */
+
+
+/*-*********************************
+* Simple compression function
+***********************************/
+
+LZ4FLIB_API int LZ4F_compressionLevel_max(void); /* v1.8.0+ */
+
+/*! LZ4F_compressFrameBound() :
+ * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+ * Note : this result is only usable with LZ4F_compressFrame().
+ * It may also be relevant to LZ4F_compressUpdate() _only if_ no flush() operation is ever performed.
+ */
+LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+/*! LZ4F_compressFrame() :
+ * Compress an entire srcBuffer into a valid LZ4 frame.
+ * dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_preferences_t* preferencesPtr);
+
+
+/*-***********************************
+* Advanced compression functions
+*************************************/
+typedef struct LZ4F_cctx_s LZ4F_cctx; /* incomplete type */
+typedef LZ4F_cctx* LZ4F_compressionContext_t; /* for compatibility with older APIs, prefer using LZ4F_cctx */
+
+typedef struct {
+ unsigned stableSrc; /* 1 == src content will remain present on future calls to LZ4F_compress(); skip copying src content within tmp buffer */
+ unsigned reserved[3]; /* NOTE(review): presumably must be zero, like LZ4F_preferences_t.reserved - confirm */
+} LZ4F_compressOptions_t;
+
+/*--- Resource Management ---*/
+
+#define LZ4F_VERSION 100 /* This number can be used to check for an incompatible API breaking change */
+LZ4FLIB_API unsigned LZ4F_getVersion(void);
+
+/*! LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object,
+ * which will keep track of operation state during streaming compression.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version,
+ * and a pointer to LZ4F_cctx*, to write the resulting pointer into.
+ * @version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL.
+ * The function provides a pointer to a fully allocated LZ4F_cctx object.
+ * @cctxPtr MUST be != NULL.
+ * If @return != zero, context creation failed.
+ * A created compression context can be employed multiple times for consecutive streaming operations.
+ * Once all streaming compression jobs are completed,
+ * the state object can be released using LZ4F_freeCompressionContext().
+ * Note1 : LZ4F_freeCompressionContext() is always successful. Its return value can be ignored.
+ * Note2 : LZ4F_freeCompressionContext() works fine with NULL input pointers (do nothing).
+**/
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
+
+
+/*---- Compression ----*/
+
+#define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected parameters */
+#define LZ4F_HEADER_SIZE_MAX 19
+
+/* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */
+#define LZ4F_BLOCK_HEADER_SIZE 4
+
+/* Size in bytes of a block checksum footer in little-endian format. */
+#define LZ4F_BLOCK_CHECKSUM_SIZE 4
+
+/* Size in bytes of the content checksum. */
+#define LZ4F_CONTENT_CHECKSUM_SIZE 4
+
+/*! LZ4F_compressBegin() :
+ * will write the frame header into dstBuffer.
+ * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * @return : number of bytes written into dstBuffer for the header
+ * or an error code (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressBound() :
+ * Provides minimum dstCapacity required to guarantee success of
+ * LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario.
+ * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead.
+ * Note that the result is only valid for a single invocation of LZ4F_compressUpdate().
+ * When invoking LZ4F_compressUpdate() multiple times,
+ * if the output buffer is gradually filled up instead of emptied and re-used from its start,
+ * one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound().
+ * @return is always the same for a srcSize and prefsPtr.
+ * prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+ * tech details :
+ * @return if automatic flushing is not enabled, includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ * It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
+ * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
+ */
+LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressUpdate() :
+ * LZ4F_compressUpdate() can be called repeatedly to compress as much data as necessary.
+ * Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+ * This value is provided by LZ4F_compressBound().
+ * If this condition is not respected, LZ4F_compressUpdate() will fail (result is an errorCode).
+ * After an error, the context is left in an undefined state, and must be re-initialized or freed.
+ * If an uncompressed block was previously written, buffered data is flushed
+ * before compressed data is appended.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
+ * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_flush() :
+ * When data must be generated and sent immediately, without waiting for a block to be completely filled,
+ * it's possible to call LZ4F_flush(). It will immediately compress any data buffered within cctx.
+ * `dstCapacity` must be large enough to ensure the operation will be successful.
+ * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
+ * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ * Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
+ */
+LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_compressEnd() :
+ * To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+ * It will flush whatever data remained within `cctx` (like LZ4F_flush())
+ * and properly finalize the frame, with an endMark and a checksum.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
+ * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ * Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
+ * A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ */
+LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+
+/*-*********************************
+* Decompression functions
+***********************************/
+typedef struct LZ4F_dctx_s LZ4F_dctx; /* incomplete type */
+typedef LZ4F_dctx* LZ4F_decompressionContext_t; /* compatibility with previous API versions */
+
+/* LZ4F_decompressOptions_t :
+ * Option flags for decompression; LZ4F_decompress() takes a pointer to this structure as its last parameter. */
+typedef struct {
+ unsigned stableDst; /* pledges that last 64KB decompressed data will remain available unmodified between invocations.
+ * This optimization skips storage operations in tmp buffers. */
+ unsigned skipChecksums; /* disable checksum calculation and verification, even when one is present in frame, to save CPU time.
+ * Setting this option to 1 once disables all checksums for the rest of the frame. */
+ unsigned reserved1; /* must be set to zero for forward compatibility */
+ unsigned reserved0; /* same as reserved1 : must be set to zero for forward compatibility */
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+/*! LZ4F_createDecompressionContext() :
+ * Create an LZ4F_dctx object, to track all decompression operations.
+ * @version provided MUST be LZ4F_VERSION.
+ * @dctxPtr MUST be valid.
+ * The function fills @dctxPtr with the value of a pointer to an allocated and initialized LZ4F_dctx object.
+ * The @return is an errorCode, which can be tested using LZ4F_isError().
+ * dctx memory can be released using LZ4F_freeDecompressionContext();
+ * Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
+ * That is, it should be == 0 if decompression has been completed fully and correctly.
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
+
+
+/*-***********************************
+* Streaming decompression functions
+*************************************/
+
+#define LZ4F_MAGICNUMBER 0x184D2204U
+#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U
+#define LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH 5
+
+/*! LZ4F_headerSize() : v1.9.0+
+ * Provide the header size of a frame starting at `src`.
+ * `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH,
+ * which is enough to decode the header length.
+ * @return : size of frame header
+ * or an error code, which can be tested using LZ4F_isError()
+ * note : Frame header size is variable, but is guaranteed to be
+ * >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
+ */
+LZ4FLIB_API size_t LZ4F_headerSize(const void* src, size_t srcSize);
+
+/*! LZ4F_getFrameInfo() :
+ * This function extracts frame parameters (max blockSize, dictID, etc.).
+ * Its usage is optional: user can also invoke LZ4F_decompress() directly.
+ *
+ * Extracted information will fill an existing LZ4F_frameInfo_t structure.
+ * This can be useful for allocation and dictionary identification purposes.
+ *
+ * LZ4F_getFrameInfo() can work in the following situations :
+ *
+ * 1) At the beginning of a new frame, before any invocation of LZ4F_decompress().
+ * It will decode header from `srcBuffer`,
+ * consuming the header and starting the decoding process.
+ *
+ * Input size must be large enough to contain the full frame header.
+ * Frame header size can be known beforehand by LZ4F_headerSize().
+ * Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes,
+ * and <= LZ4F_HEADER_SIZE_MAX bytes.
+ * Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work.
+ * It's allowed to provide more input data than the header size,
+ * LZ4F_getFrameInfo() will only consume the header.
+ *
+ * If input size is not large enough,
+ * aka if it's smaller than header size,
+ * function will fail and return an error code.
+ *
+ * 2) After decoding has been started,
+ * it's possible to invoke LZ4F_getFrameInfo() anytime
+ * to extract already decoded frame parameters stored within dctx.
+ *
+ * Note that, if decoding has barely started,
+ * and not yet read enough information to decode the header,
+ * LZ4F_getFrameInfo() will fail.
+ *
+ * The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value).
+ * LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started,
+ * and when decoding the header has been successful.
+ * Decompression must then resume from (srcBuffer + *srcSizePtr).
+ *
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ * or an error code which can be tested using LZ4F_isError().
+ * note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
+ * note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4FLIB_API size_t
+LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+ LZ4F_frameInfo_t* frameInfoPtr,
+ const void* srcBuffer, size_t* srcSizePtr);
+
+/*! LZ4F_decompress() :
+ * Call this function repetitively to regenerate data compressed in `srcBuffer`.
+ *
+ * The function requires a valid dctx state.
+ * It will read up to *srcSizePtr bytes from srcBuffer,
+ * and decompress data into dstBuffer, of capacity *dstSizePtr.
+ *
+ * The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+ * The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
+ *
+ * The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
+ * Unconsumed source data must be presented again in subsequent invocations.
+ *
+ * `dstBuffer` can freely change between each consecutive function invocation.
+ * `dstBuffer` content will be overwritten.
+ *
+ * @return : a hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
+ * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ * Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
+ * This is just a hint though, it's always possible to provide any srcSize.
+ *
+ * When a frame is fully decoded, @return will be 0 (no more data expected).
+ * When provided with more bytes than necessary to decode a frame,
+ * LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0.
+ *
+ * If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+ * After a decompression error, the `dctx` context is not resumable.
+ * Use LZ4F_resetDecompressionContext() to return to clean state.
+ *
+ * After a frame is fully decoded, dctx can be used again to decompress another frame.
+ */
+LZ4FLIB_API size_t
+LZ4F_decompress(LZ4F_dctx* dctx,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const LZ4F_decompressOptions_t* dOptPtr);
+
+
+/*! LZ4F_resetDecompressionContext() : added in v1.8.0
+ * In case of an error, the context is left in "undefined" state.
+ * In which case, it's necessary to reset it, before re-using it.
+ * This method can also be used to abruptly stop any unfinished decompression,
+ * and start a new one using same context resources. */
+LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx); /* always successful */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4F_H_09782039843 */
+
+#if defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843)
+#define LZ4F_H_STATIC_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* These declarations are not stable and may change in the future.
+ * They are therefore only safe to depend on
+ * when the caller is statically linked against the library.
+ * To access their declarations, define LZ4F_STATIC_LINKING_ONLY.
+ *
+ * By default, these symbols aren't published into shared/dynamic libraries.
+ * You can override this behavior and force them to be published
+ * by defining LZ4F_PUBLISH_STATIC_FUNCTIONS.
+ * Use at your own risk.
+ */
+#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+# define LZ4FLIB_STATIC_API LZ4FLIB_API
+#else
+# define LZ4FLIB_STATIC_API
+#endif
+
+
+/* --- Error List --- */
+/* LZ4F_LIST_ERRORS() is an X-macro : it applies ITEM() to every error name, in declaration order.
+ * It is the single source from which the LZ4F_errorCodes enum below is generated.
+ * OK_NoError comes first, hence its enumerator has value 0; ERROR_maxCode is the last listed name. */
+#define LZ4F_LIST_ERRORS(ITEM) \
+ ITEM(OK_NoError) \
+ ITEM(ERROR_GENERIC) \
+ ITEM(ERROR_maxBlockSize_invalid) \
+ ITEM(ERROR_blockMode_invalid) \
+ ITEM(ERROR_contentChecksumFlag_invalid) \
+ ITEM(ERROR_compressionLevel_invalid) \
+ ITEM(ERROR_headerVersion_wrong) \
+ ITEM(ERROR_blockChecksum_invalid) \
+ ITEM(ERROR_reservedFlag_set) \
+ ITEM(ERROR_allocation_failed) \
+ ITEM(ERROR_srcSize_tooLarge) \
+ ITEM(ERROR_dstMaxSize_tooSmall) \
+ ITEM(ERROR_frameHeader_incomplete) \
+ ITEM(ERROR_frameType_unknown) \
+ ITEM(ERROR_frameSize_wrong) \
+ ITEM(ERROR_srcPtr_wrong) \
+ ITEM(ERROR_decompressionFailed) \
+ ITEM(ERROR_headerChecksum_invalid) \
+ ITEM(ERROR_contentChecksum_invalid) \
+ ITEM(ERROR_frameDecoding_alreadyStarted) \
+ ITEM(ERROR_compressionState_uninitialized) \
+ ITEM(ERROR_parameter_null) \
+ ITEM(ERROR_maxCode)
+
+/* Turns one list entry into an enumerator : prepends the LZ4F_ prefix and appends a comma. */
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+
+/* enum list is exposed, to handle specific errors */
+/* Expands to LZ4F_OK_NoError, LZ4F_ERROR_GENERIC, ..., LZ4F_ERROR_maxCode,
+ * followed by a dummy enumerator which absorbs the trailing comma produced by the last entry. */
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM)
+ _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes;
+
+LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+
+
+/*! LZ4F_getBlockSize() :
+ * Return, in scalar format (size_t),
+ * the maximum block size associated with blockSizeID.
+**/
+LZ4FLIB_STATIC_API size_t LZ4F_getBlockSize(LZ4F_blockSizeID_t blockSizeID);
+
+/*! LZ4F_uncompressedUpdate() :
+ * LZ4F_uncompressedUpdate() can be called repetitively to add as much uncompressed data as necessary.
+ * Important rule: dstCapacity MUST be large enough to store the entire source buffer as
+ * no compression is done for this operation
+ * If this condition is not respected, LZ4F_uncompressedUpdate() will fail (result is an errorCode).
+ * After an error, the state is left in a UB state, and must be re-initialized or freed.
+ * If previously a compressed block was written, buffered data is flushed
+ * before appending uncompressed data is continued.
+ * This is only supported when LZ4F_blockIndependent is used
+ * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
+ * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_STATIC_API size_t
+LZ4F_uncompressedUpdate(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+/**********************************
+ * Bulk processing dictionary API
+ *********************************/
+
+/* A Dictionary is useful for the compression of small messages (KB range).
+ * It dramatically improves compression efficiency.
+ *
+ * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful.
+ * Best results are generally achieved by using Zstandard's Dictionary Builder
+ * to generate a high-quality dictionary from a set of samples.
+ *
+ * Loading a dictionary has a cost, since it involves construction of tables.
+ * The Bulk processing dictionary API makes it possible to share this cost
+ * over an arbitrary number of compression jobs, even concurrently,
+ * markedly improving compression latency for these cases.
+ *
+ * The same dictionary will have to be used on the decompression side
+ * for decoding to be successful.
+ * To help identify the correct dictionary at decoding stage,
+ * the frame header allows optional embedding of a dictID field.
+ */
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4F_createCDict() :
+ * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once.
+ * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ * Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ * cctx must point to a context created by LZ4F_createCompressionContext().
+ * If cdict==NULL, compress without a dictionary.
+ * dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * If this condition is not respected, function will fail (@return an errorCode).
+ * The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ * but it's not recommended, as it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ * or an error code if it fails (can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t
+LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr);
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ * Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you may provide NULL as argument,
+ * however, it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer for the header,
+ * or an error code (which can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t
+LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* prefsPtr);
+
+
+/*! LZ4F_decompress_usingDict() :
+ * Same as LZ4F_decompress(), using a predefined dictionary.
+ * Dictionary is used "in place", without any preprocessing.
+** It must remain accessible throughout the entire frame decoding. */
+LZ4FLIB_STATIC_API size_t
+LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const void* dict, size_t dictSize,
+ const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+
+/*! Custom memory allocation :
+ * These prototypes make it possible to pass custom allocation/free functions.
+ * LZ4F_customMem is provided at state creation time, using LZ4F_create*_advanced() listed below.
+ * All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+/* Callback signatures. Each one takes an `opaqueState` pointer as first argument
+ * (presumably the `opaqueState` field stored in LZ4F_CustomMem -- confirm in lz4frame.c). */
+typedef void* (*LZ4F_AllocFunction) (void* opaqueState, size_t size);
+typedef void* (*LZ4F_CallocFunction) (void* opaqueState, size_t size);
+typedef void (*LZ4F_FreeFunction) (void* opaqueState, void* address);
+/* LZ4F_CustomMem : the set of callbacks plus the user pointer, passed by value to the LZ4F_create*_advanced() functions. */
+typedef struct {
+ LZ4F_AllocFunction customAlloc;
+ LZ4F_CallocFunction customCalloc; /* optional; when not defined, uses customAlloc + memset */
+ LZ4F_FreeFunction customFree;
+ void* opaqueState;
+} LZ4F_CustomMem;
+/* All-NULL instance. It is declared `static` in this header,
+ * and tagged with the `unused` attribute when __GNUC__ is defined. */
+static
+#ifdef __GNUC__
+__attribute__((__unused__))
+#endif
+LZ4F_CustomMem const LZ4F_defaultCMem = { NULL, NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
+
+LZ4FLIB_STATIC_API LZ4F_cctx* LZ4F_createCompressionContext_advanced(LZ4F_CustomMem customMem, unsigned version);
+LZ4FLIB_STATIC_API LZ4F_dctx* LZ4F_createDecompressionContext_advanced(LZ4F_CustomMem customMem, unsigned version);
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict_advanced(LZ4F_CustomMem customMem, const void* dictBuffer, size_t dictSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) */
diff --git a/mfbt/lz4/lz4frame_static.h b/mfbt/lz4/lz4frame_static.h
new file mode 100644
index 0000000000..2b44a63155
--- /dev/null
+++ b/mfbt/lz4/lz4frame_static.h
@@ -0,0 +1,47 @@
+/*
+ LZ4 auto-framing library
+ Header File for static linking only
+ Copyright (C) 2011-2020, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#ifndef LZ4FRAME_STATIC_H_0398209384
+#define LZ4FRAME_STATIC_H_0398209384
+
+/* The declarations that formerly were made here have been merged into
+ * lz4frame.h, protected by the LZ4F_STATIC_LINKING_ONLY macro. Going forward,
+ * it is recommended to simply include that header directly.
+ */
+
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+
+#endif /* LZ4FRAME_STATIC_H_0398209384 */
diff --git a/mfbt/lz4/lz4hc.c b/mfbt/lz4/lz4hc.c
new file mode 100644
index 0000000000..b21ad6bb59
--- /dev/null
+++ b/mfbt/lz4/lz4hc.c
@@ -0,0 +1,1631 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Copyright (C) 2011-2020, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+
+
+/* *************************************
+* Tuning Parameter
+***************************************/
+
+/*! HEAPMODE :
+ * Select how default compression function will allocate workplace memory,
+ * in stack (0:fastest), or in heap (1:requires malloc()).
+ * Since workplace is rather large, heap mode is recommended.
+**/
+#ifndef LZ4HC_HEAPMODE
+# define LZ4HC_HEAPMODE 1
+#endif
+
+
+/*=== Dependency ===*/
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+
+
+/*=== Common definitions ===*/
+#if defined(__GNUC__)
+# pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+#if defined (__clang__)
+# pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+#define LZ4_COMMONDEFS_ONLY
+#ifndef LZ4_SRC_INCLUDED
+#include "lz4.c" /* LZ4_count, constants, mem */
+#endif
+
+
+/*=== Enums ===*/
+typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
+
+
+/*=== Constants ===*/
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_OPT_NUM (1<<12)
+
+
+/*=== Macros ===*/
+/* MIN() / MAX() : note that the selected argument is evaluated twice, so arguments must be free of side effects */
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
+#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
+/* HASH_FUNCTION() : multiplies by the constant 2654435761U, then shifts right by (MINMATCH*8)-LZ4HC_HASH_LOG bits */
+#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+/* DELTANEXTMAXD() : requires a variable named `chainTable` to be in scope where the macro is used */
+#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */
+/* DELTANEXTU16() : entry of `table` selected by `pos` truncated to 16 bits; usable as an lvalue */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */
+/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
+/* expands to the addresses of the 3 arguments, separated by commas */
+#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
+
+/* LZ4HC_hashPtr() :
+ * @return : HASH_FUNCTION() applied to the 32-bit value read at `ptr` through LZ4_read32() */
+static U32 LZ4HC_hashPtr(const void* ptr)
+{
+    U32 const sequence = LZ4_read32(ptr);
+    return HASH_FUNCTION(sequence);
+}
+
+
+/**************************************
+* HC Compression
+**************************************/
+/* LZ4HC_clearTables() :
+ * Resets both lookup tables of the context through MEM_INIT() :
+ * chainTable is filled with the value 0xFF, hashTable with the value 0. */
+static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
+{
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable));
+}
+
+/* LZ4HC_init_internal() :
+ * Re-targets the context onto a new, still empty, segment beginning at `start`.
+ * The first index of the new segment is :
+ *   (size of previous prefix + previous dictLimit) + 64 KB,
+ * unless that sum exceeds 1 GB, in which case tables are cleared
+ * and the first index is simply 64 KB. */
+static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{
+    size_t const prevPrefixSize = (size_t)(hc4->end - hc4->prefixStart);
+    size_t startIndex = prevPrefixSize + hc4->dictLimit;
+    assert(startIndex >= prevPrefixSize);   /* the addition must not wrap around */
+    if (startIndex > 1 GB) {
+        /* running index became too large : wipe tables, restart numbering */
+        LZ4HC_clearTables(hc4);
+        startIndex = 0;
+    }
+    startIndex += 64 KB;
+    {   U32 const firstIndex = (U32)startIndex;
+        /* all pointers collapse onto `start`, all indexes onto firstIndex */
+        hc4->prefixStart = start;
+        hc4->end = start;
+        hc4->dictStart = start;
+        hc4->nextToUpdate = firstIndex;
+        hc4->dictLimit = firstIndex;
+        hc4->lowLimit = firstIndex;
+    }
+}
+
+
+/* LZ4HC_Insert() :
+ * Registers every position from hc4->nextToUpdate up to `ip` (excluded)
+ * into the hash table and the chain table,
+ * then records `ip`'s index as the new hc4->nextToUpdate. */
+LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+{
+    U16* const chain = hc4->chainTable;
+    U32* const heads = hc4->hashTable;
+    const BYTE* const segStart = hc4->prefixStart;
+    U32 const segStartIdx = hc4->dictLimit;
+    U32 const lastIdx = (U32)(ip - segStart) + segStartIdx;
+    U32 cur;
+    assert(ip >= segStart);
+    assert(lastIdx >= segStartIdx);
+
+    for (cur = hc4->nextToUpdate; cur < lastIdx; cur++) {
+        U32 const slot = LZ4HC_hashPtr(segStart+cur-segStartIdx);
+        size_t const gap = cur - heads[slot];   /* distance to previous position stored in the same hash slot */
+        /* chain entries are 16-bit wide : stored distance is capped at LZ4_DISTANCE_MAX */
+        DELTANEXTU16(chain, cur) = (U16)((gap > LZ4_DISTANCE_MAX) ? LZ4_DISTANCE_MAX : gap);
+        heads[slot] = cur;
+    }
+
+    hc4->nextToUpdate = lastIdx;
+}
+
+/** LZ4HC_countBack() :
+ *  Walks backward from `ip` and `match` while the preceding bytes are equal,
+ *  without stepping before `iMin` (on the ip side) nor `mMin` (on the match side).
+ * @return : a value <= 0; its magnitude is the nb of common bytes before ip/match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+                    const BYTE* const iMin, const BYTE* const mMin)
+{
+    int const lowest = (int)MAX(iMin - ip, mMin - match);   /* the closer of the two lower bounds */
+    int offset = 0;
+    assert(lowest <= 0);
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+    for (;;) {
+        if (offset <= lowest) break;                    /* reached a lower bound */
+        if (ip[offset-1] != match[offset-1]) break;    /* previous bytes differ */
+        offset--;
+    }
+    return offset;
+}
+
+/* LZ4HC_rotl32() :
+ * Rotates the 32-bit unsigned value `x` left by `r` bits.
+ * In the generic variant, `r` must be within 1..31 : r==0 would shift right by 32 bits, which is undefined behavior.
+ * The generic variant fully parenthesizes its arguments, so that expressions
+ * such as `a|b` or `n+1` can be passed safely; each argument is evaluated twice,
+ * hence arguments must be free of side effects. */
+#if defined(_MSC_VER)
+# define LZ4HC_rotl32(x,r) _rotl(x,r)
+#else
+# define LZ4HC_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#endif
+
+
+/* LZ4HC_rotatePattern() :
+ * Rotates `pattern` left by a whole number of bytes,
+ * that number being `rotate` modulo sizeof(pattern).
+ * @return : the rotated pattern, or `pattern` itself when there is nothing to rotate */
+static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
+{
+    size_t const nbBytes = rotate & (sizeof(pattern) - 1);
+    size_t const nbBits = nbBytes << 3;
+    /* a null rotation is filtered out, so LZ4HC_rotl32() is never invoked with a shift of 0 */
+    return (nbBits != 0) ? LZ4HC_rotl32(pattern, (int)nbBits) : pattern;
+}
+
+/* LZ4HC_countPattern() :
+ * Counts, starting at `ip` and never reading at or beyond `iEnd`,
+ * how many consecutive bytes follow the repetitive pattern.
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * @return : nb of bytes, counted from the initial `ip`, which match the pattern */
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+ const BYTE* const iStart = ip;
+ /* when reg_t is 8 bytes wide, pattern32 is duplicated into the upper 32 bits; otherwise it is used as is */
+ reg_t const pattern = (sizeof(pattern)==8) ?
+ (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
+
+ /* fast loop : compares sizeof(pattern) bytes per iteration, as long as a full word remains before iEnd */
+ while (likely(ip < iEnd-(sizeof(pattern)-1))) {
+ reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+ if (!diff) { ip+=sizeof(pattern); continue; }
+ /* mismatch inside this word : LZ4_NbCommonBytes() (defined in lz4.c) presumably gives the nb of leading equal bytes */
+ ip += LZ4_NbCommonBytes(diff);
+ return (unsigned)(ip - iStart);
+ }
+
+ /* tail : less than one full word left before iEnd, compare byte by byte */
+ if (LZ4_isLittleEndian()) {
+ /* expected bytes are taken from the least significant byte of the pattern upward */
+ reg_t patternByte = pattern;
+ while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+ ip++; patternByte >>= 8;
+ }
+ } else { /* big endian */
+ /* expected bytes are taken from the most significant byte of the pattern downward */
+ U32 bitOffset = (sizeof(pattern)*8) - 8;
+ while (ip < iEnd) {
+ BYTE const byte = (BYTE)(pattern >> bitOffset);
+ if (*ip != byte) break;
+ ip ++; bitOffset -= 8;
+ } }
+
+ return (unsigned)(ip - iStart);
+}
+
+/* LZ4HC_reverseCountPattern() :
+ * Counts, walking backward from `ip` and never reading before `iLow`,
+ * how many consecutive bytes follow the repetitive pattern.
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * read using natural platform endianness
+ * @return : nb of bytes immediately preceding the initial `ip` which match the pattern */
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+{
+    const BYTE* const origin = ip;
+    /* last byte of the pattern as laid out in memory : works for any endianness */
+    const BYTE* expected = (const BYTE*)(&pattern) + 3;
+
+    /* first, step back 4 bytes at a time while the full pattern is found */
+    for ( ; likely(ip >= iLow+4); ip -= 4) {
+        if (LZ4_read32(ip-4) != pattern) break;
+    }
+    /* then, finish one byte at a time, consuming the pattern from its last byte downward */
+    for ( ; likely(ip>iLow); ip--, expected--) {
+        if (ip[-1] != *expected) break;
+    }
+    return (unsigned)(origin - ip);
+}
+
+/* LZ4HC_protectDictEnd() :
+ * Rejects a match index located within the last 3 bytes of the dictionary,
+ * since reading the 4 bytes of a MINMATCH from there would overflow.
+ * The distance is computed in unsigned arithmetic : an index at or beyond
+ * dictLimit wraps around to a large value, and is therefore accepted.
+ * @returns true if the match index is okay.
+ */
+static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+{
+    U32 const gapToDictEnd = (U32)((dictLimit - 1) - matchIndex);
+    return (gapToDictEnd >= 3);
+}
+
+/* repeat_state_e : outcome of the repetitive-pattern test done in LZ4HC_InsertAndGetWiderMatch().
+ * rep_untested : test not run yet; rep_confirmed : the 4 bytes read at ip are all identical; rep_not : they are not */
+typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
+/* HCfavor_e : when favorDecompressionSpeed is selected, LZ4HC_InsertAndGetWiderMatch() ignores candidates closer than 8 bytes */
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
+
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndGetWiderMatch (
+ LZ4HC_CCtx_internal* const hc4,
+ const BYTE* const ip,
+ const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+ int longest,
+ const BYTE** matchpos,
+ const BYTE** startpos,
+ const int maxNbAttempts,
+ const int patternAnalysis, const int chainSwap,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed)
+{
+ U16* const chainTable = hc4->chainTable;
+ U32* const HashTable = hc4->hashTable;
+ const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
+ const BYTE* const prefixPtr = hc4->prefixStart;
+ const U32 prefixIdx = hc4->dictLimit;
+ const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx;
+ const int withinStartDistance = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex);
+ const U32 lowestMatchIndex = (withinStartDistance) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
+ const BYTE* const dictStart = hc4->dictStart;
+ const U32 dictIdx = hc4->lowLimit;
+ const BYTE* const dictEnd = dictStart + prefixIdx - dictIdx;
+ int const lookBackLength = (int)(ip-iLowLimit);
+ int nbAttempts = maxNbAttempts;
+ U32 matchChainPos = 0;
+ U32 const pattern = LZ4_read32(ip);
+ U32 matchIndex;
+ repeat_state_e repeat = rep_untested;
+ size_t srcPatternLength = 0;
+
+ DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
+ /* First Match */
+ LZ4HC_Insert(hc4, ip);
+ matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+ DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+ matchIndex, lowestMatchIndex);
+
+ while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
+ int matchLength=0;
+ nbAttempts--;
+ assert(matchIndex < ipIndex);
+ if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
+ /* do nothing */
+ } else if (matchIndex >= prefixIdx) { /* within current Prefix */
+ const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx;
+ assert(matchPtr < ip);
+ assert(longest >= 1);
+ if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
+ if (LZ4_read32(matchPtr) == pattern) {
+ int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, prefixPtr) : 0;
+ matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+ matchLength -= back;
+ if (matchLength > longest) {
+ longest = matchLength;
+ *matchpos = matchPtr + back;
+ *startpos = ip + back;
+ } } }
+ } else { /* lowestMatchIndex <= matchIndex < dictLimit */
+ const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx);
+ assert(matchIndex >= dictIdx);
+ if ( likely(matchIndex <= prefixIdx - 4)
+ && (LZ4_read32(matchPtr) == pattern) ) {
+ int back = 0;
+ const BYTE* vLimit = ip + (prefixIdx - matchIndex);
+ if (vLimit > iHighLimit) vLimit = iHighLimit;
+ matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+ if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
+ matchLength += LZ4_count(ip+matchLength, prefixPtr, iHighLimit);
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
+ matchLength -= back;
+ if (matchLength > longest) {
+ longest = matchLength;
+ *matchpos = prefixPtr - prefixIdx + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip + back;
+ } } }
+
+ if (chainSwap && matchLength==longest) { /* better match => select a better chain */
+ assert(lookBackLength==0); /* search forward only */
+ if (matchIndex + (U32)longest <= ipIndex) {
+ int const kTrigger = 4;
+ U32 distanceToNextMatch = 1;
+ int const end = longest - MINMATCH + 1;
+ int step = 1;
+ int accel = 1 << kTrigger;
+ int pos;
+ for (pos = 0; pos < end; pos += step) {
+ U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
+ step = (accel++ >> kTrigger);
+ if (candidateDist > distanceToNextMatch) {
+ distanceToNextMatch = candidateDist;
+ matchChainPos = (U32)pos;
+ accel = 1 << kTrigger;
+ } }
+ if (distanceToNextMatch > 1) {
+ if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
+ matchIndex -= distanceToNextMatch;
+ continue;
+ } } }
+
+ { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+ if (patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+ U32 const matchCandidateIdx = matchIndex-1;
+ /* may be a repeated pattern */
+ if (repeat == rep_untested) {
+ if ( ((pattern & 0xFFFF) == (pattern >> 16))
+ & ((pattern & 0xFF) == (pattern >> 24)) ) {
+ repeat = rep_confirmed;
+ srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+ } else {
+ repeat = rep_not;
+ } }
+ if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
+ && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) {
+ const int extDict = matchCandidateIdx < prefixIdx;
+ const BYTE* const matchPtr = (extDict ? dictStart - dictIdx : prefixPtr - prefixIdx) + matchCandidateIdx;
+ if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
+ const BYTE* const iLimit = extDict ? dictEnd : iHighLimit;
+ size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
+ if (extDict && matchPtr + forwardPatternLength == iLimit) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
+ forwardPatternLength += LZ4HC_countPattern(prefixPtr, iHighLimit, rotatedPattern);
+ }
+ { const BYTE* const lowestMatchPtr = extDict ? dictStart : prefixPtr;
+ size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+ size_t currentSegmentLength;
+ if (!extDict
+ && matchPtr - backLength == prefixPtr
+ && dictIdx < prefixIdx) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
+ backLength += LZ4HC_reverseCountPattern(dictEnd, dictStart, rotatedPattern);
+ }
+ /* Limit backLength not go further than lowestMatchIndex */
+ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
+ assert(matchCandidateIdx - backLength >= lowestMatchIndex);
+ currentSegmentLength = backLength + forwardPatternLength;
+ /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
+ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
+ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+ U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
+ if (LZ4HC_protectDictEnd(prefixIdx, newMatchIndex))
+ matchIndex = newMatchIndex;
+ else {
+ /* Can only happen if started in the prefix */
+ assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict);
+ matchIndex = prefixIdx;
+ }
+ } else {
+ U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+ if (!LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) {
+ assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict);
+ matchIndex = prefixIdx;
+ } else {
+ matchIndex = newMatchIndex;
+ if (lookBackLength==0) { /* no back possible */
+ size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+ if ((size_t)longest < maxML) {
+ assert(prefixPtr - prefixIdx + matchIndex != ip);
+ if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break;
+ assert(maxML < 2 GB);
+ longest = (int)maxML;
+ *matchpos = prefixPtr - prefixIdx + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip;
+ }
+ { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+ if (distToNextPattern > matchIndex) break; /* avoid overflow */
+ matchIndex -= distToNextPattern;
+ } } } } }
+ continue;
+ } }
+ } } /* PA optimization */
+
+ /* follow current chain */
+ matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos);
+
+ } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
+
+ if ( dict == usingDictCtxHc
+ && nbAttempts > 0
+ && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
+ size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
+ U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+ assert(dictEndOffset <= 1 GB);
+ matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+ while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
+ const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex;
+
+ if (LZ4_read32(matchPtr) == pattern) {
+ int mlt;
+ int back = 0;
+ const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
+ if (vLimit > iHighLimit) vLimit = iHighLimit;
+ mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0;
+ mlt -= back;
+ if (mlt > longest) {
+ longest = mlt;
+ *matchpos = prefixPtr - prefixIdx + matchIndex + back;
+ *startpos = ip + back;
+ } }
+
+ { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
+ dictMatchIndex -= nextOffset;
+ matchIndex -= nextOffset;
+ } } }
+
+ return longest;
+}
+
+/* LZ4HC_InsertAndFindBestMatch() :
+ * Forward-only match search at position `ip`.
+ * Delegates to LZ4HC_InsertAndGetWiderMatch() with the low limit pinned to `ip`,
+ * chain swapping disabled and favorCompressionRatio selected.
+ * @matchpos : receives the position of the selected match.
+ * @return : value returned by LZ4HC_InsertAndGetWiderMatch()
+ *           (the initial "longest" handed over is MINMATCH-1). */
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* Index table will be updated */
+                             const BYTE* const ip, const BYTE* const iLimit,
+                             const BYTE** matchpos,
+                             const int maxNbAttempts,
+                             const int patternAnalysis,
+                             const dictCtx_directive dict)
+{
+    /* LZ4HC_InsertAndGetWiderMatch() may report a start position different from ip (*startpos).
+     * Since the low limit passed below is ip itself, it is not allowed to search before ip,
+     * hence the reported start position carries no information and is dropped. */
+    const BYTE* discardedStart = ip;
+    int const initialLongest = MINMATCH - 1;
+    int const noChainSwap = 0;
+
+    return LZ4HC_InsertAndGetWiderMatch(hc4,
+                                        ip, ip, iLimit,
+                                        initialLongest,
+                                        matchpos, &discardedStart,
+                                        maxNbAttempts, patternAnalysis,
+                                        noChainSwap,
+                                        dict, favorCompressionRatio);
+}
+
+/* LZ4HC_encodeSequence() :
+ * Writes one sequence at *_op : token, literal length, the literals [*_anchor, *_ip),
+ * a 2-byte little-endian offset (*_ip - match), then the match length.
+ * On success, *_ip is advanced by matchLength, *_anchor is set to the new *_ip,
+ * and *_op points just after the written sequence.
+ * When `limit` is non-zero, output space is checked against `oend` before writing;
+ * on failure *_op has already been advanced (at least past the token),
+ * so the caller must restore it from its own saved copy.
+ * @return : 0 if ok,
+ * 1 if buffer issue detected */
+LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
+ const BYTE** _ip,
+ BYTE** _op,
+ const BYTE** _anchor,
+ int matchLength,
+ const BYTE* const match,
+ limitedOutput_directive limit,
+ BYTE* oend)
+{
+ /* local aliases : the body reads like the caller's code, while updating the caller's pointers */
+#define ip (*_ip)
+#define op (*_op)
+#define anchor (*_anchor)
+
+ size_t length;
+ BYTE* const token = op++; /* token byte is reserved first, and filled in once both lengths are known */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
+ /* debug-only cost accounting; static state, so only meaningful for a single segment */
+ static const BYTE* start = NULL;
+ static U32 totalCost = 0;
+ U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+ U32 const ll = (U32)(ip - anchor);
+ U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+ U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+ U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+ if (start==NULL) start = anchor; /* only works for single segment */
+ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+ DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
+ pos,
+ (U32)(ip - anchor), matchLength, (U32)(ip-match),
+ cost, totalCost);
+ totalCost += cost;
+#endif
+
+ /* Encode Literal length */
+ length = (size_t)(ip - anchor);
+ LZ4_STATIC_ASSERT(notLimited == 0); /* so that `limit` evaluates to false when no output check is requested */
+ /* Check output limit */
+ if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+ DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+ (int)length, (int)(oend - op));
+ return 1;
+ }
+ if (length >= RUN_MASK) {
+ /* literal length does not fit in the token : emit extra length bytes */
+ size_t len = length - RUN_MASK;
+ *token = (RUN_MASK << ML_BITS);
+ for(; len >= 255 ; len -= 255) *op++ = 255;
+ *op++ = (BYTE)len;
+ } else {
+ *token = (BYTE)(length << ML_BITS);
+ }
+
+ /* Copy Literals */
+ LZ4_wildCopy8(op, anchor, op + length);
+ op += length;
+
+ /* Encode Offset */
+ assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
+ LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+
+ /* Encode MatchLength */
+ assert(matchLength >= MINMATCH);
+ length = (size_t)matchLength - MINMATCH; /* stored match length is relative to MINMATCH */
+ if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+ DEBUGLOG(6, "Not enough room to write match length");
+ return 1; /* Check output limit */
+ }
+ if (length >= ML_MASK) {
+ *token += ML_MASK;
+ length -= ML_MASK;
+ /* extra length bytes, written two at a time, then the remainder */
+ for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+ if (length >= 255) { length -= 255; *op++ = 255; }
+ *op++ = (BYTE)length;
+ } else {
+ *token += (BYTE)(length);
+ }
+
+ /* Prepare next loop */
+ ip += matchLength;
+ anchor = ip;
+
+ return 0;
+}
+#undef ip
+#undef op
+#undef anchor
+
+/* LZ4HC_compress_hashChain() :
+ * Compresses `source` into `dest`, weighing up to three overlapping match candidates
+ * (ml/ref at ip, ml2/ref2 at start2, ml3/ref3 at start3) before emitting each sequence.
+ * @srcSizePtr : in : number of bytes available at `source`;
+ * out : number of input bytes consumed (reset to 0 on entry, still 0 when the function returns 0).
+ * @maxOutputSize : capacity of `dest`; only enforced when `limit` is non-zero.
+ * @maxNbAttempts : search budget forwarded to the match finder; values above 128 also enable pattern analysis.
+ * @limit : with fillOutput, the last sequence / literal run is shortened so that the output fits into `dest`;
+ * with limitedOutput, running out of room is a failure.
+ * @return : number of bytes written into `dest`, or 0 on failure. */
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const source,
+ char* const dest,
+ int* srcSizePtr,
+ int const maxOutputSize,
+ int maxNbAttempts,
+ const limitedOutput_directive limit,
+ const dictCtx_directive dict
+ )
+{
+ const int inputSize = *srcSizePtr;
+ const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */
+
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+ BYTE* optr = (BYTE*) dest; /* value of op before the sequence being encoded; used to rewind on overflow */
+ BYTE* op = (BYTE*) dest;
+ BYTE* oend = op + maxOutputSize;
+
+ int ml0, ml, ml2, ml3;
+ const BYTE* start0;
+ const BYTE* ref0;
+ const BYTE* ref = NULL;
+ const BYTE* start2 = NULL;
+ const BYTE* ref2 = NULL;
+ const BYTE* start3 = NULL;
+ const BYTE* ref3 = NULL;
+
+ /* init */
+ *srcSizePtr = 0;
+ if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support the LZ4 format restriction; undone at _last_literals */
+ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
+
+ /* Main Loop */
+ while (ip <= mflimit) {
+ ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+ if (ml<MINMATCH) { ip++; continue; }
+
+ /* saved, in case we would skip too much */
+ start0 = ip; ref0 = ref; ml0 = ml;
+
+_Search2:
+ /* look for a longer match, searched from ip+ml-2 and allowed to start as early as ip */
+ if (ip+ml <= mflimit) {
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+ maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+ } else {
+ ml2 = ml;
+ }
+
+ if (ml2 == ml) { /* No better match => encode ML1 */
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ continue;
+ }
+
+ if (start0 < ip) { /* first match was skipped at least once */
+ if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */
+ ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */
+ } }
+
+ /* Here, start0==ip */
+ if ((start2 - ip) < 3) { /* First Match too small : removed */
+ ml = ml2;
+ ip = start2;
+ ref =ref2;
+ goto _Search2;
+ }
+
+_Search3:
+ /* At this stage, we have :
+ * ml2 > ml1, and
+ * ip1+3 <= ip2 (usually < ip1+ml1) */
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0) {
+ /* push the start of match 2 forward, shortening it by the same amount */
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+ if (start2 + ml2 <= mflimit) {
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+ maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+ } else {
+ ml3 = ml2;
+ }
+
+ if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */
+ /* ip & ref are known; Now for ml */
+ if (start2 < ip+ml) ml = (int)(start2 - ip);
+ /* Now, encode 2 sequences */
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ ip = start2;
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
+ /* _dest_overflow expects ml and ref to describe the sequence that failed */
+ ml = ml2;
+ ref = ref2;
+ goto _dest_overflow;
+ }
+ continue;
+ }
+
+ if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */
+ if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+ if (start2 < ip+ml) {
+ int correction = (int)(ip+ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH) {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _Search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _Search3;
+ }
+
+ /*
+ * OK, now we have 3 ascending matches;
+ * let's write the first one ML1.
+ * ip & ref are known; Now decide ml.
+ */
+ if (start2 < ip+ml) {
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ } else {
+ ml = (int)(start2 - ip);
+ }
+ }
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+
+ /* ML2 becomes ML1 */
+ ip = start2; ref = ref2; ml = ml2;
+
+ /* ML3 becomes ML2 */
+ start2 = start3; ref2 = ref3; ml2 = ml3;
+
+ /* let's find a new ML3 */
+ goto _Search3;
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */
+ size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+ size_t const totalSize = 1 + llAdd + lastRunSize;
+ if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
+ if (limit && (op + totalSize > oend)) {
+ if (limit == limitedOutput) return 0;
+ /* adapt lastRunSize to fill 'dest' */
+ lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+ llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+ lastRunSize -= llAdd;
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+ ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+ *op++ = (RUN_MASK << ML_BITS);
+ for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize << ML_BITS);
+ }
+ LZ4_memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
+ /* End */
+ *srcSizePtr = (int) (((const char*)ip) - source);
+ return (int) (((char*)op)-dest);
+
+_dest_overflow:
+ /* reached when LZ4HC_encodeSequence() reports a lack of output space;
+ * only fillOutput tries to salvage a (possibly shortened) last sequence */
+ if (limit == fillOutput) {
+ /* Assumption : ip, anchor, ml and ref must be set correctly */
+ size_t const ll = (size_t)(ip - anchor);
+ size_t const ll_addbytes = (ll + 240) / 255;
+ size_t const ll_totalCost = 1 + ll_addbytes + ll;
+ BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+ DEBUGLOG(6, "Last sequence overflowing");
+ op = optr; /* restore correct out pointer */
+ if (op + ll_totalCost <= maxLitPos) {
+ /* ll validated; now adjust match length */
+ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+ size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+ assert(maxMlSize < INT_MAX); assert(ml >= 0);
+ if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
+ if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
+ /* space was validated just above, hence notLimited */
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
+ } }
+ goto _last_literals;
+ }
+ /* compression failed */
+ return 0;
+}
+
+
+/* Forward declaration :
+ * LZ4HC_compress_generic_internal() calls this function for levels using the lz4opt strategy. */
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
+ const char* const source, char* dst,
+ int* srcSizePtr, int dstCapacity,
+ int const nbSearches, size_t sufficient_len,
+ const limitedOutput_directive limit, int const fullUpdate,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed);
+
+
+/* LZ4HC_compress_generic_internal() :
+ * Maps `cLevel` onto a parser (hash chain or optimal) and its parameters, then runs it.
+ * ctx->end is advanced by *srcSizePtr before the parser is invoked.
+ * A result <= 0 marks the context as dirty.
+ * @cLevel : values < 1 select LZ4HC_CLEVEL_DEFAULT; values above LZ4HC_CLEVEL_MAX are capped.
+ * @return : result of the selected parser,
+ *           or 0 if the request is rejected upfront
+ *           (fillOutput with dstCapacity < 1, or input size negative / above LZ4_MAX_INPUT_SIZE). */
+LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
+{
+    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+    typedef struct {
+        lz4hc_strat_e strat;
+        int nbSearches;
+        U32 targetLength;
+    } cParams_t;
+    /* one entry per compression level */
+    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
+        { lz4hc,     2, 16 },  /* 0, unused */
+        { lz4hc,     2, 16 },  /* 1, unused */
+        { lz4hc,     2, 16 },  /* 2, unused */
+        { lz4hc,     4, 16 },  /* 3 */
+        { lz4hc,     8, 16 },  /* 4 */
+        { lz4hc,    16, 16 },  /* 5 */
+        { lz4hc,    32, 16 },  /* 6 */
+        { lz4hc,    64, 16 },  /* 7 */
+        { lz4hc,   128, 16 },  /* 8 */
+        { lz4hc,   256, 16 },  /* 9 */
+        { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+        { lz4opt,  512,128 },  /*11 */
+        { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+    };
+    const cParams_t* levelParams;
+    int cSize;
+
+    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+                ctx, src, *srcSizePtr, limit);
+
+    /* upfront rejections */
+    if ((limit == fillOutput) && (dstCapacity < 1)) {
+        return 0;   /* Impossible to store anything */
+    }
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) {
+        return 0;   /* Unsupported input size (too large or negative) */
+    }
+
+    ctx->end += *srcSizePtr;
+
+    /* normalize the level into the table's range */
+    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
+    if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
+    levelParams = &clTable[cLevel];
+
+    if (levelParams->strat != lz4hc) {
+        HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
+        assert(levelParams->strat == lz4opt);
+        cSize = LZ4HC_compress_optimal(ctx,
+                            src, dst, srcSizePtr, dstCapacity,
+                            levelParams->nbSearches, levelParams->targetLength, limit,
+                            cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
+                            dict, favor);
+    } else {
+        cSize = LZ4HC_compress_hashChain(ctx,
+                            src, dst, srcSizePtr, dstCapacity,
+                            levelParams->nbSearches, limit, dict);
+    }
+
+    if (cSize <= 0) ctx->dirty = 1;
+    return cSize;
+}
+
+/* Forward declaration : needed by LZ4HC_compress_generic_dictCtx(), defined further down in this file. */
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock);
+
+/* LZ4HC_compress_generic_noDictCtx() :
+ * Compression entry point for a context which has no attached dictionary context
+ * (ctx->dictCtx must be NULL). */
+static int
+LZ4HC_compress_generic_noDictCtx (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
+{
+    int cSize;
+    assert(ctx->dictCtx == NULL);
+    cSize = LZ4HC_compress_generic_internal(ctx,
+                                            src, dst, srcSizePtr, dstCapacity,
+                                            cLevel, limit,
+                                            noDictCtx);
+    return cSize;
+}
+
+/* LZ4HC_compress_generic_dictCtx() :
+ * Compression entry point for a context which has an attached dictionary context.
+ * Three cases, depending on the amount of history already referenced by `ctx` :
+ * - 64 KB or more : the dictionary context is detached, compression proceeds without it;
+ * - none, and more than 4 KB of input : the dictionary context is copied into `ctx`
+ *   and becomes its external dictionary;
+ * - otherwise : the dictionary context is searched in place (usingDictCtxHc). */
+static int
+LZ4HC_compress_generic_dictCtx (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
+{
+    size_t const prefixLength = (size_t)(ctx->end - ctx->prefixStart);
+    size_t const position = prefixLength + (ctx->dictLimit - ctx->lowLimit);
+    assert(ctx->dictCtx != NULL);
+
+    if (position >= 64 KB) {
+        /* detach the dictionary context */
+        ctx->dictCtx = NULL;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+
+    if ((position == 0) && (*srcSizePtr > 4 KB)) {
+        /* start from a copy of the dictionary context, then continue on `src` */
+        LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
+        LZ4HC_setExternalDict(ctx, (const BYTE *)src);
+        ctx->compressionLevel = (short)cLevel;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+
+    return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc);
+}
+
+/* LZ4HC_compress_generic() :
+ * Dispatches to the dictCtx / noDictCtx variant,
+ * depending on whether a dictionary context is attached to `ctx`. */
+static int
+LZ4HC_compress_generic (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
+{
+    if (ctx->dictCtx != NULL) {
+        return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+    return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+}
+
+
+/* LZ4_sizeofStateHC() :
+ * @return : size of LZ4_streamHC_t, in bytes */
+int LZ4_sizeofStateHC(void)
+{
+    return (int)sizeof(LZ4_streamHC_t);
+}
+
+/* LZ4_streamHC_t_alignment() :
+ * @return : alignment expected for LZ4_streamHC_t when LZ4_ALIGN_TEST is enabled,
+ *           1 otherwise (which makes alignment checks always pass). */
+static size_t LZ4_streamHC_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    /* the padding inserted after a leading char reveals the alignment of the second member */
+    typedef struct { char pad; LZ4_streamHC_t stream; } alignProbe_t;
+    return sizeof(alignProbe_t) - sizeof(LZ4_streamHC_t);
+#else
+    return 1;  /* effectively disabled */
+#endif
+}
+
+/* LZ4_compress_HC_extStateHC_fastReset() :
+ * `state` is presumed correctly initialized,
+ * in which case its size and alignment have already been validated.
+ * Output is bounded by dstCapacity only when it is smaller than LZ4_compressBound(srcSize).
+ * @return : result of LZ4HC_compress_generic(), or 0 if `state` is misaligned */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    LZ4_streamHC_t* const stream = (LZ4_streamHC_t*)state;
+    LZ4HC_CCtx_internal* const ctx = &stream->internal_donotuse;
+    limitedOutput_directive outputMode;
+
+    if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
+
+    LZ4_resetStreamHC_fast(stream, compressionLevel);
+    LZ4HC_init_internal (ctx, (const BYTE*)src);
+
+    /* no need to check output space when dst is large enough for the worst case */
+    outputMode = (dstCapacity < LZ4_compressBound(srcSize)) ? limitedOutput : notLimited;
+    return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, outputMode);
+}
+
+/* LZ4_compress_HC_extStateHC() :
+ * Fully initializes `state` as an LZ4_streamHC_t, then compresses `src` into `dst`.
+ * @return : result of LZ4_compress_HC_extStateHC_fastReset(), or 0 if initialization fails */
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    if (LZ4_initStreamHC(state, sizeof(LZ4_streamHC_t)) == NULL) {
+        return 0;   /* init failure */
+    }
+    return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
+}
+
+/* LZ4_compress_HC() :
+ * One-shot compression, using a temporary LZ4_streamHC_t
+ * allocated on the heap when LZ4HC_HEAPMODE==1, on the stack otherwise.
+ * @return : result of LZ4_compress_HC_extStateHC(), or 0 if the heap allocation fails */
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    LZ4_streamHC_t* const heapState = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+    int compressedSize;
+    if (heapState == NULL) return 0;
+    compressedSize = LZ4_compress_HC_extStateHC(heapState, src, dst, srcSize, dstCapacity, compressionLevel);
+    FREEMEM(heapState);
+    return compressedSize;
+#else
+    LZ4_streamHC_t stackState;
+    return LZ4_compress_HC_extStateHC(&stackState, src, dst, srcSize, dstCapacity, compressionLevel);
+#endif
+}
+
+/* LZ4_compress_HC_destSize() :
+ * Compresses as much of `source` as fits into `targetDestSize` bytes (fillOutput mode).
+ * `state` is presumed sized correctly (>= sizeof(LZ4_streamHC_t)).
+ * @sourceSizePtr : handed over to LZ4HC_compress_generic() as its in/out source size
+ * @return : result of LZ4HC_compress_generic(), or 0 if `state` cannot be initialized */
+int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+    LZ4_streamHC_t* const stream = LZ4_initStreamHC(state, sizeof(LZ4_streamHC_t));
+    LZ4HC_CCtx_internal* hcCtx;
+
+    if (stream == NULL) return 0;   /* init failure */
+
+    hcCtx = &stream->internal_donotuse;
+    LZ4HC_init_internal(hcCtx, (const BYTE*) source);
+    LZ4_setCompressionLevel(stream, cLevel);
+    return LZ4HC_compress_generic(hcCtx, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput);
+}
+
+
+
+/**************************************
+* Streaming Functions
+**************************************/
+/* allocation */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+/* LZ4_createStreamHC() :
+ * Allocates a zeroed LZ4_streamHC_t and sets it to the default compression level.
+ * @return : the new stream, or NULL if the allocation fails */
+LZ4_streamHC_t* LZ4_createStreamHC(void)
+{
+    LZ4_streamHC_t* const stream = (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
+    if (stream != NULL) {
+        LZ4_setCompressionLevel(stream, LZ4HC_CLEVEL_DEFAULT);
+    }
+    return stream;
+}
+
+/* LZ4_freeStreamHC() :
+ * Releases a stream; a NULL pointer is accepted and ignored.
+ * @return : always 0 */
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
+{
+    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
+    if (LZ4_streamHCPtr != NULL) {
+        FREEMEM(LZ4_streamHCPtr);
+    }
+    return 0;
+}
+#endif
+
+
+/* LZ4_initStreamHC() :
+ * Initializes `buffer` as an LZ4_streamHC_t :
+ * the internal state is zeroed, then the default compression level is set.
+ * @return : `buffer` cast to LZ4_streamHC_t*,
+ *           or NULL if `buffer` is NULL, too small, or misaligned */
+LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
+{
+    LZ4_streamHC_t* const stream = (LZ4_streamHC_t*)buffer;
+    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
+
+    /* check conditions */
+    if ( (buffer == NULL)
+      || (size < sizeof(LZ4_streamHC_t))
+      || (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) ) {
+        return NULL;
+    }
+
+    /* init */
+    MEM_INIT(&stream->internal_donotuse, 0, sizeof(stream->internal_donotuse));
+    LZ4_setCompressionLevel(stream, LZ4HC_CLEVEL_DEFAULT);
+    return stream;
+}
+
+/* LZ4_resetStreamHC() :
+ * just a stub : full re-initialization, followed by selection of the compression level */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    (void)LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(LZ4_streamHC_t));
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+/* LZ4_resetStreamHC_fast() :
+ * Prepares the stream for a new compression.
+ * A dirty stream is fully re-initialized;
+ * otherwise only the buffer references (prefixStart, dictCtx) are cleared,
+ * while `end` keeps the distance (end - prefixStart).
+ * The requested compression level is applied in both cases. */
+void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    LZ4HC_CCtx_internal* const hcState = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+
+    if (hcState->dirty) {
+        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+    } else {
+        /* preserve end - prefixStart : can trigger clearTable's threshold */
+        if (hcState->end == NULL) {
+            assert(hcState->prefixStart == NULL);
+        } else {
+            hcState->end -= (uptrval)hcState->prefixStart;
+        }
+        hcState->prefixStart = NULL;
+        hcState->dictCtx = NULL;
+    }
+
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+/* LZ4_setCompressionLevel() :
+ * Stores the compression level into the stream.
+ * Values < 1 select LZ4HC_CLEVEL_DEFAULT; values above LZ4HC_CLEVEL_MAX are capped. */
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    int level = compressionLevel;
+    DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+
+    if (level < 1) {
+        level = LZ4HC_CLEVEL_DEFAULT;
+    }
+    if (level > LZ4HC_CLEVEL_MAX) {
+        level = LZ4HC_CLEVEL_MAX;
+    }
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)level;
+}
+
+/* LZ4_favorDecompressionSpeed() :
+ * Records the preference as a boolean : any non-zero `favor` is stored as 1. */
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor)
+{
+    LZ4HC_CCtx_internal* const hcState = &LZ4_streamHCPtr->internal_donotuse;
+    hcState->favorDecSpeed = favor ? 1 : 0;
+}
+
+/* LZ4_loadDictHC() :
+ * Loads `dictionary` as the history of the stream.
+ * LZ4_streamHCPtr is presumed properly initialized.
+ * Only the last 64 KB of the dictionary are referenced.
+ * The stream is fully re-initialized; only its compression level is carried over.
+ * @dictSize : size of `dictionary`, in bytes.
+ *             A negative value is invalid, and is treated as an empty dictionary,
+ *             so that the stream never ends up with end < prefixStart.
+ * @return : number of dictionary bytes referenced (0 <= return <= 64 KB) */
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* dictionary, int dictSize)
+{
+ LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
+ assert(LZ4_streamHCPtr != NULL);
+ if (dictSize < 0) dictSize = 0; /* invalid size : load nothing, rather than moving `end` before `dictionary` */
+ if (dictSize > 64 KB) {
+ /* keep the tail : it is the part closest to upcoming data */
+ dictionary += (size_t)dictSize - 64 KB;
+ dictSize = 64 KB;
+ }
+ /* need a full initialization, there are bad side-effects when using resetFast() */
+ { int const cLevel = ctxPtr->compressionLevel;
+ LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+ }
+ LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
+ ctxPtr->end = (const BYTE*)dictionary + dictSize;
+ /* index the dictionary content; positions within the last 3 bytes are left out */
+ if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+ return dictSize;
+}
+
+/* LZ4_attach_HC_dictionary() :
+ * Makes `working_stream` reference the internal state of `dictionary_stream`.
+ * Passing NULL as `dictionary_stream` detaches any dictionary context. */
+void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) {
+    const LZ4HC_CCtx_internal* attachedCtx = NULL;
+    if (dictionary_stream != NULL) {
+        attachedCtx = &(dictionary_stream->internal_donotuse);
+    }
+    working_stream->internal_donotuse.dictCtx = attachedCtx;
+}
+
+/* compression */
+
+/* LZ4HC_setExternalDict() :
+ * Turns the current prefix into the external dictionary segment,
+ * and starts a new, empty prefix at `newBlock` (end == prefixStart == newBlock).
+ * Any previous external dictionary is dropped, and the dictionary context is detached. */
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
+{
+ DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
+ if (ctxPtr->end >= ctxPtr->prefixStart + 4)
+ LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+
+ /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+ ctxPtr->lowLimit = ctxPtr->dictLimit;
+ ctxPtr->dictStart = ctxPtr->prefixStart;
+ /* indexes keep growing : the new prefix starts where the former one ended */
+ ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart);
+ ctxPtr->prefixStart = newBlock;
+ ctxPtr->end = newBlock;
+ ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
+
+ /* cannot reference an extDict and a dictCtx at the same time */
+ ctxPtr->dictCtx = NULL;
+}
+
+/* LZ4_compressHC_continue_generic() :
+ * Streaming compression of one block, using the history referenced by the stream.
+ * Before compressing, the context is adjusted :
+ * - it is initialized on `src` if it has no prefix yet;
+ * - if the end index exceeds 2 GB, up to the last 64 KB of prefix are reloaded as a dictionary;
+ * - if `src` does not directly follow the previous data, the previous prefix becomes the external dictionary;
+ * - the part of the external dictionary overlapped by the new input is no longer referenced.
+ * Compression then uses the level stored in the stream.
+ * @return : result of LZ4HC_compress_generic() */
+static int
+LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* src, char* dst,
+ int* srcSizePtr, int dstCapacity,
+ limitedOutput_directive limit)
+{
+ LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+ LZ4_streamHCPtr, src, *srcSizePtr, limit);
+ assert(ctxPtr != NULL);
+ /* auto-init if forgotten */
+ if (ctxPtr->prefixStart == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
+
+ /* Check overflow */
+ if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) {
+ size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart);
+ if (dictSize > 64 KB) dictSize = 64 KB;
+ LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+ }
+
+ /* Check if blocks follow each other */
+ if ((const BYTE*)src != ctxPtr->end)
+ LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
+
+ /* Check overlapping input/dictionary space */
+ { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
+ const BYTE* const dictBegin = ctxPtr->dictStart;
+ const BYTE* const dictEnd = ctxPtr->dictStart + (ctxPtr->dictLimit - ctxPtr->lowLimit);
+ if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
+ if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+ /* drop the beginning of the dictionary, up to the end of the overlapping area */
+ ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart);
+ ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart);
+ /* fewer than 4 bytes left : drop the external dictionary entirely */
+ if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) {
+ ctxPtr->lowLimit = ctxPtr->dictLimit;
+ ctxPtr->dictStart = ctxPtr->prefixStart;
+ } } }
+
+ return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
+}
+
+/* LZ4_compress_HC_continue() :
+ * Streaming compression of one block.
+ * Output is bounded by dstCapacity only when it is smaller than LZ4_compressBound(srcSize).
+ * @return : result of LZ4_compressHC_continue_generic() */
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    limitedOutput_directive const outputMode =
+        (dstCapacity < LZ4_compressBound(srcSize)) ? limitedOutput : notLimited;
+    return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, outputMode);
+}
+
+/* LZ4_compress_HC_continue_destSize() :
+ * Streaming variant working in fillOutput mode, with `targetDestSize` as output capacity.
+ * @return : result of LZ4_compressHC_continue_generic() */
+int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
+{
+    int const cSize = LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+                                                      src, dst,
+                                                      srcSizePtr, targetDestSize,
+                                                      fillOutput);
+    return cSize;
+}
+
+
+
+/* LZ4_saveDictHC :
+ * save history content
+ * into a user-provided buffer
+ * which is then used to continue compression
+ * @dictSize : requested amount, in bytes; capped to 64 KB and to the size of the current prefix,
+ * and forced to 0 when below 4.
+ * @return : number of bytes saved into safeBuffer (possibly 0).
+ * The stream is re-based onto safeBuffer : the saved bytes become its prefix,
+ * and no external dictionary remains referenced (lowLimit == dictLimit).
+ * Note : a NULL safeBuffer is only guarded by an assert; it requires dictSize to end up as 0.
+ */
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+ LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+ int const prefixSize = (int)(streamPtr->end - streamPtr->prefixStart);
+ DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
+ assert(prefixSize >= 0);
+ if (dictSize > 64 KB) dictSize = 64 KB;
+ if (dictSize < 4) dictSize = 0;
+ if (dictSize > prefixSize) dictSize = prefixSize;
+ if (safeBuffer == NULL) assert(dictSize == 0);
+ if (dictSize > 0)
+ LZ4_memmove(safeBuffer, streamPtr->end - dictSize, dictSize); /* memmove : safeBuffer is allowed to overlap the history */
+ /* endIndex must be read before `end` and `prefixStart` get redirected to safeBuffer */
+ { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit;
+ streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+ streamPtr->prefixStart = streamPtr->end - dictSize;
+ streamPtr->dictLimit = endIndex - (U32)dictSize;
+ streamPtr->lowLimit = endIndex - (U32)dictSize;
+ streamPtr->dictStart = streamPtr->prefixStart;
+ if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+ streamPtr->nextToUpdate = streamPtr->dictLimit;
+ }
+ return dictSize;
+}
+
+
+/***************************************************
+* Deprecated Functions
+***************************************************/
+
+/* These functions currently generate deprecation warnings */
+
+/* Wrappers for deprecated compression functions */
+/* One-shot wrappers : forward to LZ4_compress_HC().
+ * Variants without an explicit capacity use LZ4_compressBound(srcSize);
+ * variants without an explicit level pass 0. */
+int LZ4_compressHC(const char* src, char* dst, int srcSize)
+{
+    return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0);
+}
+
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize)
+{
+    return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0);
+}
+
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel)
+{
+    return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel);
+}
+
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
+{
+    return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel);
+}
+
+/* Wrappers with caller-provided state : forward to LZ4_compress_HC_extStateHC() */
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize)
+{
+    return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0);
+}
+
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize)
+{
+    return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0);
+}
+
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel)
+{
+    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel);
+}
+
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
+{
+    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel);
+}
+
+/* Streaming wrappers : forward to LZ4_compress_HC_continue() */
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize)
+{
+    return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize));
+}
+
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize)
+{
+    return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize);
+}
+
+
+/* Deprecated streaming functions */
+int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); } /* @return : size of LZ4_streamHC_t, in bytes */
+
+/* LZ4_resetStreamStateHC() : initializes `state` with LZ4_initStreamHC(), then calls LZ4HC_init_internal() with `inputBuffer`; state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
+ * @return : 0 on success, 1 if LZ4_initStreamHC() returned NULL */
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+ LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
+ if (hc4 == NULL) return 1; /* init failed */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return 0;
+}
+
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+void* LZ4_createHC (const char* inputBuffer) /* @return : state obtained from LZ4_createStreamHC() and passed to LZ4HC_init_internal() with `inputBuffer`, or NULL if creation failed */
+{
+ LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
+ if (hc4 == NULL) return NULL; /* not enough memory */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return hc4;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data) /* releases `LZ4HC_Data` with FREEMEM(); @return : always 0 */
+{
+ if (!LZ4HC_Data) return 0; /* support free on NULL */
+ FREEMEM(LZ4HC_Data);
+ return 0;
+}
+#endif
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) /* `LZ4HC_Data` is used as an LZ4_streamHC_t*; @return : result of LZ4HC_compress_generic() */
+{
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); /* capacity 0 + notLimited : no output bound is requested; presumably dst must hold the worst case - confirm with callers */
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) /* `LZ4HC_Data` is used as an LZ4_streamHC_t*; @return : result of LZ4HC_compress_generic() */
+{
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); /* same as LZ4_compressHC2_continue(), but passes `dstCapacity` with the limitedOutput directive */
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data) /* resets the stream and @return : a buffer start address derived from the stream's position fields */
+{
+ LZ4_streamHC_t* const ctx = (LZ4_streamHC_t*)LZ4HC_Data;
+ const BYTE* bufferStart = ctx->internal_donotuse.prefixStart - ctx->internal_donotuse.dictLimit + ctx->internal_donotuse.lowLimit; /* computed before the reset below, which presumably overwrites these fields - confirm */
+ LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); /* reset re-uses the level currently stored in the state */
+ /* avoid const char * -> char * conversion warning :( */
+ return (char*)(uptrval)bufferStart;
+}
+
+
+/* ================================================
+ * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
+ * ===============================================*/
+typedef struct { /* one entry per position relative to ip, filled by LZ4HC_compress_optimal() */
+ int price; /* cost, in bytes, of the cheapest encoding found so far that ends at this position */
+ int off; /* match offset; 0 when the position is reached by a literal */
+ int mlen; /* match length; 1 means literal */
+ int litlen; /* length of the literal run associated with this entry */
+} LZ4HC_optimal_t;
+
+/* price in bytes of a run of `litlen` literals : the literals themselves, plus the additional length bytes required once litlen >= RUN_MASK (the token is not counted here) */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+ int price = litlen;
+ assert(litlen >= 0);
+ if (litlen >= (int)RUN_MASK)
+ price += 1 + ((litlen-(int)RUN_MASK) / 255); /* 1 extra byte, then 1 more per additional 255 literals */
+ return price;
+}
+
+
+/* price in bytes of a full sequence : token + offset + literals + additional match-length bytes; requires mlen >= MINMATCH */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+ int price = 1 + 2 ; /* token + 16-bit offset */
+ assert(litlen >= 0);
+ assert(mlen >= MINMATCH);
+
+ price += LZ4HC_literalsPrice(litlen);
+
+ if (mlen >= (int)(ML_MASK+MINMATCH))
+ price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); /* 1 extra byte, then 1 more per additional 255 bytes of match length */
+
+ return price;
+}
+
+
+typedef struct { /* result of LZ4HC_FindLongerMatch() */
+ int off; /* distance from the searched position back to the match (ip - matchPtr) */
+ int len; /* match length; 0 means no match was selected */
+} LZ4HC_match_t;
+
+LZ4_FORCE_INLINE LZ4HC_match_t /* @return : a match starting at ip and strictly longer than minLen, or {0,0} when none is found */
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
+ const BYTE* ip, const BYTE* const iHighLimit,
+ int minLen, int nbSearches,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed)
+{
+ LZ4HC_match_t match = { 0 , 0 };
+ const BYTE* matchPtr = NULL;
+ /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+ * but this won't be the case here, as we define iLowLimit==ip,
+ * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+ int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+ if (matchLength <= minLen) return match; /* nothing longer than minLen : report "no match" */
+ if (favorDecSpeed) {
+ if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut : lengths in ]18,36] are clamped to 18 */
+ }
+ match.len = matchLength;
+ match.off = (int)(ip-matchPtr);
+ return match;
+}
+
+
+static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, /* @return : nb of bytes written into dst, or 0 on failure */
+ const char* const source, /* input data */
+ char* dst, /* output buffer */
+ int* srcSizePtr, /* in : size of source; out : nb of source bytes consumed, written when compression completes */
+ int dstCapacity, /* size of dst, in bytes */
+ int const nbSearches, /* forwarded to LZ4HC_FindLongerMatch() */
+ size_t sufficient_len, /* matches longer than this are encoded immediately */
+ const limitedOutput_directive limit, /* behavior when dst is too small : see _last_literals and _dest_overflow */
+ int const fullUpdate, /* !=0 : more thorough search, see "check further positions" */
+ const dictCtx_directive dict, /* forwarded to LZ4HC_FindLongerMatch() */
+ const HCfavor_e favorDecSpeed) /* forwarded to the match finder; also biases price comparisons below */
+{
+ int retval = 0; /* value returned on every failure path */
+#define TRAILING_LITERALS 3 /* nb of extra opt[] positions priced as literals after last_match_pos */
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
+#else
+ LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
+#endif
+
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + *srcSizePtr;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = iend - LASTLITERALS;
+ BYTE* op = (BYTE*) dst;
+ BYTE* opSaved = (BYTE*) dst;
+ BYTE* oend = op + dstCapacity;
+ int ovml = MINMATCH; /* overflow - last sequence */
+ const BYTE* ovref = NULL;
+
+ /* init */
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ if (opt == NULL) goto _return_label;
+#endif
+ DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
+ *srcSizePtr = 0;
+ if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support the LZ4 format restriction : reserve LASTLITERALS bytes (restored in _last_literals) */
+ if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+
+ /* Main Loop */
+ while (ip <= mflimit) {
+ int const llen = (int)(ip - anchor);
+ int best_mlen, best_off;
+ int cur, last_match_pos = 0;
+
+ LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+ if (firstMatch.len==0) { ip++; continue; }
+
+ if ((size_t)firstMatch.len > sufficient_len) {
+ /* good enough solution : immediate encoding */
+ int const firstML = firstMatch.len;
+ const BYTE* const matchPos = ip - firstMatch.off;
+ opSaved = op;
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */
+ ovml = firstML;
+ ovref = matchPos;
+ goto _dest_overflow;
+ }
+ continue;
+ }
+
+ /* set prices for first positions (literals) */
+ { int rPos;
+ for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+ int const cost = LZ4HC_literalsPrice(llen + rPos);
+ opt[rPos].mlen = 1;
+ opt[rPos].off = 0;
+ opt[rPos].litlen = llen + rPos;
+ opt[rPos].price = cost;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+ rPos, cost, opt[rPos].litlen);
+ } }
+ /* set prices using initial match */
+ { int mlen = MINMATCH;
+ int const matchML = firstMatch.len; /* necessarily <= sufficient_len < LZ4_OPT_NUM */
+ int const offset = firstMatch.off;
+ assert(matchML < LZ4_OPT_NUM);
+ for ( ; mlen <= matchML ; mlen++) {
+ int const cost = LZ4HC_sequencePrice(llen, mlen);
+ opt[mlen].mlen = mlen;
+ opt[mlen].off = offset;
+ opt[mlen].litlen = llen;
+ opt[mlen].price = cost;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+ mlen, cost, mlen);
+ } }
+ last_match_pos = firstMatch.len;
+ { int addLit;
+ for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+ opt[last_match_pos+addLit].mlen = 1; /* literal */
+ opt[last_match_pos+addLit].off = 0;
+ opt[last_match_pos+addLit].litlen = addLit;
+ opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+ last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+ } }
+
+ /* check further positions */
+ for (cur = 1; cur < last_match_pos; cur++) {
+ const BYTE* const curPtr = ip + cur;
+ LZ4HC_match_t newMatch;
+
+ if (curPtr > mflimit) break;
+ DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+ cur, opt[cur].price, opt[cur+1].price, cur+1);
+ if (fullUpdate) {
+ /* not useful to search here if next position has same (or lower) cost */
+ if ( (opt[cur+1].price <= opt[cur].price)
+ /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+ && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+ continue;
+ } else {
+ /* not useful to search here if next position has same (or lower) cost */
+ if (opt[cur+1].price <= opt[cur].price) continue;
+ }
+
+ DEBUGLOG(7, "search at rPos:%u", cur);
+ if (fullUpdate)
+ newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+ else
+ /* only test matches of minimum length; slightly faster, but misses a few bytes */
+ newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
+ if (!newMatch.len) continue;
+
+ if ( ((size_t)newMatch.len > sufficient_len)
+ || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+ /* immediate encoding */
+ best_mlen = newMatch.len;
+ best_off = newMatch.off;
+ last_match_pos = cur + 1;
+ goto encode;
+ }
+
+ /* before match : set price with literals at beginning */
+ { int const baseLitlen = opt[cur].litlen;
+ int litlen;
+ for (litlen = 1; litlen < MINMATCH; litlen++) {
+ int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+ int const pos = cur + litlen;
+ if (price < opt[pos].price) {
+ opt[pos].mlen = 1; /* literal */
+ opt[pos].off = 0;
+ opt[pos].litlen = baseLitlen+litlen;
+ opt[pos].price = price;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+ pos, price, opt[pos].litlen);
+ } } }
+
+ /* set prices using match at position = cur */
+ { int const matchML = newMatch.len;
+ int ml = MINMATCH;
+
+ assert(cur + newMatch.len < LZ4_OPT_NUM);
+ for ( ; ml <= matchML ; ml++) {
+ int const pos = cur + ml;
+ int const offset = newMatch.off;
+ int price;
+ int ll;
+ DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+ pos, last_match_pos);
+ if (opt[cur].mlen == 1) {
+ ll = opt[cur].litlen;
+ price = ((cur > ll) ? opt[cur - ll].price : 0)
+ + LZ4HC_sequencePrice(ll, ml);
+ } else {
+ ll = 0;
+ price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+ }
+
+ assert((U32)favorDecSpeed <= 1);
+ if (pos > last_match_pos+TRAILING_LITERALS
+ || price <= opt[pos].price - (int)favorDecSpeed) {
+ DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+ pos, price, ml);
+ assert(pos < LZ4_OPT_NUM);
+ if ( (ml == matchML) /* last pos of last match */
+ && (last_match_pos < pos) )
+ last_match_pos = pos;
+ opt[pos].mlen = ml;
+ opt[pos].off = offset;
+ opt[pos].litlen = ll;
+ opt[pos].price = price;
+ } } }
+ /* complete following positions with literals */
+ { int addLit;
+ for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+ opt[last_match_pos+addLit].mlen = 1; /* literal */
+ opt[last_match_pos+addLit].off = 0;
+ opt[last_match_pos+addLit].litlen = addLit;
+ opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+ } }
+ } /* for (cur = 1; cur < last_match_pos; cur++) */
+
+ assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS);
+ best_mlen = opt[last_match_pos].mlen;
+ best_off = opt[last_match_pos].off;
+ cur = last_match_pos - best_mlen;
+
+encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+ assert(cur < LZ4_OPT_NUM);
+ assert(last_match_pos >= 1); /* == 1 when only one candidate */
+ DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
+ { int candidate_pos = cur;
+ int selected_matchLength = best_mlen;
+ int selected_offset = best_off;
+ while (1) { /* from end to beginning */
+ int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */
+ int const next_offset = opt[candidate_pos].off;
+ DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+ opt[candidate_pos].mlen = selected_matchLength;
+ opt[candidate_pos].off = selected_offset;
+ selected_matchLength = next_matchLength;
+ selected_offset = next_offset;
+ if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+ assert(next_matchLength > 0); /* can be 1, means literal */
+ candidate_pos -= next_matchLength;
+ } }
+
+ /* encode all recorded sequences in order */
+ { int rPos = 0; /* relative position (to ip) */
+ while (rPos < last_match_pos) {
+ int const ml = opt[rPos].mlen;
+ int const offset = opt[rPos].off;
+ if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
+ rPos += ml;
+ assert(ml >= MINMATCH);
+ assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
+ opSaved = op;
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */
+ ovml = ml;
+ ovref = ip - offset;
+ goto _dest_overflow;
+ } } }
+ } /* while (ip <= mflimit) */
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */
+ size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+ size_t const totalSize = 1 + llAdd + lastRunSize;
+ if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
+ if (limit && (op + totalSize > oend)) {
+ if (limit == limitedOutput) { /* Check output limit */
+ retval = 0;
+ goto _return_label;
+ }
+ /* adapt lastRunSize to fill 'dst' */
+ lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+ llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+ lastRunSize -= llAdd;
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+ ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+ *op++ = (RUN_MASK << ML_BITS);
+ for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize << ML_BITS);
+ }
+ LZ4_memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
+ /* End */
+ *srcSizePtr = (int) (((const char*)ip) - source);
+ retval = (int) ((char*)op-dst);
+ goto _return_label;
+
+_dest_overflow: /* reached when LZ4HC_encodeSequence() returns non-zero; outside fillOutput mode, falls through to _return_label with retval==0 */
+if (limit == fillOutput) {
+ /* Assumption : ip, anchor, ovml and ovref must be set correctly */
+ size_t const ll = (size_t)(ip - anchor);
+ size_t const ll_addbytes = (ll + 240) / 255;
+ size_t const ll_totalCost = 1 + ll_addbytes + ll;
+ BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+ DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
+ op = opSaved; /* restore correct out pointer */
+ if (op + ll_totalCost <= maxLitPos) {
+ /* ll validated; now adjust match length */
+ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+ size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+ assert(maxMlSize < INT_MAX); assert(ovml >= 0);
+ if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
+ if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
+ DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
+ DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
+ DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
+ } }
+ goto _last_literals;
+}
+_return_label: /* single exit point : releases opt[] in heap mode */
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ FREEMEM(opt);
+#endif
+ return retval;
+}
diff --git a/mfbt/lz4/lz4hc.h b/mfbt/lz4/lz4hc.h
new file mode 100644
index 0000000000..e937acfefd
--- /dev/null
+++ b/mfbt/lz4/lz4hc.h
@@ -0,0 +1,413 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Header File
+ Copyright (C) 2011-2020, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef LZ4_HC_H_19834876238432
+#define LZ4_HC_H_19834876238432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* --- Dependency --- */
+/* note : lz4hc requires lz4.h/lz4.c for compilation */
+#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
+
+
+/* --- Useful constants --- */
+#define LZ4HC_CLEVEL_MIN 3
+#define LZ4HC_CLEVEL_DEFAULT 9
+#define LZ4HC_CLEVEL_OPT_MIN 10
+#define LZ4HC_CLEVEL_MAX 12
+
+
+/*-************************************
+ * Block Compression
+ **************************************/
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
+ * `dst` must be already allocated.
+ * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
+ * @return : the number of bytes written into 'dst'
+ * or 0 if compression fails.
+ */
+LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+
+/* Note :
+ * Decompression functions are provided within "lz4.h" (BSD license)
+ */
+
+
+/*! LZ4_compress_HC_extStateHC() :
+ * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ * `state` size is provided by LZ4_sizeofStateHC().
+ * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
+ */
+LZ4LIB_API int LZ4_sizeofStateHC(void);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+
+
+/*! LZ4_compress_HC_destSize() : v1.9.0+
+ * Will compress as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how many bytes were read from `src`
+ */
+LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
+ const char* src, char* dst,
+ int* srcSizePtr, int targetDstSize,
+ int compressionLevel);
+
+
+/*-************************************
+ * Streaming Compression
+ * Bufferless synchronous API
+ **************************************/
+ typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
+
+/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
+ * These functions create and release memory for LZ4 HC streaming state.
+ * Newly created states are automatically initialized.
+ * The same state can be used multiple times consecutively,
+ * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
+LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+
+/*
+ These functions compress data in successive blocks of any size,
+ using previous blocks as dictionary, to improve compression ratio.
+ One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+ There is an exception for ring buffers, which can be smaller than 64 KB.
+ Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
+
+ Before starting compression, state must be allocated and properly initialized.
+ LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
+
+ Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
+ or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
+ LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
+ which is automatically the case when state is created using LZ4_createStreamHC().
+
+ After reset, a first "fictional block" can be designated as initial dictionary,
+ using LZ4_loadDictHC() (Optional).
+
+ Invoke LZ4_compress_HC_continue() to compress each successive block.
+ The number of blocks is unlimited.
+ Previous input blocks, including initial dictionary when present,
+ must remain accessible and unmodified during compression.
+
+ It's allowed to update compression level anytime between blocks,
+ using LZ4_setCompressionLevel() (experimental).
+
+ 'dst' buffer should be sized to handle worst case scenarios
+ (see LZ4_compressBound(), it ensures compression success).
+ In case of failure, the API does not guarantee recovery,
+ so the state _must_ be reset.
+ To ensure compression success
+ whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
+ consider using LZ4_compress_HC_continue_destSize().
+
+ Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
+ it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
+ Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
+
+ After completing a streaming compression,
+ it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
+ just by resetting it, using LZ4_resetStreamHC_fast().
+*/
+
+LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */
+LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
+ const char* src, char* dst,
+ int srcSize, int maxDstSize);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
+ * Similar to LZ4_compress_HC_continue(),
+ * but will read as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how many bytes were read from `src`.
+ * Note that this function may not consume the entire input.
+ */
+LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* src, char* dst,
+ int* srcSizePtr, int targetDstSize);
+
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+
+
+/*^**********************************************
+ * !!!!!! STATIC LINKING ONLY !!!!!!
+ ***********************************************/
+
+/*-******************************************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions directly.
+ * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Declare an `LZ4_streamHC_t` directly, rather than any type below.
+ * Even then, only do so in the context of static linking, as definitions may change between versions.
+ ********************************************************************/
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+
+/* Never ever use these definitions directly !
+ * Declare or allocate an LZ4_streamHC_t instead.
+**/
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
+{
+ LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE]; /* 1 << LZ4HC_HASH_LOG entries; NOTE(review): presumably maps a hash to a position index - confirm in lz4hc.c */
+ LZ4_u16 chainTable[LZ4HC_MAXD]; /* 1 << LZ4HC_DICTIONARY_LOGSIZE entries; NOTE(review): presumably links positions sharing a hash - confirm in lz4hc.c */
+ const LZ4_byte* end; /* next block here to continue on current prefix */
+ const LZ4_byte* prefixStart; /* Indexes relative to this position */
+ const LZ4_byte* dictStart; /* alternate reference for extDict */
+ LZ4_u32 dictLimit; /* below that point, need extDict */
+ LZ4_u32 lowLimit; /* below that point, no more dict */
+ LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
+ short compressionLevel; /* level stored in the state; re-used by LZ4_slideInputBufferHC() when it resets the stream */
+ LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
+ otherwise, favor compression ratio */
+ LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
+ const LZ4HC_CCtx_internal* dictCtx; /* NOTE(review): presumably the dictionary stream set by LZ4_attach_HC_dictionary() - confirm */
+};
+
+#define LZ4_STREAMHC_MINSIZE 262200 /* static size, for inter-version compatibility */
+union LZ4_streamHC_u {
+ char minStateSize[LZ4_STREAMHC_MINSIZE]; /* makes the union at least LZ4_STREAMHC_MINSIZE bytes large */
+ LZ4HC_CCtx_internal internal_donotuse; /* actual state; not to be accessed directly by users */
+}; /* previously typedef'd to LZ4_streamHC_t */
+
+/* LZ4_streamHC_t :
+ * This structure allows static allocation of LZ4 HC streaming state.
+ * This can be used to allocate statically on stack, or as part of a larger structure.
+ *
+ * Such state **must** be initialized using LZ4_initStreamHC() before first use.
+ *
+ * Note that invoking LZ4_initStreamHC() is not required when
+ * the state was created using LZ4_createStreamHC() (which is recommended).
+ * Using the normal builder, a newly created state is automatically initialized.
+ *
+ * Static allocation shall only be used in combination with static linking.
+ */
+
+/* LZ4_initStreamHC() : v1.9.0+
+ * Required before first use of a statically allocated LZ4_streamHC_t.
+ * Before v1.9.0 : use LZ4_resetStreamHC() instead
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC(void* buffer, size_t size);
+
+
+/*-************************************
+* Deprecated Functions
+**************************************/
+/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
+
+/* deprecated compression functions */
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, use of
+ * LZ4_slideInputBufferHC() will truncate the history of the stream, rather
+ * than preserve a window-sized chunk of history.
+ */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data);
+#endif
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
+ * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
+ * which is now the recommended function to start a new stream of blocks,
+ * but cannot be used to initialize a memory segment containing arbitrary garbage data.
+ *
+ * It is recommended to switch to LZ4_initStreamHC().
+ * LZ4_resetStreamHC() will generate deprecation warnings in a future version.
+ */
+LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4_HC_H_19834876238432 */
+
+
+/*-**************************************************
+ * !!!!! STATIC LINKING ONLY !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successful usage in real-life scenarios.
+ ***************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
+#ifndef LZ4_HC_SLO_098092834
+#define LZ4_HC_SLO_098092834
+
+#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
+#include "lz4.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
+ * It's possible to change compression level
+ * between successive invocations of LZ4_compress_HC_continue*()
+ * for dynamic adaptation.
+ */
+LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
+ * Opt. Parser will favor decompression speed over compression ratio.
+ * Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
+ */
+LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+
+/*! LZ4_resetStreamHC_fast() : v1.9.0+
+ * When an LZ4_streamHC_t is known to be in an internally coherent state,
+ * it can often be prepared for a new compression with almost no work, only
+ * sometimes falling back to the full, expensive reset that is always required
+ * when the stream is in an indeterminate state (i.e., the reset performed by
+ * LZ4_resetStreamHC()).
+ *
+ * LZ4_streamHCs are guaranteed to be in a valid state when:
+ * - returned from LZ4_createStreamHC()
+ * - reset by LZ4_resetStreamHC()
+ * - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ * - the stream was in a valid state and was then used in any compression call
+ * that returned success
+ * - the stream was in an indeterminate state and was used in a compression
+ * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ * returned success
+ *
+ * Note:
+ * A stream that was last used in a compression call that returned an error
+ * may be passed to this function. However, it will be fully reset, which will
+ * clear any existing history and settings from the context.
+ */
+LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ * A variant of LZ4_compress_HC_extStateHC().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ * "correctly initialized"). From a high level, the difference is that this
+ * function initializes the provided state with a call to
+ * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ * call to LZ4_resetStreamHC().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
+ void* state,
+ const char* src, char* dst,
+ int srcSize, int dstCapacity,
+ int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ * This is an experimental API that allows for the efficient use of a
+ * static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
+ *
+ * Several assumptions are made about the state of the dictionary stream.
+ * Currently, only streams which have been prepared by LZ4_loadDictHC() should
+ * be expected to work.
+ *
+ * Alternatively, the provided dictionary stream pointer may be NULL, in which
+ * case any existing dictionary stream is unset.
+ *
+ * A dictionary should only be attached to a stream without any history (i.e.,
+ * a stream that has just been reset).
+ *
+ * The dictionary will remain attached to the working stream only for the
+ * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
+ * dictionary context association from the working stream. The dictionary
+ * stream (and source buffer) must remain in-place / accessible / unchanged
+ * through the lifetime of the stream session.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
+ LZ4_streamHC_t *working_stream,
+ const LZ4_streamHC_t *dictionary_stream);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4_HC_SLO_098092834 */
+#endif /* LZ4_HC_STATIC_LINKING_ONLY */
diff --git a/mfbt/lz4/xxhash.c b/mfbt/lz4/xxhash.c
new file mode 100644
index 0000000000..083b039d70
--- /dev/null
+++ b/mfbt/lz4/xxhash.c
@@ -0,0 +1,43 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2021 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://www.xxhash.com
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+
+/*
+ * xxhash.c instantiates functions defined in xxhash.h
+ */
+
+#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
+#define XXH_IMPLEMENTATION /* access definitions */
+
+#include "xxhash.h"
diff --git a/mfbt/lz4/xxhash.h b/mfbt/lz4/xxhash.h
new file mode 100644
index 0000000000..a18e8c762d
--- /dev/null
+++ b/mfbt/lz4/xxhash.h
@@ -0,0 +1,6773 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (C) 2012-2021 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://www.xxhash.com
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+/*!
+ * @mainpage xxHash
+ *
+ * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
+ * limits.
+ *
+ * It is proposed in four flavors, in three families:
+ * 1. @ref XXH32_family
+ * - Classic 32-bit hash function. Simple, compact, and runs on almost all
+ * 32-bit and 64-bit systems.
+ * 2. @ref XXH64_family
+ * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
+ * 64-bit systems (but _not_ 32-bit systems).
+ * 3. @ref XXH3_family
+ * - Modern 64-bit and 128-bit hash function family which features improved
+ * strength and performance across the board, especially on smaller data.
+ * It benefits greatly from SIMD and 64-bit without requiring it.
+ *
+ * Benchmarks
+ * ---
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
+ * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
+ *
+ * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
+ * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
+ * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
+ * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
+ * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
+ * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
+ * | RAM sequential read | | N/A | 28.0 GB/s | N/A |
+ * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
+ * | City64 | | 64 | 22.0 GB/s | 76.6 |
+ * | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
+ * | City128 | | 128 | 21.7 GB/s | 57.7 |
+ * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
+ * | XXH64() | | 64 | 19.4 GB/s | 71.0 |
+ * | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
+ * | Mum | | 64 | 18.0 GB/s | 67.0 |
+ * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
+ * | XXH32() | | 32 | 9.7 GB/s | 71.9 |
+ * | City32 | | 32 | 9.1 GB/s | 66.0 |
+ * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
+ * | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
+ * | SipHash* | | 64 | 3.0 GB/s | 43.2 |
+ * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
+ * | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
+ * | FNV64 | | 64 | 1.2 GB/s | 62.7 |
+ * | Blake2* | | 256 | 1.1 GB/s | 5.1 |
+ * | SHA1* | | 160 | 0.8 GB/s | 5.6 |
+ * | MD5* | | 128 | 0.6 GB/s | 7.8 |
+ * @note
+ * - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ * even though it is mandatory on x64.
+ * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ * by modern standards.
+ * - Small data velocity is a rough average of algorithm's efficiency for small
+ * data. For more accurate information, see the wiki.
+ * - More benchmarks and strength tests are found on the wiki:
+ * https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ * For functions which take an input and length parameter, the following
+ * requirements are assumed:
+ * - The range from [`input`, `input + length`) is valid, readable memory.
+ * - The only exception is if the `length` is `0`, `input` may be `NULL`.
+ * - For C++, the objects must have the *TriviallyCopyable* property, as the
+ * functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ * #include <string.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which hashes a null terminated string with XXH32().
+ * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ * {
+ * // NULL pointers are only valid if the length is zero
+ * size_t length = (string == NULL) ? 0 : strlen(string);
+ * return XXH32(string, length, seed);
+ * }
+ * @endcode
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <assert.h>
+ * #include "xxhash.h"
+ * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ * XXH64_hash_t hashFile(FILE* f)
+ * {
+ * // Allocate a state struct. Do not just use malloc() or new.
+ * XXH3_state_t* state = XXH3_createState();
+ * assert(state != NULL && "Out of memory!");
+ * // Reset the state to start a new hashing session.
+ * XXH3_64bits_reset(state);
+ * char buffer[4096];
+ * size_t count;
+ * // Read the file in chunks
+ * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ * // Run update() as many times as necessary to process the data
+ * XXH3_64bits_update(state, buffer, count);
+ * }
+ * // Retrieve the finalized hash. This will not change the state.
+ * XXH64_hash_t result = XXH3_64bits_digest(state);
+ * // Free the state. Do not use free().
+ * XXH3_freeState(state);
+ * return result;
+ * }
+ * @endcode
+ *
+ * @file xxhash.h
+ * xxHash prototypes and implementation
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ****************************
+ * INLINE mode
+ ******************************/
+/*!
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Gives access to internal state declaration, required for static allocation.
+ *
+ * Incompatible with dynamic linking, due to risks of ABI changes.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #include "xxhash.h"
+ * @endcode
+ */
+# define XXH_STATIC_LINKING_ONLY
+/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
+
+/*!
+ * @brief Gives access to internal definitions.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #define XXH_IMPLEMENTATION
+ * #include "xxhash.h"
+ * @endcode
+ */
+# define XXH_IMPLEMENTATION
+/* Do not undef XXH_IMPLEMENTATION for Doxygen */
+
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
+ * Use these build macros to inline xxhash into the target unit.
+ * Inlining improves performance on small inputs, especially when the length is
+ * expressed as a compile-time constant:
+ *
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *
+ * It also keeps xxHash symbols private to the unit, so they are not exported.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ * @endcode
+ * Do not compile and link xxhash.o as a separate object, as it is not useful.
+ */
+# define XXH_INLINE_ALL
+# undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+# define XXH_PRIVATE_API
+# undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
+ */
+# define XXH_NAMESPACE /* YOUR NAME HERE */
+# undef XXH_NAMESPACE
+#endif
+
+#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
+ && !defined(XXH_INLINE_ALL_31684351384)
+ /* this section should be traversed only once */
+# define XXH_INLINE_ALL_31684351384
+ /* give access to the advanced API, required to compile implementations */
+# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
+# define XXH_STATIC_LINKING_ONLY
+ /* make all functions private */
+# undef XXH_PUBLIC_API
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+ /* note: this version may generate warnings for unused static functions */
+# define XXH_PUBLIC_API static
+# endif
+
+ /*
+ * This part deals with the special case where a unit wants to inline xxHash,
+ * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
+ * such as part of some previously included *.h header file.
+ * Without further action, the new include would just be ignored,
+ * and functions would effectively _not_ be inlined (silent failure).
+ * The following macros solve this situation by prefixing all inlined names,
+ * avoiding naming collision with previous inclusions.
+ */
+ /* Before that, we unconditionally #undef all symbols,
+ * in case they were already defined with XXH_NAMESPACE.
+ * They will then be redefined for XXH_INLINE_ALL
+ */
+# undef XXH_versionNumber
+ /* XXH32 */
+# undef XXH32
+# undef XXH32_createState
+# undef XXH32_freeState
+# undef XXH32_reset
+# undef XXH32_update
+# undef XXH32_digest
+# undef XXH32_copyState
+# undef XXH32_canonicalFromHash
+# undef XXH32_hashFromCanonical
+ /* XXH64 */
+# undef XXH64
+# undef XXH64_createState
+# undef XXH64_freeState
+# undef XXH64_reset
+# undef XXH64_update
+# undef XXH64_digest
+# undef XXH64_copyState
+# undef XXH64_canonicalFromHash
+# undef XXH64_hashFromCanonical
+ /* XXH3_64bits */
+# undef XXH3_64bits
+# undef XXH3_64bits_withSecret
+# undef XXH3_64bits_withSeed
+# undef XXH3_64bits_withSecretandSeed
+# undef XXH3_createState
+# undef XXH3_freeState
+# undef XXH3_copyState
+# undef XXH3_64bits_reset
+# undef XXH3_64bits_reset_withSeed
+# undef XXH3_64bits_reset_withSecret
+# undef XXH3_64bits_update
+# undef XXH3_64bits_digest
+# undef XXH3_generateSecret
+ /* XXH3_128bits */
+# undef XXH128
+# undef XXH3_128bits
+# undef XXH3_128bits_withSeed
+# undef XXH3_128bits_withSecret
+# undef XXH3_128bits_reset
+# undef XXH3_128bits_reset_withSeed
+# undef XXH3_128bits_reset_withSecret
+# undef XXH3_128bits_reset_withSecretandSeed
+# undef XXH3_128bits_update
+# undef XXH3_128bits_digest
+# undef XXH128_isEqual
+# undef XXH128_cmp
+# undef XXH128_canonicalFromHash
+# undef XXH128_hashFromCanonical
+ /* Finally, free the namespace itself */
+# undef XXH_NAMESPACE
+
+ /* employ the namespace for XXH_INLINE_ALL */
+# define XXH_NAMESPACE XXH_INLINE_
+ /*
+ * Some identifiers (enums, type names) are not symbols,
+ * but they must nonetheless be renamed to avoid redeclaration.
+ * Alternative solution: do not redeclare them.
+ * However, this requires some #ifdefs, and has a more dispersed impact.
+ * Meanwhile, renaming can be achieved in a single place.
+ */
+# define XXH_IPREF(Id) XXH_NAMESPACE ## Id /* NOTE: `##` pastes before any rescan, so XXH_NAMESPACE is not expanded here: XXH_IPREF(XXH_OK) yields XXH_NAMESPACEXXH_OK (not XXH_INLINE_XXH_OK); it is still a distinct rename */
+# define XXH_OK XXH_IPREF(XXH_OK)
+# define XXH_ERROR XXH_IPREF(XXH_ERROR)
+# define XXH_errorcode XXH_IPREF(XXH_errorcode)
+# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
+# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
+# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
+# define XXH32_state_s XXH_IPREF(XXH32_state_s)
+# define XXH32_state_t XXH_IPREF(XXH32_state_t)
+# define XXH64_state_s XXH_IPREF(XXH64_state_s)
+# define XXH64_state_t XXH_IPREF(XXH64_state_t)
+# define XXH3_state_s XXH_IPREF(XXH3_state_s)
+# define XXH3_state_t XXH_IPREF(XXH3_state_t)
+# define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
+ /* Ensure the header is parsed again, even if it was previously included */
+# undef XXHASH_H_5627135585666179
+# undef XXHASH_H_STATIC_13879238742
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/* ****************************************************************
+ * Stable API
+ *****************************************************************/
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+/*! @brief Marks a global symbol. Empty by default; on WIN32 + MSVC it becomes a DLL export/import attribute when XXH_EXPORT / XXH_IMPORT is defined. Skipped when XXH_INLINE_ALL or XXH_PRIVATE_API is defined. */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# ifdef XXH_EXPORT
+# define XXH_PUBLIC_API __declspec(dllexport)
+# elif defined(XXH_IMPORT) /* test definedness, like the outer check: a bare `#elif XXH_IMPORT` fails when the macro is defined empty or as 0 */
+# define XXH_PUBLIC_API __declspec(dllimport)
+# endif
+# else
+# define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif
+
+#ifdef XXH_NAMESPACE /* when defined, each public symbol listed below is renamed to <XXH_NAMESPACE><symbol> */
+# define XXH_CAT(A,B) A##B /* raw token paste; operands are pasted as written */
+# define XXH_NAME2(A,B) XXH_CAT(A,B) /* extra level so that XXH_NAMESPACE is macro-expanded before XXH_CAT pastes it */
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+/* XXH32 */
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+/* XXH64 */
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+/* XXH3_64bits */
+# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
+# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
+# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
+# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
+# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
+# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
+# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
+# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
+# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
+/* XXH3_128bits */
+# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
+# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
+# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#endif /* XXH_NAMESPACE */
+
+
+/* *************************************
+* Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows: DLL export/import attribute for WIN32 + MSVC builds; skipped when XXH_INLINE_ALL or XXH_PRIVATE_API is defined */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# ifdef XXH_EXPORT
+# define XXH_PUBLIC_API __declspec(dllexport)
+# elif defined(XXH_IMPORT) /* test definedness, like the outer check: a bare `#elif XXH_IMPORT` fails when the macro is defined empty or as 0 */
+# define XXH_PUBLIC_API __declspec(dllimport)
+# endif
+# else
+# define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif
+
+#if defined (__GNUC__) /* optimization hints, only emitted for compilers that define __GNUC__ */
+# define XXH_CONSTF __attribute__((const)) /* GCC `const`: result depends only on the argument values */
+# define XXH_PUREF __attribute__((pure)) /* GCC `pure`: no side effects, but may read memory */
+# define XXH_MALLOCF __attribute__((malloc)) /* GCC `malloc`: returned pointer does not alias any other pointer */
+#else
+# define XXH_CONSTF /* disable */
+# define XXH_PUREF /* disable */
+# define XXH_MALLOCF /* disable */
+#endif /* __GNUC__ */
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 8
+#define XXH_VERSION_RELEASE 2
+/*! @brief Version number, encoded as two digits each */
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to header file.
+ *
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
+ */
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Common basic types
+******************************/
+#include <stddef.h> /* size_t */
+/*!
+ * @brief Exit code for the streaming API.
+ */
+typedef enum {
+ XXH_OK = 0, /*!< OK */
+ XXH_ERROR /*!< Error */
+} XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+
+#elif !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h> /* C++ or C99 and later, and not VMS: use the exact-width type */
+ typedef uint32_t XXH32_hash_t;
+
+#else /* otherwise (pre-C99 C, or VMS): <stdint.h> is not relied upon; choose a type by its <limits.h> range */
+# include <limits.h>
+# if UINT_MAX == 0xFFFFFFFFUL /* unsigned int tops out at 2^32-1 */
+ typedef unsigned int XXH32_hash_t;
+# elif ULONG_MAX == 0xFFFFFFFFUL /* unsigned long tops out at 2^32-1 */
+ typedef unsigned long XXH32_hash_t;
+# else
+# error "unsupported platform: need a 32-bit type"
+# endif
+#endif /* XXH32_hash_t selection */
+
+/*!
+ * @}
+ *
+ * @defgroup XXH32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ * XXH32 is useful for older platforms, with no or poor 64-bit performance.
+ * Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ * and 64-bit systems, and offers true 64/128 bit hash results.
+ *
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
+ *
+ * See @ref single_shot_example "Single Shot Example" for an example.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit hash value.
+ *
+ * @see
+ * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+#ifndef XXH_NO_STREAM
+/*!
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ * @see streaming_example at the top of @ref xxhash.h for an example.
+ */
+
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * Must be freed with XXH32_freeState().
+ * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * Must be allocated with XXH32_createState().
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash32 value from that state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/******* Canonical representation *******/
+
+/*
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ */
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+ unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+/*! @cond Doxygen ignores this part */
+#ifdef __has_attribute
+# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to correct value when it's been specified.
+ */
+#define XXH_C23_VN 201711L
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
+# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH /* fallthrough */
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
+#else
+# define XXH_NOESCAPE
+#endif
+/*! @endcond */
+
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+#elif !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t XXH64_hash_t;
+#else
+# include <limits.h>
+# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+ /* LP64 ABI says uint64_t is unsigned long */
+ typedef unsigned long XXH64_hash_t;
+# else
+ /* the following type must have a width of 64-bit */
+ typedef unsigned long long XXH64_hash_t;
+# endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup XXH64_family XXH64 family
+ * @ingroup public
+ * @{
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ * and offers true 64/128 bit hash results.
+ * It provides better speed for systems with vector processing capabilities.
+ */
+
+/*!
+ * @brief Calculates the 64-bit hash of @p input using xxHash64.
+ *
+ * This function usually runs faster on 64-bit systems, but slower on 32-bit
+ * systems (see benchmark).
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit hash.
+ *
+ * @see
+ * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
+
+/******* Streaming *******/
+#ifndef XXH_NO_STREAM
+/*!
+ * @brief The opaque state struct for the XXH64 streaming API.
+ *
+ * @see XXH64_state_s for details.
+ */
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+
+/*!
+ * @brief Allocates an @ref XXH64_state_t.
+ *
+ * Must be freed with XXH64_freeState().
+ * @return An allocated XXH64_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
+
+/*!
+ * @brief Frees an @ref XXH64_state_t.
+ *
+ * Must be allocated with XXH64_createState().
+ * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+
+/*!
+ * @brief Copies one @ref XXH64_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH64_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH64_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH64_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH64_state_t.
+ *
+ * @note
+ * Calling XXH64_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash64 value from that state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+/******* Canonical representation *******/
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
+ */
+typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
+ *
+ * @param dst The @ref XXH64_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH64_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
+ *
+ * @param src The @ref XXH64_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
+
+#ifndef XXH_NO_XXH3
+
+/*!
+ * @}
+ * ************************************************************************
+ * @defgroup XXH3_family XXH3 family
+ * @ingroup public
+ * @{
+ *
+ * XXH3 is a more recent hash algorithm featuring:
+ * - Improved speed for both small and large inputs
+ * - True 64-bit and 128-bit outputs
+ * - SIMD acceleration
+ * - Improved 32-bit viability
+ *
+ * Speed analysis methodology is explained here:
+ *
+ * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
+ *
+ * Compared to XXH64, expect XXH3 to run approximately
+ * ~2x faster on large inputs and >3x faster on small ones,
+ * exact differences vary depending on platform.
+ *
+ * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
+ * but does not require it.
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
+ * at competitive speeds, even without vector support. Further details are
+ * explained in the implementation.
+ *
+ * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
+ * implementations for many common platforms:
+ * - AVX512
+ * - AVX2
+ * - SSE2
+ * - ARM NEON
+ * - WebAssembly SIMD128
+ * - POWER8 VSX
+ * - s390x ZVector
+ * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
+ * selects the best version according to predefined macros. For the x86 family, an
+ * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
+ *
+ * XXH3 implementation is portable:
+ * it has a generic C90 formulation that can be compiled on any platform,
+ * all implementations generate exactly the same hash value on all platforms.
+ * Starting from v0.8.0, it's also labelled "stable", meaning that
+ * any future version will also generate the same hash value.
+ *
+ * XXH3 offers 2 variants, _64bits and _128bits.
+ *
+ * When only 64 bits are needed, prefer invoking the _64bits variant, as it
+ * reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar return type than a struct.
+ *
+ * The API supports one-shot hashing, streaming mode, and custom secrets.
+ */
+/*-**********************************************************************
+* XXH3 64-bit variant
+************************************************************************/
+
+/*!
+ * @brief 64-bit unseeded variant of XXH3.
+ *
+ * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see
+ * XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
+ * @see
+ * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see
+ * XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief 64-bit seeded variant of XXH3
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the `seed` value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @note
+ * seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * @param input The data to hash
+ * @param length The length
+ * @param seed The 64-bit seed to alter the state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
+
+/*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
+ */
+#define XXH3_SECRET_SIZE_MIN 136
+
+/*!
+ * @brief 64-bit variant of XXH3 with a custom "secret".
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing "XXH3_generateSecret()" instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+
+/******* Streaming *******/
+#ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ */
+
+/*!
+ * @brief The state struct for the XXH3 streaming API.
+ *
+ * @see XXH3_state_s for details.
+ */
+typedef struct XXH3_state_s XXH3_state_t;
+/*!
+ * @brief Allocates an @ref XXH3_state_t.
+ *
+ * The returned state is meant to be released with XXH3_freeState().
+ *
+ * @return A pointer to the newly allocated state.
+ * NOTE(review): presumably `NULL` on allocation failure, like
+ * XXH64_createState() -- confirm against the implementation.
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
+/*!
+ * @brief Frees an @ref XXH3_state_t.
+ *
+ * @param statePtr A state obtained from XXH3_createState().
+ *
+ * @return An @ref XXH_errorcode.
+ * NOTE(review): presumably always XXH_OK, like XXH64_freeState() -- confirm.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
+
+/*!
+ * @brief Copies one @ref XXH3_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * This function resets `statePtr` and generates a secret with default parameters. Call it before @ref XXH3_64bits_update().
+ * Digest will be equivalent to `XXH3_64bits()`.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * This function resets `statePtr` and generates a secret from `seed`. Call it before @ref XXH3_64bits_update().
+ * Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the state.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * XXH3_64bits_reset_withSecret():
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
+ *
+ * @note
+ * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 64-bit hash value from that state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/* note : canonical representation of XXH3 is the same as XXH64
+ * since they both produce XXH64_hash_t values */
+
+
+/*-**********************************************************************
+* XXH3 128-bit variant
+************************************************************************/
+
+/*!
+ * @brief The return value from 128-bit hashes.
+ *
+ * Stored in little endian order, although the fields themselves are in native
+ * endianness.
+ */
+typedef struct {
+ XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
+ XXH64_hash_t high64; /*!< `value >> 64` */
+} XXH128_hash_t;
+
+/*!
+ * @brief Unseeded 128-bit variant of XXH3
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see
+ * XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
+ * @see
+ * XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see
+ * XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/******* Streaming *******/
+#ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ *
+ * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
+ * Use already declared XXH3_createState() and XXH3_freeState().
+ *
+ * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ */
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * This function resets `statePtr` and generates a secret with default parameters. Call it before @ref XXH3_128bits_update().
+ * Digest will be equivalent to `XXH3_128bits()`.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * This function resets `statePtr` and generates a secret from `seed`. Call it before @ref XXH3_128bits_update().
+ * Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the state.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_reset_withSecret(). */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
+ *
+ * @note
+ * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 128-bit hash value from that state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/* The following helper functions make it possible to compare XXH128_hash_t values.
+ * Since XXH128_hash_t is a structure, this capability is not offered by the language.
+ * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
+
+/*!
+ * XXH128_isEqual():
+ * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+
+/*!
+ * @brief Compares two @ref XXH128_hash_t
+ * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ *
+ * @return: >0 if *h128_1 > *h128_2
+ * =0 if *h128_1 == *h128_2
+ * <0 if *h128_1 < *h128_2
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
+
+
+/******* Canonical representation *******/
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH128_hash_t.
+ */
+typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
+
+
+/*!
+ * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
+ *
+ * @param dst The @ref XXH128_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH128_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
+ *
+ * @param src The @ref XXH128_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
+
+
+#endif /* !XXH_NO_XXH3 */
+#endif /* XXH_NO_LONG_LONG */
+
+/*!
+ * @}
+ */
+#endif /* XXHASH_H_5627135585666179 */
+
+
+
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
+#define XXHASH_H_STATIC_13879238742
+/* ****************************************************************************
+ * This section contains declarations which are not guaranteed to remain stable.
+ * They may change in future versions, becoming incompatible with a different
+ * version of the library.
+ * These declarations should only be used with static linking.
+ * Never use them in association with dynamic linking!
+ ***************************************************************************** */
+
+/*
+ * These definitions are only present to allow static allocation
+ * of XXH states, on stack or in a struct, for example.
+ * Never **ever** access their members directly.
+ */
+
+/*!
+ * @internal
+ * @brief Structure for XXH32 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH32_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH64_state_s, XXH3_state_s
+ */
+struct XXH32_state_s {
+ XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
+ XXH32_hash_t large_len; /*!< Whether the total length hashed is >= 16 (kept separately so a @ref total_len_32 overflow cannot hide it) */
+ XXH32_hash_t v[4]; /*!< Accumulator lanes */
+ XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
+ XXH32_hash_t memsize; /*!< Amount of buffered data currently held in @ref mem32 */
+ XXH32_hash_t reserved; /*!< Reserved field. Do not read or write to it. */
+}; /* typedef'd to XXH32_state_t */
+
+
+#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
+
+/*!
+ * @internal
+ * @brief Structure for XXH64 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH64_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH32_state_s, XXH3_state_s
+ */
+struct XXH64_state_s {
+ XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
+ XXH64_hash_t v[4]; /*!< Accumulator lanes */
+ XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
+ XXH32_hash_t memsize; /*!< Amount of buffered data currently held in @ref mem64 */
+ XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyway. */
+ XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
+}; /* typedef'd to XXH64_state_t */
+
+#ifndef XXH_NO_XXH3
+
+/*
+ * XXH_ALIGN(n): alignment specifier requesting `n`-byte alignment.
+ * Selection order: C11 `alignas` (via <stdalign.h>), the C++11 `alignas`
+ * keyword, the GNU `aligned` attribute, then MSVC `__declspec(align)`.
+ * When none of these is available the macro expands to nothing, so no
+ * alignment is requested at all.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
+# include <stdalign.h>
+# define XXH_ALIGN(n) alignas(n)
+#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
+/* In C++ alignas() is a keyword */
+# define XXH_ALIGN(n) alignas(n)
+#elif defined(__GNUC__)
+# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+# define XXH_ALIGN(n) __declspec(align(n))
+#else
+# define XXH_ALIGN(n) /* disabled */
+#endif
+
+/*
+ * XXH_ALIGN_MEMBER(align, type): declares a struct member aligned with XXH_ALIGN.
+ * Old GCC versions only accept the attribute after the type in structures,
+ * so for GNU compilers without C11/C++11 `alignas` the specifier is placed
+ * after the member declaration; in every other case it is placed first.
+ */
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
+ && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
+ && defined(__GNUC__)
+# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+#else
+# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+#endif
+
+/*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64bits_update(), XXH3_128bits_update().
+ */
+#define XXH3_INTERNALBUFFER_SIZE 256
+
+/*!
+ * @internal
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
+ * Otherwise it is an opaque type.
+ * Never use this definition in combination with dynamic library.
+ * This allows fields to safely be changed in the future.
+ *
+ * @note ** This structure has a strict alignment requirement of 64 bytes!! **
+ * Do not allocate this with `malloc()` or `new`,
+ * it will not be sufficiently aligned.
+ * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Never access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+ XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+ /*!< The 8 accumulators, aligned on 64 bytes. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
+ XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+ /*!< Used to store a custom secret generated from a seed (@ref XXH3_SECRET_DEFAULT_SIZE bytes). */
+ XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+ /*!< The internal buffer (@ref XXH3_INTERNALBUFFER_SIZE bytes). @see XXH32_state_s::mem32 */
+ XXH32_hash_t bufferedSize;
+ /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+ XXH32_hash_t useSeed;
+ /*!< Originally described as a reserved field needed for padding on 64-bit.
+ * NOTE(review): the name suggests it records whether a seed is in use;
+ * confirm against the implementation. */
+ size_t nbStripesSoFar;
+ /*!< Number of stripes processed. */
+ XXH64_hash_t totalLen;
+ /*!< Total length hashed. 64-bit even on 32-bit targets. */
+ size_t nbStripesPerBlock;
+ /*!< Number of stripes per block. */
+ size_t secretLimit;
+ /*!< Size of @ref customSecret or @ref extSecret */
+ XXH64_hash_t seed;
+ /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+ XXH64_hash_t reserved64;
+ /*!< Reserved field. */
+ const unsigned char* extSecret;
+ /*!< Reference to an external secret for the _withSecret variants, NULL
+ * for other variants. */
+ /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+#undef XXH_ALIGN_MEMBER
+
+/*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely emplaced on stack,
+ * it should be initialized with XXH3_INITSTATE() or a memset()
+ * in case its first reset uses XXH3_NNbits_reset_withSeed().
+ * This init can be omitted if the first reset uses default or _withSecret mode.
+ * This operation isn't necessary when the state is created with XXH3_createState().
+ * Note that this doesn't prepare the state for a streaming operation,
+ * it's still necessary to use XXH3_NNbits_reset*() afterwards.
+ */
+#define XXH3_INITSTATE(XXH3_state_ptr) \
+ do { \
+ XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+ tmp_xxh3_state_ptr->seed = 0; \
+ tmp_xxh3_state_ptr->extSecret = NULL; \
+ } while(0)
+
+
+/*!
+ * simple alias to pre-selected XXH3_128bits variant
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+
+
+/* === Experimental API === */
+/* Symbols defined below must be considered tied to a specific library version. */
+
+/*!
+ * XXH3_generateSecret():
+ *
+ * Derive a high-entropy secret from any user-defined content, named customSeed.
+ * The generated secret can be used in combination with `*_withSecret()` functions.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
+ *
+ * The function accepts as input a custom seed of any length and any content,
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
+ *
+ * The generated secret can then be used with any `*_withSecret()` variant.
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
+ * are part of this list. They all accept a `secret` parameter
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
+ * _and_ feature very high entropy (consist of random-looking bytes).
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
+ *
+ * @pre
+ * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ *
+ * Example code:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <stdlib.h>
+ * #include <string.h>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Hashes argv[2] using the entropy from argv[1].
+ * int main(int argc, char* argv[])
+ * {
+ * char secret[XXH3_SECRET_SIZE_MIN];
+ * if (argc != 3) { return 1; }
+ * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ * XXH64_hash_t h = XXH3_64bits_withSecret(
+ * argv[2], strlen(argv[2]),
+ * secret, sizeof(secret)
+ * );
+ * printf("%016llx\n", (unsigned long long) h);
+ * }
+ * @endcode
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
+
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
+ *
+ * The generated secret can be used in combination with
+ *`*_withSecret()` and `_withSecretandSeed()` variants.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ * #include <string>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Slow, seeds each time
+ * class HashSlow {
+ * XXH64_hash_t seed;
+ * public:
+ * HashSlow(XXH64_hash_t s) : seed{s} {}
+ * size_t operator()(const std::string& x) const {
+ * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ * }
+ * };
+ * // Fast, caches the seeded secret for future uses.
+ * class HashFast {
+ * unsigned char secret[XXH3_SECRET_SIZE_MIN];
+ * public:
+ * HashFast(XXH64_hash_t s) {
+ * XXH3_generateSecret_fromSeed(secret, s);
+ * }
+ * size_t operator()(const std::string& x) const {
+ * return size_t{
+ * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ * };
+ * }
+ * };
+ * @endcode
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
+ * @param seed The seed to seed the state.
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
+
+/*!
+ * These variants generate hash values using either
+ * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than _withSeed() variants.
+ * Therefore, _withSecretandSeed variant combines the best of both worlds.
+ *
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to swap each bit in the output, via its impact to the seed.
+ *
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
+ XXH64_hash_t seed);
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
+ XXH64_hash_t seed64);
+#ifndef XXH_NO_STREAM
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
+ XXH64_hash_t seed64);
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
+ XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
+
+#endif /* !XXH_NO_XXH3 */
+#endif /* XXH_NO_LONG_LONG */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# define XXH_IMPLEMENTATION
+#endif
+
+#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
+
+
+/* ======================================================================== */
+/* ======================================================================== */
+/* ======================================================================== */
+
+
+/*-**********************************************************************
+ * xxHash implementation
+ *-**********************************************************************
+ * xxHash's implementation used to be hosted inside xxhash.c.
+ *
+ * However, inlining requires implementation to be visible to the compiler,
+ * hence be included alongside the header.
+ * Previously, implementation was hosted inside xxhash.c,
+ * which was then #included when inlining was activated.
+ * This construction created issues with a few build and install systems,
+ * as it required xxhash.c to be stored in /include directory.
+ *
+ * xxHash implementation is now directly integrated within xxhash.h.
+ * As a consequence, xxhash.c is no longer needed in /include.
+ *
+ * xxhash.c is still available and is still useful.
+ * In a "normal" setup, when xxhash is not inlined,
+ * xxhash.h only exposes the prototypes and public symbols,
+ * while xxhash.c can be built into an object file xxhash.o
+ * which can then be linked into the final binary.
+ ************************************************************************/
+
+#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
+ || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
+# define XXH_IMPLEM_13a8737387
+
+/* *************************************
+* Tuning parameters
+***************************************/
+
+/*!
+ * @defgroup tuning Tuning parameters
+ * @{
+ *
+ * Various macros to control xxHash's behavior.
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Define this to disable 64-bit code.
+ *
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
+ */
+# define XXH_NO_LONG_LONG
+# undef XXH_NO_LONG_LONG /* don't actually */
+/*!
+ * @brief Controls how unaligned memory is accessed.
+ *
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is
+ * safe and portable.
+ *
+ * Unfortunately, on some target/compiler combinations, the generated assembly
+ * is sub-optimal.
+ *
+ * The switch below allows selection of a different access method
+ * in the search for improved performance.
+ *
+ * @par Possible options:
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
+ * @par
+ * Use `memcpy()`. Safe and portable. Note that most modern compilers will
+ * eliminate the function call and treat it as an unaligned access.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
+ * @par
+ * Depends on compiler extensions and is therefore not portable.
+ * This method is safe _if_ your compiler supports it,
+ * and *generally* as fast or faster than `memcpy`.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
+ * @par
+ * Casts directly and dereferences. This method doesn't depend on the
+ * compiler, but it violates the C standard as it directly dereferences an
+ * unaligned pointer. It can generate buggy code on targets which do not
+ * support unaligned memory accesses, but in some circumstances, it's the
+ * only known way to get the most performance.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
+ * @par
+ * Also portable. This can generate the best code on old compilers which don't
+ * inline small `memcpy()` calls, and it might also be faster on big-endian
+ * systems which lack a native byteswap instruction. However, some compilers
+ * will emit literal byteshifts even if the target supports unaligned access.
+ *
+ *
+ * @warning
+ * Methods 1 and 2 rely on implementation-defined behavior. Use these with
+ * care, as what works on one compiler/platform/optimization level may cause
+ * another to read garbage data or even crash.
+ *
+ * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
+ *
+ * Prefer these methods in priority order (0 > 3 > 1 > 2)
+ */
+# define XXH_FORCE_MEMORY_ACCESS 0
+
+/*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy usage of forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ * comes first.
+ * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ * conservative and disables hacks that increase code size. It implies the
+ * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ * and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ * Performance may cry. For example, the single shot functions just use the
+ * streaming API.
+ */
+# define XXH_SIZE_OPT 0
+
+/*!
+ * @def XXH_FORCE_ALIGN_CHECK
+ * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
+ * and XXH64() only).
+ *
+ * This is an important performance trick for architectures without decent
+ * unaligned memory access performance.
+ *
+ * It checks for input alignment, and when conditions are met, uses a "fast
+ * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
+ * faster_ read speed.
+ *
+ * The check costs one initial branch per hash, which is generally negligible,
+ * but not zero.
+ *
+ * Moreover, it's not useful to generate an additional code path if memory
+ * access uses the same instruction for both aligned and unaligned
+ * addresses (e.g. x86 and aarch64).
+ *
+ * In these cases, the alignment check can be removed by setting this macro to 0.
+ * Then the code will always use unaligned memory access.
+ * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
+ * which are platforms known to offer good unaligned memory access performance.
+ *
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
+ * This option does not affect XXH3 (only XXH32 and XXH64).
+ */
+# define XXH_FORCE_ALIGN_CHECK 0
+
+/*!
+ * @def XXH_NO_INLINE_HINTS
+ * @brief When non-zero, sets all functions to `static`.
+ *
+ * By default, xxHash tries to force the compiler to inline almost all internal
+ * functions.
+ *
+ * This can usually improve performance due to reduced jumping and improved
+ * constant folding, but significantly increases the size of the binary which
+ * might not be favorable.
+ *
+ * Additionally, sometimes the forced inlining can be detrimental to performance,
+ * depending on the architecture.
+ *
+ * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
+ * compiler full control on whether to inline or not.
+ *
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
+ */
+# define XXH_NO_INLINE_HINTS 0
+
+/*!
+ * @def XXH3_INLINE_SECRET
+ * @brief Determines whether to inline the XXH3 withSecret code.
+ *
+ * When the secret size is known, the compiler can improve the performance
+ * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
+ *
+ * However, if the secret size is not known, it doesn't have any benefit. This
+ * happens when xxHash is compiled into a global symbol. Therefore, if
+ * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
+ *
+ * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
+ * that are *sometimes* force inline on -Og, and it is impossible to automatically
+ * detect this optimization level.
+ */
+# define XXH3_INLINE_SECRET 0
+
+/*!
+ * @def XXH32_ENDJMP
+ * @brief Whether to use a jump for `XXH32_finalize`.
+ *
+ * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
+ * This is generally preferable for performance,
+ * but depending on exact architecture, a jmp may be preferable.
+ *
+ * This setting is only possibly making a difference for very small inputs.
+ */
+# define XXH32_ENDJMP 0
+
+/*!
+ * @internal
+ * @brief Redefines old internal names.
+ *
+ * For compatibility with code that uses xxHash's internals before the names
+ * were changed to improve namespacing. There is no other reason to use this.
+ */
+# define XXH_OLD_NAMES
+# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+/*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+# define XXH_NO_STREAM
+# undef XXH_NO_STREAM /* don't actually */
+#endif /* XXH_DOXYGEN */
+/*!
+ * @}
+ */
+
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+ /* Prefer method 1 (`__attribute__((aligned(1)))`, historically __packed__ structures) for GCC.
+ * Exception: < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting
+ * with that method, so we use memcpy there, which for some reason does unaligned loads. */
+# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+#ifndef XXH_SIZE_OPT
+ /* default to 1 for -Os or -Oz */
+# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+# define XXH_SIZE_OPT 1
+# else
+# define XXH_SIZE_OPT 0
+# endif
+#endif
+
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+ /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+# if XXH_SIZE_OPT >= 1 || \
+ defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+ || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+#ifndef XXH_NO_INLINE_HINTS
+# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
+# define XXH_NO_INLINE_HINTS 1
+# else
+# define XXH_NO_INLINE_HINTS 0
+# endif
+#endif
+
+#ifndef XXH3_INLINE_SECRET
+# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
+ || !defined(XXH_INLINE_ALL)
+# define XXH3_INLINE_SECRET 0
+# else
+# define XXH3_INLINE_SECRET 1
+# endif
+#endif
+
+#ifndef XXH32_ENDJMP
+/* generally preferable for performance */
+# define XXH32_ENDJMP 0
+#endif
+
+/*!
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoke malloc / free.
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+/* Allocation stub for XXH_NO_STDLIB builds: ignores the requested size and always returns NULL. */
+static XXH_CONSTF void* XXH_malloc(size_t s)
+{
+    (void)s;   /* the size is deliberately unused */
+    return NULL;
+}
+
+/* Release stub for XXH_NO_STDLIB builds: the stub allocator never hands out memory, so this does nothing. */
+static void XXH_free(void* p)
+{
+    (void)p;   /* the pointer is deliberately unused */
+}
+
+#else
+
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Allocation hook: edit this function to use a different routine than malloc().
+ *
+ * @param s Number of bytes requested.
+ * @return Whatever malloc() returns for @p s.
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s)
+{
+    void* const block = malloc(s);
+    return block;
+}
+
+/*!
+ * @internal
+ * @brief Release hook: edit this function to use a different routine than free().
+ *
+ * @param p Pointer forwarded to free().
+ */
+static void XXH_free(void* p)
+{
+    free(p);
+}
+
+#endif /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Copy hook: edit this function to use a different routine than memcpy().
+ *
+ * @param dest Destination buffer.
+ * @param src Source buffer.
+ * @param size Number of bytes to copy.
+ * @return The value returned by memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    void* const written = memcpy(dest, src, size);
+    return written;
+}
+
+#include <limits.h> /* ULLONG_MAX */
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS /* disable inlining hints */
+# if defined(__GNUC__) || defined(__clang__)
+# define XXH_FORCE_INLINE static __attribute__((unused))
+# else
+# define XXH_FORCE_INLINE static
+# endif
+# define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+# define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined(_MSC_VER) /* Visual Studio */
+# define XXH_FORCE_INLINE static __forceinline
+# define XXH_NO_INLINE static __declspec(noinline)
+#elif defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
+# define XXH_FORCE_INLINE static inline
+# define XXH_NO_INLINE static
+#else
+# define XXH_FORCE_INLINE static
+# define XXH_NO_INLINE static
+#endif
+
+#if XXH3_INLINE_SECRET
+# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
+
+
+/* *************************************
+* Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
+ */
+#ifndef XXH_DEBUGLEVEL
+# ifdef DEBUGLEVEL /* backwards compat */
+# define XXH_DEBUGLEVEL DEBUGLEVEL
+# else
+# define XXH_DEBUGLEVEL 0
+# endif
+#endif
+
+#if (XXH_DEBUGLEVEL>=1)
+# include <assert.h> /* note: can still be disabled with NDEBUG */
+# define XXH_ASSERT(c) assert(c)
+#else
+# if defined(__INTEL_COMPILER)
+# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))
+# else
+# define XXH_ASSERT(c) XXH_ASSUME(c)
+# endif
+#endif
+
+/* note: use after variable declarations */
+#ifndef XXH_STATIC_ASSERT
+# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
+# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
+# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
+# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+# else
+# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
+# endif
+# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
+#endif
+
+/*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
+ */
+#if defined(__GNUC__) || defined(__clang__)
+# define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
+#else
+# define XXH_COMPILER_GUARD(var) ((void)0)
+#endif
+
+/* Specifically for NEON vectors which use the "w" constraint, on
+ * Clang. */
+#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
+# define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
+#else
+# define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
+#endif
+
+/* *************************************
+* Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t xxh_u8;
+#else
+ typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+#ifdef XXH_OLD_NAMES
+# warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
+# define BYTE xxh_u8
+# define U8 xxh_u8
+# define U32 xxh_u32
+#endif
+
+/* *** Memory access *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit big endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
+ * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
+ * always @ref XXH_alignment::XXH_unaligned.
+ *
+ * @param ptr The pointer to read from.
+ * @param align Whether @p ptr is aligned.
+ * @pre
+ * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
+ * aligned.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Method 3: manual byteshift. Best for old compilers which don't inline memcpy.
+ * No native-endian reader is defined for this method:
+ * XXH_readLE32 and XXH_readBE32 are used directly.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/*
+ * Method 2: force direct memory access by dereferencing the cast pointer.
+ * Only works on CPU which support unaligned memory access in hardware.
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    const xxh_u32* const wordPtr = (const xxh_u32*) memPtr;
+    return *wordPtr;
+}
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * Method 1: read through a type declared with __attribute__((aligned(1))),
+ * which is supported by gcc and clang. Originally the documentation claimed
+ * that the attribute only increased the alignment, but actually it can
+ * decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+#endif
+static xxh_u32 XXH_read32(const void* ptr)
+{
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    const xxh_unalign32* const wordPtr = (const xxh_unalign32*)ptr;
+    return *wordPtr;
+}
+
+#else
+
+/*
+ * Method 0: copy the bytes into a local through XXH_memcpy().
+ * Portable and safe solution. Generally efficient.
+ * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    xxh_u32 word;
+    XXH_memcpy(&word, memPtr, sizeof word);
+    return word;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* *** Endianness *** */
+
+/*!
+ * @ingroup tuning
+ * @def XXH_CPU_LITTLE_ENDIAN
+ * @brief Whether the target is little endian.
+ *
+ * Defined to 1 if the target is little endian, or 0 if it is big endian.
+ * It can be defined externally, for example on the compiler command line.
+ *
+ * If it is not defined,
+ * a runtime check (which is usually constant folded) is used instead.
+ *
+ * @note
+ * This is not necessarily defined to an integer constant.
+ *
+ * @see XXH_isLittleEndian() for the runtime check.
+ */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+/*
+ * Try to detect endianness automatically, to avoid the nonstandard behavior
+ * in `XXH_isLittleEndian()`
+ */
+# if defined(_WIN32) /* Windows is always little endian */ \
+ || defined(__LITTLE_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+# define XXH_CPU_LITTLE_ENDIAN 1
+# elif defined(__BIG_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+# define XXH_CPU_LITTLE_ENDIAN 0
+# else
+/*!
+ * @internal
+ * @brief Runtime fallback for @ref XXH_CPU_LITTLE_ENDIAN when endianness is not detected at compile time.
+ *
+ * Most compilers will constant fold this.
+ *
+ * @return The first byte of a 32-bit integer holding the value 1.
+ */
+static int XXH_isLittleEndian(void)
+{
+    /*
+     * Inspecting the first byte through a union is portable and well-defined.
+     * The probe is intentionally not static: that is detrimental to performance.
+     */
+    const union { xxh_u32 u; xxh_u8 c[4]; } probe = { 1 };
+    return probe.c[0];
+}
+# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+# endif
+#endif
+
+
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifdef __has_builtin
+# define XXH_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define XXH_HAS_BUILTIN(x) 0
+#endif
+
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * # include <stddef.h>
+ * # ifdef unreachable
+ * # define XXH_UNREACHABLE() unreachable()
+ * # endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * # include <utility>
+ * # define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that as including `<utility>` in `extern "C"` blocks
+ * doesn't work on GCC12
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+# define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+# define XXH_UNREACHABLE() __assume(0)
+
+#else
+# define XXH_UNREACHABLE()
+#endif
+
+/*
+ * XXH_ASSUME(c): tells the optimizer that condition `c` holds.
+ *
+ * Uses __builtin_assume when the compiler reports it. Otherwise the condition
+ * is tested and the failing path is marked with XXH_UNREACHABLE().
+ *
+ * The fallback is wrapped in do { } while (0) so that it expands to exactly
+ * one statement: with a bare `if`, writing
+ *     if (x) XXH_ASSUME(c); else ...
+ * would attach the `else` to the macro's hidden `if`.
+ */
+#if XXH_HAS_BUILTIN(__builtin_assume)
+# define XXH_ASSUME(c) __builtin_assume(c)
+#else
+# define XXH_ASSUME(c) do { if (!(c)) { XXH_UNREACHABLE(); } } while (0)
+#endif
+
+/*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ * @p r > 0 && @p r < 32
+ * @note
+ * @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
+#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
+ && XXH_HAS_BUILTIN(__builtin_rotateleft64)
+# define XXH_rotl32 __builtin_rotateleft32
+# define XXH_rotl64 __builtin_rotateleft64
+/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_swap32(xxh_u32 x)
+ * @brief A 32-bit byteswap.
+ *
+ * @param x The 32-bit integer to byteswap.
+ * @return @p x, byteswapped.
+ */
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+/* Portable byteswap: shift each byte to its mirrored position, then merge the four lanes. */
+static xxh_u32 XXH_swap32 (xxh_u32 x)
+{
+    const xxh_u32 lane3 = (x << 24) & 0xff000000;
+    const xxh_u32 lane2 = (x <<  8) & 0x00ff0000;
+    const xxh_u32 lane1 = (x >>  8) & 0x0000ff00;
+    const xxh_u32 lane0 = (x >> 24) & 0x000000ff;
+    return lane3 | lane2 | lane1 | lane0;
+}
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+
+/*!
+ * @internal
+ * @brief Tells a reader whether the pointer it is given is aligned.
+ */
+typedef enum {
+    XXH_aligned = 0,  /*!< Aligned */
+    XXH_unaligned = 1 /*!< Possibly unaligned */
+} XXH_alignment;
+
+/*
+ * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
+ *
+ * This is ideal for older compilers which don't inline memcpy.
+ */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+/* Byteshift load: builds the little endian value from the lowest byte upwards. */
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
+{
+    const xxh_u8* const bytes = (const xxh_u8 *)memPtr;
+    xxh_u32 value = bytes[0];
+    value |= (xxh_u32)bytes[1] << 8;
+    value |= (xxh_u32)bytes[2] << 16;
+    value |= (xxh_u32)bytes[3] << 24;
+    return value;
+}
+
+/* Byteshift load: same as above, with the byte order mirrored for big endian. */
+XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
+{
+    const xxh_u8* const bytes = (const xxh_u8 *)memPtr;
+    xxh_u32 value = bytes[3];
+    value |= (xxh_u32)bytes[2] << 8;
+    value |= (xxh_u32)bytes[1] << 16;
+    value |= (xxh_u32)bytes[0] << 24;
+    return value;
+}
+
+#else
+/* Native read through XXH_read32(), byteswapped when the CPU is not little endian. */
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
+{
+    const xxh_u32 raw = XXH_read32(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return raw;
+    }
+    return XXH_swap32(raw);
+}
+
+/* Native read through XXH_read32(), byteswapped when the CPU is little endian. */
+static xxh_u32 XXH_readBE32(const void* ptr)
+{
+    const xxh_u32 raw = XXH_read32(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return XXH_swap32(raw);
+    }
+    return raw;
+}
+#endif
+
+/*
+ * Little endian 32-bit read with an alignment hint.
+ * XXH_unaligned defers to XXH_readLE32(); any other value dereferences
+ * ptr directly as an xxh_u32.
+ */
+XXH_FORCE_INLINE xxh_u32
+XXH_readLE32_align(const void* ptr, XXH_alignment align)
+{
+    xxh_u32 word;
+    if (align == XXH_unaligned) {
+        return XXH_readLE32(ptr);
+    }
+    /* Direct load; byteswap when the CPU is not little endian. */
+    word = *(const xxh_u32*)ptr;
+    return XXH_CPU_LITTLE_ENDIAN ? word : XXH_swap32(word);
+}
+
+
+/* *************************************
+* Misc
+***************************************/
+/*! @ingroup public */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void)
+{
+    /* Reports the XXH_VERSION_NUMBER this code was compiled with. */
+    return XXH_VERSION_NUMBER;
+}
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+/*!
+ * @}
+ * @defgroup XXH32_impl XXH32 implementation
+ * @ingroup impl
+ *
+ * Details on the XXH32 implementation.
+ * @{
+ */
+ /* #define instead of static const, to be used as initializers */
+#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */
+#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */
+#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */
+#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */
+#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */
+
+#ifdef XXH_OLD_NAMES
+# define PRIME32_1 XXH_PRIME32_1
+# define PRIME32_2 XXH_PRIME32_2
+# define PRIME32_3 XXH_PRIME32_3
+# define PRIME32_4 XXH_PRIME32_4
+# define PRIME32_5 XXH_PRIME32_5
+#endif
+
+/*!
+ * @internal
+ * @brief Mixes one stripe of input into one accumulator lane.
+ *
+ * The bits are shuffled so that any bit from @p input impacts several bits in
+ * @p acc.
+ *
+ * @param acc The accumulator lane.
+ * @param input The stripe of input to mix.
+ * @return The mixed accumulator lane.
+ */
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+    acc = acc + input * XXH_PRIME32_2;
+    acc = XXH_rotl32(acc, 13) * XXH_PRIME32_1;
+#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /*
+     * UGLY HACK:
+     * Only a compiler fence stops GCC and Clang from autovectorizing the XXH32
+     * loop without globally disabling SSE4.1 (pragmas and attributes don't
+     * work for some reason).
+     *
+     * Vectorization is unwanted because, although it handles 4 integers at a
+     * time, several factors make XXH32 slower on SSE4:
+     * - pmulld has a ridiculous amount of lag (10 cycles of latency on newer
+     *   chips!), so multiplying four integers at once is slightly slower than
+     *   multiplying four integers independently. Even where pmulld was
+     *   fastest, Sandy/Ivy Bridge, going into SSE just to multiply is not
+     *   worth it unless doing a long operation.
+     *
+     * - A rotate takes four instructions,
+     *       movdqa tmp, v  // not required with VEX encoding
+     *       pslld tmp, 13  // tmp <<= 13
+     *       psrld v, 19    // x >>= 19
+     *       por v, tmp     // x |= tmp
+     *   against one for scalar:
+     *       roll v, 13     // reliably fast across the board
+     *       shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
+     *
+     * - Instruction level parallelism is actually more beneficial here, because
+     *   SIMD serializes the operation: while v1 is rotating, v2 can load data
+     *   and v3 can multiply. SSE forces them to operate together.
+     *
+     * The fence is also enabled on AArch64, as Clang is *very aggressive* in
+     * vectorizing the loop. NEON is only faster on the A53, and with the newer
+     * cores, it is less than half the speed.
+     *
+     * Additionally, it is used on WASM SIMD128 because it JITs to the same
+     * SIMD instructions and has the same issue.
+     */
+    XXH_COMPILER_GUARD(acc);
+#endif
+    return acc;
+}
+
+/*!
+ * @internal
+ * @brief Mixes all bits to finalize the hash.
+ *
+ * The final mix ensures that all input bits have a chance to impact any bit in
+ * the output digest, resulting in an unbiased distribution.
+ *
+ * @param hash The hash to avalanche.
+ * @return The avalanched hash.
+ */
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
+{
+    /*
+     * Three xor-shift folds (15, 13, 16 bits) with a prime multiplication
+     * after each of the first two.
+     */
+    xxh_u32 mixed = hash;
+
+    mixed = (mixed ^ (mixed >> 15)) * XXH_PRIME32_2;
+    mixed = (mixed ^ (mixed >> 13)) * XXH_PRIME32_3;
+    mixed = mixed ^ (mixed >> 16);
+    return mixed;
+}
+
+/*
+ * Shorthand that forwards to XXH_readLE32_align(). The expansion refers to a
+ * variable named `align`, so it can only be used where such a variable is in
+ * scope.
+ */
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH64_finalize().
+ */
+static XXH_PUREF xxh_u32
+XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+/* Consumes one byte at ptr (advancing ptr) and mixes it into hash. */
+#define XXH_PROCESS1 do { \
+ hash += (*ptr++) * XXH_PRIME32_5; \
+ hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
+} while (0)
+
+/*
+ * Consumes one 32-bit word at ptr (advancing ptr by 4) and mixes it into hash.
+ * XXH_get32bits picks up this function's `align` parameter.
+ */
+#define XXH_PROCESS4 do { \
+ hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
+ ptr += 4; \
+ hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
+} while (0)
+
+ if (ptr==NULL) XXH_ASSERT(len == 0);
+
+ /* Compact rerolled version; generally faster */
+ if (!XXH32_ENDJMP) {
+ /* Only the low 4 bits of len are honored: at most 15 bytes are consumed. */
+ len &= 15;
+ while (len >= 4) {
+ XXH_PROCESS4;
+ len -= 4;
+ }
+ while (len > 0) {
+ XXH_PROCESS1;
+ --len;
+ }
+ return XXH32_avalanche(hash);
+ } else {
+ /*
+ * Unrolled version: each case label is a remaining byte count. The cases
+ * are grouped by (count % 4) and fall through so that the right number of
+ * 4-byte steps runs first, followed by the right number of 1-byte steps.
+ */
+ switch(len&15) /* or switch(bEnd - p) */ {
+ case 12: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 8: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 4: XXH_PROCESS4;
+ return XXH32_avalanche(hash);
+
+ case 13: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 9: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 5: XXH_PROCESS4;
+ XXH_PROCESS1;
+ return XXH32_avalanche(hash);
+
+ case 14: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 10: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 6: XXH_PROCESS4;
+ XXH_PROCESS1;
+ XXH_PROCESS1;
+ return XXH32_avalanche(hash);
+
+ case 15: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 11: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 7: XXH_PROCESS4;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 3: XXH_PROCESS1;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 2: XXH_PROCESS1;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 1: XXH_PROCESS1;
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 0: return XXH32_avalanche(hash);
+ }
+ /* Every value of len&15 (0..15) has a case above that returns. */
+ XXH_ASSERT(0);
+ return hash; /* reaching this point is deemed impossible */
+ }
+}
+
+/*
+ * XXH_PROCESS1/XXH_PROCESS4 are defined inside XXH32_finalize(). With
+ * XXH_OLD_NAMES they stay defined and get unprefixed aliases; otherwise they
+ * are removed so they do not leak into the rest of the file.
+ */
+#ifdef XXH_OLD_NAMES
+# define PROCESS1 XXH_PROCESS1
+# define PROCESS4 XXH_PROCESS4
+#else
+# undef XXH_PROCESS1
+# undef XXH_PROCESS4
+#endif
+
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH32().
+ *
+ * @param input , len , seed Directly passed from @ref XXH32().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u32
+XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
+{
+    xxh_u32 digest;
+
+    if (input==NULL) XXH_ASSERT(len == 0);
+
+    if (len < 16) {
+        /* Short input: no stripe loop, start directly from the seed. */
+        digest = seed + XXH_PRIME32_5;
+    } else {
+        /* Long input: run four lanes over every complete 16-byte stripe. */
+        const xxh_u8* const stop = (input + len) - 15;
+        xxh_u32 lane1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+        xxh_u32 lane2 = seed + XXH_PRIME32_2;
+        xxh_u32 lane3 = seed + 0;
+        xxh_u32 lane4 = seed - XXH_PRIME32_1;
+
+        do {
+            lane1 = XXH32_round(lane1, XXH_get32bits(input)); input += 4;
+            lane2 = XXH32_round(lane2, XXH_get32bits(input)); input += 4;
+            lane3 = XXH32_round(lane3, XXH_get32bits(input)); input += 4;
+            lane4 = XXH32_round(lane4, XXH_get32bits(input)); input += 4;
+        } while (input < stop);
+
+        /* Merge the lanes, each with its own rotation amount. */
+        digest = XXH_rotl32(lane1, 1)
+               + XXH_rotl32(lane2, 7)
+               + XXH_rotl32(lane3, 12)
+               + XXH_rotl32(lane4, 18);
+    }
+
+    digest += (xxh_u32)len;
+
+    /* `input` now points at the bytes the stripe loop did not consume. */
+    return XXH32_finalize(digest, input, len&15, align);
+}
+
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
+{
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
+    /* Size-optimized build: reuse the streaming API (slow for small inputs). */
+    XXH32_state_t st;
+    XXH32_reset(&st, seed);
+    XXH32_update(&st, (const xxh_u8*)input, len);
+    return XXH32_digest(&st);
+#else
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+
+    /* Take the aligned path only when the check is enabled and input is 4-byte aligned. */
+    if (XXH_FORCE_ALIGN_CHECK && ((((size_t)input) & 3) == 0)) {
+        return XXH32_endian_align(bytes, len, seed, XXH_aligned);
+    }
+
+    return XXH32_endian_align(bytes, len, seed, XXH_unaligned);
+#endif
+}
+
+
+
+/******* Hash streaming *******/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+    /* The storage is returned exactly as XXH_malloc() provides it; nothing is initialized here. */
+    void* const raw = XXH_malloc(sizeof(XXH32_state_t));
+    return (XXH32_state_t*)raw;
+}
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+ /* Hands statePtr to XXH_free() and unconditionally reports XXH_OK. */
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{
+    /* Byte-wise copy of the whole state structure. */
+    XXH_memcpy(dstState, srcState, sizeof(XXH32_state_t));
+}
+
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
+{
+    XXH_ASSERT(statePtr != NULL);
+
+    /* Clear every field, then seed the four lanes. */
+    memset(statePtr, 0, sizeof(XXH32_state_t));
+    statePtr->v[3] = seed - XXH_PRIME32_1;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[1] = seed + XXH_PRIME32_2;
+    statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+
+    return XXH_OK;
+}
+
+
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH32_update(XXH32_state_t* state, const void* input, size_t len)
+{
+    /*
+     * Streaming step: feeds `len` bytes at `input` into `state`.
+     * Complete 16-byte stripes are folded into state->v[0..3]; a trailing
+     * partial stripe is parked in state->mem32 (state->memsize bytes) until
+     * the next call. Always returns XXH_OK; a NULL input is treated as an
+     * empty update.
+     */
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len_32 += (XXH32_hash_t)len;
+        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
+
+        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
+            state->memsize += (XXH32_hash_t)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from previous update */
+            /* Top the carry buffer up to a full stripe and consume it. */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
+            {   const xxh_u32* p32 = state->mem32;
+                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
+                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
+                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
+                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
+            }
+            p += 16-state->memsize;
+            state->memsize = 0;
+        }
+
+        /*
+         * Compare the remaining byte count rather than forming `bEnd-16`:
+         * after the carry buffer is drained fewer than 16 bytes may remain,
+         * and `bEnd-16` would then point before the start of the input, which
+         * is undefined pointer arithmetic.
+         */
+        if ((size_t)(bEnd - p) >= 16) {
+            const xxh_u8* const limit = bEnd - 16;   /* valid: at least 16 bytes remain */
+
+            do {
+                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
+                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
+                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
+                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
+            } while (p<=limit);
+
+        }
+
+        if (p < bEnd) {
+            /* Keep the leftover (< 16 bytes) for the next update or the digest. */
+            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
+{
+    xxh_u32 acc;
+
+    if (!state->large_len) {
+        /* No stripe was ever consumed, so v[2] still holds the seed. */
+        acc = state->v[2] /* == seed */ + XXH_PRIME32_5;
+    } else {
+        /* Merge the four lanes, each with its own rotation amount. */
+        acc = XXH_rotl32(state->v[0], 1)
+            + XXH_rotl32(state->v[1], 7)
+            + XXH_rotl32(state->v[2], 12)
+            + XXH_rotl32(state->v[3], 18);
+    }
+
+    acc += state->total_len_32;
+
+    /* The buffered tail lives in state->mem32 and is read as aligned. */
+    return XXH32_finalize(acc, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
+}
+#endif /* !XXH_NO_STREAM */
+
+/******* Canonical representation *******/
+
+/*!
+ * @ingroup XXH32_family
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ *
+ * The canonical representation uses big endian convention, the same convention
+ * as human-readable numbers (large digits first).
+ *
+ * This way, hash values can be written into a file or buffer, remaining
+ * comparable across different systems.
+ *
+ * The following functions allow transformation of hash values to and from their
+ * canonical format.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    /* Byte-swap on little-endian hosts so the stored bytes are big endian. */
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        hash = XXH_swap32(hash);
+    }
+    XXH_memcpy(dst, &hash, sizeof(XXH32_canonical_t));
+}
+/*! @ingroup XXH32_family */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+ /* Inverse of XXH32_canonicalFromHash(): reads the stored bytes via XXH_readBE32(). */
+ return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bit hash functions
+*********************************************************************/
+/*!
+ * @}
+ * @ingroup impl
+ * @{
+ */
+/******* Memory access *******/
+
+/* Internal name for the 64-bit hash type, used throughout the 64-bit code below. */
+typedef XXH64_hash_t xxh_u64;
+
+/* Unprefixed alias, only provided when XXH_OLD_NAMES is defined. */
+#ifdef XXH_OLD_NAMES
+# define U64 xxh_u64
+#endif
+
+/*
+ * XXH_read64() reads a native-endian 64-bit value from a possibly unaligned
+ * address. XXH_FORCE_MEMORY_ACCESS picks the implementation; mode 3 defines no
+ * XXH_read64() at all.
+ */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE64 and XXH_readBE64.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+ return *(const xxh_u64*) memPtr;
+}
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
+ */
+#ifdef XXH_OLD_NAMES
+/* NOTE(review): not referenced by XXH_read64() below; presumably kept only for old-name compatibility. */
+typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
+#endif
+static xxh_u64 XXH_read64(const void* ptr)
+{
+ /* Dereference through a 1-byte-aligned typedef of xxh_u64. */
+ typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+ return *((const xxh_unalign64*)ptr);
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+ xxh_u64 val;
+ XXH_memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap64 __builtin_bswap64
+#else
+/* Portable fallback: isolate each byte, then move it to the mirrored position. */
+static xxh_u64 XXH_swap64(xxh_u64 x)
+{
+    xxh_u64 const b0 = (x & 0x00000000000000ffULL) << 56;
+    xxh_u64 const b1 = (x & 0x000000000000ff00ULL) << 40;
+    xxh_u64 const b2 = (x & 0x0000000000ff0000ULL) << 24;
+    xxh_u64 const b3 = (x & 0x00000000ff000000ULL) << 8;
+    xxh_u64 const b4 = (x & 0x000000ff00000000ULL) >> 8;
+    xxh_u64 const b5 = (x & 0x0000ff0000000000ULL) >> 24;
+    xxh_u64 const b6 = (x & 0x00ff000000000000ULL) >> 40;
+    xxh_u64 const b7 = (x & 0xff00000000000000ULL) >> 56;
+
+    return b0 | b1 | b2 | b3 | b4 | b5 | b6 | b7;
+}
+#endif
+
+
+/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+/* Byteshift load, least significant byte first; independent of host endianness. */
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
+{
+    const xxh_u8* const b = (const xxh_u8 *)memPtr;
+    xxh_u64 const lo = (xxh_u64)b[0]
+                     | ((xxh_u64)b[1] << 8)
+                     | ((xxh_u64)b[2] << 16)
+                     | ((xxh_u64)b[3] << 24);
+    xxh_u64 const hi = (xxh_u64)b[4]
+                     | ((xxh_u64)b[5] << 8)
+                     | ((xxh_u64)b[6] << 16)
+                     | ((xxh_u64)b[7] << 24);
+    return lo | (hi << 32);
+}
+
+/* Byteshift load, most significant byte first; independent of host endianness. */
+XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
+{
+    const xxh_u8* const b = (const xxh_u8 *)memPtr;
+    xxh_u64 const hi = (xxh_u64)b[3]
+                     | ((xxh_u64)b[2] << 8)
+                     | ((xxh_u64)b[1] << 16)
+                     | ((xxh_u64)b[0] << 24);
+    xxh_u64 const lo = (xxh_u64)b[7]
+                     | ((xxh_u64)b[6] << 8)
+                     | ((xxh_u64)b[5] << 16)
+                     | ((xxh_u64)b[4] << 24);
+    return (hi << 32) | lo;
+}
+
+#else
+/* Native read, byte-swapped when the host is not little endian. */
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{
+    xxh_u64 const raw = XXH_read64(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return raw;
+    }
+    return XXH_swap64(raw);
+}
+
+/* Native read, byte-swapped when the host is little endian. */
+static xxh_u64 XXH_readBE64(const void* ptr)
+{
+    xxh_u64 const raw = XXH_read64(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return XXH_swap64(raw);
+    }
+    return raw;
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    if (align != XXH_unaligned) {
+        /* Caller vouches for alignment: dereference directly. */
+        xxh_u64 const raw = *(const xxh_u64*)ptr;
+        return XXH_CPU_LITTLE_ENDIAN ? raw : XXH_swap64(raw);
+    }
+    /* Unaligned: go through the alignment-agnostic reader. */
+    return XXH_readLE64(ptr);
+}
+
+
+/******* xxh64 *******/
+/*!
+ * @}
+ * @defgroup XXH64_impl XXH64 implementation
+ * @ingroup impl
+ *
+ * Details on the XXH64 implementation.
+ * @{
+ */
+/* #define rather than static const, to be used as initializers */
+#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+/* Unprefixed aliases, only provided when XXH_OLD_NAMES is defined. */
+#ifdef XXH_OLD_NAMES
+# define PRIME64_1 XXH_PRIME64_1
+# define PRIME64_2 XXH_PRIME64_2
+# define PRIME64_3 XXH_PRIME64_3
+# define PRIME64_4 XXH_PRIME64_4
+# define PRIME64_5 XXH_PRIME64_5
+#endif
+
+/*! @copydoc XXH32_round */
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    /* Fold the scaled input in, rotate left by 31 bits, scale the lane. */
+    xxh_u64 const folded = acc + input * XXH_PRIME64_2;
+    xxh_u64 const rotated = XXH_rotl64(folded, 31);
+    return rotated * XXH_PRIME64_1;
+}
+
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    /* Run the lane through one round from a zero accumulator, then fold it into acc. */
+    xxh_u64 const lane = XXH64_round(0, val);
+    xxh_u64 const merged = acc ^ lane;
+    return merged * XXH_PRIME64_1 + XXH_PRIME64_4;
+}
+
+/*! @copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
+{
+    /*
+     * Three xor-shift folds (33, 29, 32 bits) with a prime multiplication
+     * after each of the first two.
+     */
+    xxh_u64 mixed = hash;
+
+    mixed = (mixed ^ (mixed >> 33)) * XXH_PRIME64_2;
+    mixed = (mixed ^ (mixed >> 29)) * XXH_PRIME64_3;
+    mixed = mixed ^ (mixed >> 32);
+    return mixed;
+}
+
+
+/*
+ * Shorthand for XXH_readLE64_align(). The expansion refers to a variable named
+ * `align`, so it can only be used where such a variable is in scope.
+ */
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+    size_t tail;
+
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+
+    /* Only the low 5 bits of len count: at most 31 bytes are consumed. */
+    tail = len & 31;
+
+    /* 8-byte steps. */
+    for (; tail >= 8; tail -= 8) {
+        xxh_u64 const lane = XXH64_round(0, XXH_get64bits(ptr));
+        ptr += 8;
+        hash ^= lane;
+        hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+    }
+
+    /* At most one 4-byte step. */
+    if (tail >= 4) {
+        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        ptr += 4;
+        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        tail -= 4;
+    }
+
+    /* Remaining single bytes. */
+    for (; tail > 0; --tail) {
+        hash ^= (*ptr++) * XXH_PRIME64_5;
+        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
+    }
+
+    return XXH64_avalanche(hash);
+}
+
+/*
+ * NOTE(review): XXH64_finalize() above defines no XXH_PROCESS1_64,
+ * XXH_PROCESS4_64 or XXH_PROCESS8_64 macro, so this block looks like a
+ * leftover. The #undef branch is harmless; with XXH_OLD_NAMES the aliases would
+ * expand to names not defined in this section. Confirm before relying on them.
+ */
+#ifdef XXH_OLD_NAMES
+# define PROCESS1_64 XXH_PROCESS1_64
+# define PROCESS4_64 XXH_PROCESS4_64
+# define PROCESS8_64 XXH_PROCESS8_64
+#else
+# undef XXH_PROCESS1_64
+# undef XXH_PROCESS4_64
+# undef XXH_PROCESS8_64
+#endif
+
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
+XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
+{
+    xxh_u64 digest;
+
+    if (input==NULL) XXH_ASSERT(len == 0);
+
+    if (len < 32) {
+        /* Short input: no stripe loop, start directly from the seed. */
+        digest = seed + XXH_PRIME64_5;
+    } else {
+        /* Long input: run four lanes over every complete 32-byte stripe. */
+        const xxh_u8* const stop = (input + len) - 31;
+        xxh_u64 lane1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+        xxh_u64 lane2 = seed + XXH_PRIME64_2;
+        xxh_u64 lane3 = seed + 0;
+        xxh_u64 lane4 = seed - XXH_PRIME64_1;
+
+        do {
+            lane1 = XXH64_round(lane1, XXH_get64bits(input)); input+=8;
+            lane2 = XXH64_round(lane2, XXH_get64bits(input)); input+=8;
+            lane3 = XXH64_round(lane3, XXH_get64bits(input)); input+=8;
+            lane4 = XXH64_round(lane4, XXH_get64bits(input)); input+=8;
+        } while (input<stop);
+
+        /* Combine the rotated lanes, then merge each lane in once more. */
+        digest = XXH_rotl64(lane1, 1)
+               + XXH_rotl64(lane2, 7)
+               + XXH_rotl64(lane3, 12)
+               + XXH_rotl64(lane4, 18);
+        digest = XXH64_mergeRound(digest, lane1);
+        digest = XXH64_mergeRound(digest, lane2);
+        digest = XXH64_mergeRound(digest, lane3);
+        digest = XXH64_mergeRound(digest, lane4);
+    }
+
+    digest += (xxh_u64) len;
+
+    /* XXH64_finalize() masks len itself, so the full length is passed through. */
+    return XXH64_finalize(digest, input, len, align);
+}
+
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
+    /* Size-optimized build: reuse the streaming API (slow for small inputs). */
+    XXH64_state_t st;
+    XXH64_reset(&st, seed);
+    XXH64_update(&st, (const xxh_u8*)input, len);
+    return XXH64_digest(&st);
+#else
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+
+    /* Take the aligned path only when the check is enabled and input is 8-byte aligned. */
+    if (XXH_FORCE_ALIGN_CHECK && ((((size_t)input) & 7) == 0)) {
+        return XXH64_endian_align(bytes, len, seed, XXH_aligned);
+    }
+
+    return XXH64_endian_align(bytes, len, seed, XXH_unaligned);
+
+#endif
+}
+
+/******* Hash Streaming *******/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH64_family*/
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+    /* The storage is returned exactly as XXH_malloc() provides it; nothing is initialized here. */
+    void* const raw = XXH_malloc(sizeof(XXH64_state_t));
+    return (XXH64_state_t*)raw;
+}
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+ /* Hands statePtr to XXH_free() and unconditionally reports XXH_OK. */
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{
+    /* Byte-wise copy of the whole state structure. */
+    XXH_memcpy(dstState, srcState, sizeof(XXH64_state_t));
+}
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
+{
+    XXH_ASSERT(statePtr != NULL);
+
+    /* Clear every field, then seed the four lanes. */
+    memset(statePtr, 0, sizeof(XXH64_state_t));
+    statePtr->v[3] = seed - XXH_PRIME64_1;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[1] = seed + XXH_PRIME64_2;
+    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+
+    return XXH_OK;
+}
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+    /*
+     * Streaming step: feeds `len` bytes at `input` into `state`.
+     * Complete 32-byte stripes are folded into state->v[0..3]; a trailing
+     * partial stripe is parked in state->mem64 (state->memsize bytes) until
+     * the next call. Always returns XXH_OK; a NULL input is treated as an
+     * empty update.
+     */
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len += len;
+
+        if (state->memsize + len < 32) {  /* fill in tmp buffer */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
+            state->memsize += (xxh_u32)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* tmp buffer is full */
+            /* Top the carry buffer up to a full stripe and consume it. */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
+            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
+            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
+            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
+            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
+            p += 32 - state->memsize;
+            state->memsize = 0;
+        }
+
+        /*
+         * Compare the remaining byte count rather than forming `p+32`: when
+         * fewer than 32 bytes remain, `p+32` would point more than one past
+         * the end of the input, which is undefined pointer arithmetic.
+         */
+        if ((size_t)(bEnd - p) >= 32) {
+            const xxh_u8* const limit = bEnd - 32;   /* valid: at least 32 bytes remain */
+
+            do {
+                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
+                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
+                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
+                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
+            } while (p<=limit);
+
+        }
+
+        if (p < bEnd) {
+            /* Keep the leftover (< 32 bytes) for the next update or the digest. */
+            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
+{
+    xxh_u64 acc;
+
+    if (state->total_len < 32) {
+        /* No stripe was ever consumed, so v[2] still holds the seed. */
+        acc = state->v[2] /*seed*/ + XXH_PRIME64_5;
+    } else {
+        /* Combine the rotated lanes, then merge each lane in once more. */
+        acc = XXH_rotl64(state->v[0], 1)
+            + XXH_rotl64(state->v[1], 7)
+            + XXH_rotl64(state->v[2], 12)
+            + XXH_rotl64(state->v[3], 18);
+        acc = XXH64_mergeRound(acc, state->v[0]);
+        acc = XXH64_mergeRound(acc, state->v[1]);
+        acc = XXH64_mergeRound(acc, state->v[2]);
+        acc = XXH64_mergeRound(acc, state->v[3]);
+    }
+
+    acc += (xxh_u64) state->total_len;
+
+    /* XXH64_finalize() masks the length itself, so total_len is passed as is. */
+    return XXH64_finalize(acc, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
+}
+#endif /* !XXH_NO_STREAM */
+
+/******* Canonical representation *******/
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    /* Byte-swap on little-endian hosts so the stored bytes are big endian. */
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        hash = XXH_swap64(hash);
+    }
+    XXH_memcpy(dst, &hash, sizeof(XXH64_canonical_t));
+}
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
+{
+ /* Inverse of XXH64_canonicalFromHash(): reads the stored bytes via XXH_readBE64(). */
+ return XXH_readBE64(src);
+}
+
+#ifndef XXH_NO_XXH3
+
+/* *********************************************************************
+* XXH3
+* New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+/*!
+ * @}
+ * @defgroup XXH3_impl XXH3 implementation
+ * @ingroup impl
+ * @{
+ */
+
+/* === Compiler specifics === */
+
+/*
+ * XXH_RESTRICT: expands to `restrict` (C99 and later), to `__restrict` on the
+ * compilers listed below, and to nothing otherwise.
+ */
+#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
+# define XXH_RESTRICT /* disable */
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
+# define XXH_RESTRICT restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+ || (defined (__clang__)) \
+ || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+ || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+# define XXH_RESTRICT __restrict
+#else
+# define XXH_RESTRICT /* disable */
+#endif
+
+/*
+ * Branch-prediction hints: mapped to __builtin_expect on GCC >= 3, ICC >= 800
+ * and Clang; plain pass-through of the condition elsewhere.
+ */
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
+ || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
+ || defined(__clang__)
+# define XXH_likely(x) __builtin_expect(x, 1)
+# define XXH_unlikely(x) __builtin_expect(x, 0)
+#else
+# define XXH_likely(x) (x)
+# define XXH_unlikely(x) (x)
+#endif
+
+/*
+ * Wrapper around __has_include that evaluates to 0 when the compiler does not
+ * provide it. Can be pre-defined by the user.
+ */
+#ifndef XXH_HAS_INCLUDE
+# ifdef __has_include
+# define XXH_HAS_INCLUDE(x) __has_include(x)
+# else
+# define XXH_HAS_INCLUDE(x) 0
+# endif
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+# if defined(__ARM_FEATURE_SVE)
+# include <arm_sve.h>
+# endif
+# if defined(__ARM_NEON__) || defined(__ARM_NEON) \
+ || (defined(_M_ARM) && _M_ARM >= 7) \
+ || defined(_M_ARM64) || defined(_M_ARM64EC) \
+ || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
+# define inline __inline__ /* circumvent a clang bug */
+# include <arm_neon.h>
+# undef inline
+# elif defined(__AVX2__)
+# include <immintrin.h>
+# elif defined(__SSE2__)
+# include <emmintrin.h>
+# endif
+#endif
+
+#if defined(_MSC_VER)
+# include <intrin.h>
+#endif
+
+/*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ *
+ * For example, these two lines seem similar, and run equally fast on 64-bit:
+ *
+ * xxh_u64 x;
+ * x ^= (x >> 47); // good
+ * x ^= (x >> 13); // bad
+ *
+ * However, to a 32-bit machine, there is a major difference.
+ *
+ * x ^= (x >> 47) looks like this:
+ *
+ * x.lo ^= (x.hi >> (47 - 32));
+ *
+ * while x ^= (x >> 13) looks like this:
+ *
+ * // note: funnel shifts are not usually cheap.
+ * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ * x.hi ^= (x.hi >> 13);
+ *
+ * The first one is significantly faster than the second, simply because the
+ * shift is larger than 32. This means:
+ * - All the bits we need are in the upper 32 bits, so we can ignore the lower
+ * 32 bits in the shift.
+ * - The shift result will always fit in the lower 32 bits, and therefore,
+ * we can ignore the upper 32 bits in the xor.
+ *
+ * Thanks to this optimization, XXH3 only requires these features to be efficient:
+ *
+ * - Usable unaligned access
+ * - A 32-bit or 64-bit ALU
+ * - If 32-bit, a decent ADC instruction
+ * - A 32 or 64-bit multiply with a 64-bit result
+ * - For the 128-bit variant, a decent byteswap helps short inputs.
+ *
+ * The first two are already required by XXH32, and almost all 32-bit and 64-bit
+ * platforms which can run XXH32 can run XXH3 efficiently.
+ *
+ * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
+ * notable exception.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ *
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
+ * Lo registers, and this shuffling results in thousands more MOVs than A32.
+ *
+ * A32 and T32 don't have this limitation. They can access all 14 registers,
+ * do a 32->64 multiply with UMULL, and the flexible operand allowing free
+ * shifts is helpful, too.
+ *
+ * Therefore, we do a quick sanity check.
+ *
+ * If compiling Thumb-1 for a target which supports ARM instructions, we will
+ * emit a warning, as it is not a "sane" platform to compile for.
+ *
+ * Usually, if this happens, it is because of an accident and you probably need
+ * to specify -march, as you likely meant to compile for a newer architecture.
+ *
+ * Credit: large sections of the vectorial and asm source code paths
+ * have been contributed by @easyaspi314
+ */
+/* Thumb-1 code generation on a target that also has the ARM instruction set: warn only, the build continues. */
+#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
+# warning "XXH3 is highly inefficient without ARM or Thumb-2."
+#endif
+
+/* ==========================================
+ * Vectorization detection
+ * ========================================== */
+
+/*
+ * Documentation-only stubs: everything in this branch is seen only when
+ * XXH_DOXYGEN is defined. The real macro definitions follow in the
+ * `#ifndef XXH_DOXYGEN` section.
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @ingroup tuning
+ * @brief Overrides the vectorization implementation chosen for XXH3.
+ *
+ * Can be defined to 0 to disable SIMD or any of the values mentioned in
+ * @ref XXH_VECTOR_TYPE.
+ *
+ * If this is not defined, it uses predefined macros to determine the best
+ * implementation.
+ */
+# define XXH_VECTOR XXH_SCALAR
+/*!
+ * @ingroup tuning
+ * @brief Possible values for @ref XXH_VECTOR.
+ *
+ * Note that these are actually implemented as macros.
+ *
+ * If this is not defined, it is detected automatically.
+ * internal macro XXH_X86DISPATCH overrides this.
+ */
+enum XXH_VECTOR_TYPE /* fake enum */ {
+ XXH_SCALAR = 0, /*!< Portable scalar version */
+ XXH_SSE2 = 1, /*!<
+ * SSE2 for Pentium 4, Opteron, all x86_64.
+ *
+ * @note SSE2 is also guaranteed on Windows 10, macOS, and
+ * Android x86.
+ */
+ XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
+ XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
+ XXH_NEON = 4, /*!<
+ * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
+ * via the SIMDeverywhere polyfill provided with the
+ * Emscripten SDK.
+ */
+ XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+ XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
+};
+/*!
+ * @ingroup tuning
+ * @brief Selects the minimum alignment for XXH3's accumulators.
+ *
+ * When using SIMD, this should match the alignment required for said vector
+ * type, so, for example, 32 for AVX2.
+ *
+ * Default: Auto detected.
+ */
+# define XXH_ACC_ALIGN 8
+#endif
+
+/*
+ * Actual definition: numeric ids for the XXH_VECTOR backends. The values match
+ * the documentation-only enum XXH_VECTOR_TYPE.
+ */
+#ifndef XXH_DOXYGEN
+# define XXH_SCALAR 0
+# define XXH_SSE2 1
+# define XXH_AVX2 2
+# define XXH_AVX512 3
+# define XXH_NEON 4
+# define XXH_VSX 5
+# define XXH_SVE 6
+#endif
+
+/*
+ * Auto-detects XXH_VECTOR from compiler-predefined macros, first match wins:
+ * SVE, NEON (little endian only), AVX512, AVX2, SSE2, VSX, then scalar.
+ *
+ * NOTE(review): in the VSX test `&&` binds tighter than `||`, so
+ * `defined(__GNUC__)` only qualifies the s390x alternative, not the PPC64 one.
+ * Confirm whether that is intended.
+ */
+#ifndef XXH_VECTOR /* can be defined on command line */
+# if defined(__ARM_FEATURE_SVE)
+# define XXH_VECTOR XXH_SVE
+# elif ( \
+ defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
+ || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+ || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
+ ) && ( \
+ defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
+ )
+# define XXH_VECTOR XXH_NEON
+# elif defined(__AVX512F__)
+# define XXH_VECTOR XXH_AVX512
+# elif defined(__AVX2__)
+# define XXH_VECTOR XXH_AVX2
+# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+# define XXH_VECTOR XXH_SSE2
+# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
+ || (defined(__s390x__) && defined(__VEC__)) \
+ && defined(__GNUC__) /* TODO: IBM XL */
+# define XXH_VECTOR XXH_VSX
+# else
+# define XXH_VECTOR XXH_SCALAR
+# endif
+#endif
+
+/*
+ * __ARM_FEATURE_SVE is only supported by GCC & Clang.
+ * If XXH_VECTOR was set to XXH_SVE (it can be defined on the command line) but
+ * the compiler does not define __ARM_FEATURE_SVE, fall back to XXH_SCALAR.
+ */
+#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+# ifdef _MSC_VER
+# pragma warning(once : 4606)
+# else
+# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+# endif
+# undef XXH_VECTOR
+# define XXH_VECTOR XXH_SCALAR
+#endif
+
+/*
+ * Controls the alignment of the accumulator,
+ * for compatibility with aligned vector loads, which are usually faster.
+ *
+ * A user-provided XXH_ACC_ALIGN takes precedence. There is no final #else, so
+ * an XXH_VECTOR value outside the list below leaves the macro undefined.
+ */
+#ifndef XXH_ACC_ALIGN
+# if defined(XXH_X86DISPATCH)
+# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
+# elif XXH_VECTOR == XXH_SCALAR /* scalar */
+# define XXH_ACC_ALIGN 8
+# elif XXH_VECTOR == XXH_SSE2 /* sse2 */
+# define XXH_ACC_ALIGN 16
+# elif XXH_VECTOR == XXH_AVX2 /* avx2 */
+# define XXH_ACC_ALIGN 32
+# elif XXH_VECTOR == XXH_NEON /* neon */
+# define XXH_ACC_ALIGN 16
+# elif XXH_VECTOR == XXH_VSX /* vsx */
+# define XXH_ACC_ALIGN 16
+# elif XXH_VECTOR == XXH_AVX512 /* avx512 */
+# define XXH_ACC_ALIGN 64
+# elif XXH_VECTOR == XXH_SVE /* sve */
+# define XXH_ACC_ALIGN 64
+# endif
+#endif
+
+/*
+ * XXH_SEC_ALIGN follows XXH_ACC_ALIGN for the x86 backends (including
+ * XXH_X86DISPATCH) and for SVE; every other backend gets 8.
+ */
+#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
+ || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
+# define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_SVE
+# define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#else
+# define XXH_SEC_ALIGN 8
+#endif
+
+/* Type attribute marking a type as may_alias on GCC/Clang; expands to nothing elsewhere. */
+#if defined(__GNUC__) || defined(__clang__)
+# define XXH_ALIASING __attribute__((may_alias))
+#else
+# define XXH_ALIASING /* nothing */
+#endif
+
+/*
+ * UGLY HACK:
+ * GCC usually generates the best code with -O3 for xxHash.
+ *
+ * However, when targeting AVX2, it is overzealous in its unrolling resulting
+ * in code roughly 3/4 the speed of Clang.
+ *
+ * There are other issues, such as GCC splitting _mm256_loadu_si256 into
+ * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
+ * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
+ *
+ * That is why when compiling the AVX2 version, it is recommended to use either
+ * -O2 -mavx2 -march=haswell
+ * or
+ * -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
+ *
+ * Fortunately, we can control the first one with a pragma that forces GCC into
+ * -O2, but the other one we can't control without "failed to inline always
+ * inline function due to target mismatch" warnings.
+ */
+/*
+ * Saves the current GCC options and forces -O2 for the code that follows.
+ * NOTE(review): the matching `#pragma GCC pop_options` is expected later in
+ * the file, outside this section.
+ */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
+# pragma GCC push_options
+# pragma GCC optimize("-O2")
+#endif
+
+#if XXH_VECTOR == XXH_NEON
+
+/*
+ * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
+ * optimizes out the entire hashLong loop because of the aliasing violation.
+ *
+ * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
+ * so the only option is to mark it as aliasing.
+ */
+typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
+ *
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
+ *
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
+ *
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u64_u8` to do a safe
+ * unaligned load.
+ */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) /* AArch64 GCC, not Clang */
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+ return *(xxh_aliasing_uint64x2_t const *)ptr; /* direct dereference through the XXH_ALIASING vector typedef */
+}
+#else /* every other compiler/target that selects XXH_NEON */
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+ return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr)); /* byte-typed load, reinterpreted as uint64x2_t */
+}
+#endif /* AArch64 GCC */
+
+/*!
+ * @internal
+ * @brief `vmlal_u32` on low and high halves of a vector.
+ *
+ * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
+ * inline assembly and were therefore incapable of merging the `vget_{low, high}_u32`
+ * with `vmlal_u32`.
+ */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11 /* AArch64 GCC older than 11 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ /* Inline assembly is the only way: UMLAL is issued on the .2s views of
+ * lhs and rhs and accumulates into acc, which is a read-write ("+w") operand. */
+ __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
+ return acc;
+}
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ /* This intrinsic works as expected, so no assembly is needed for the high half */
+ return vmlal_high_u32(acc, lhs, rhs);
+}
+#else
+/* Portable intrinsic versions: split each vector explicitly, then use vmlal_u32 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs)); /* low halves of lhs and rhs */
+}
+/*! @copydoc XXH_vmlal_low_u32
+ * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs)); /* high halves of lhs and rhs */
+}
+#endif /* AArch64 GCC < 11 workaround */
+
+/*!
+ * @ingroup tuning
+ * @brief Controls the NEON to scalar ratio for XXH3
+ *
+ * This can be set to 2, 4, 6, or 8.
+ *
+ * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
+ *
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
+ * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
+ * bandwidth.
+ *
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
+ *
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
+ *
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
+ *
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
+ * most other CPUs:
+ *
+ * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
+ * |:----------------------|:--------------------|----------:|-----------:|------:|
+ * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
+ * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
+ * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
+ * | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |
+ *
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
+ *
+ * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes, meaning
+ * it effectively becomes a worse version of 4.
+ *
+ * @see XXH3_accumulate_512_neon()
+ */
+# ifndef XXH3_NEON_LANES
+# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
+ && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
+# define XXH3_NEON_LANES 6
+# else
+# define XXH3_NEON_LANES XXH_ACC_NB
+# endif
+# endif
+#endif /* XXH_VECTOR == XXH_NEON */
+
+/*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+#if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+# pragma push_macro("bool")
+# pragma push_macro("vector")
+# pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+# undef bool
+# undef vector
+# undef pixel
+
+# if defined(__s390x__)
+# include <s390intrin.h>
+# else
+# include <altivec.h>
+# endif
+
+/* Restore the original macro values, if applicable. */
+# pragma pop_macro("pixel")
+# pragma pop_macro("vector")
+# pragma pop_macro("bool")
+
+typedef __vector unsigned long long xxh_u64x2;
+typedef __vector unsigned char xxh_u8x16;
+typedef __vector unsigned xxh_u32x4;
+
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
+# ifndef XXH_VSX_BE
+# if defined(__BIG_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+# define XXH_VSX_BE 1
+# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+# warning "-maltivec=be is not recommended. Please use native endianness."
+# define XXH_VSX_BE 1
+# else
+# define XXH_VSX_BE 0
+# endif
+# endif /* !defined(XXH_VSX_BE) */
+
+# if XXH_VSX_BE
+# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
+# define XXH_vec_revb vec_revb
+# else
+/*!
+ * Fallback for POWER9's vec_revb(), built on vec_perm().
+ *
+ * The permute selects bytes 7..0 followed by bytes 15..8 of @p val, i.e. the
+ * bytes of each 8-byte half in reverse order.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
+{
+    xxh_u8x16 const reversedHalves = {
+        0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+        0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08
+    };
+    return vec_perm(val, val, reversedHalves);
+}
+# endif
+# endif /* XXH_VSX_BE */
+
+/*!
+ * Copies sizeof(xxh_u64x2) bytes from @p ptr into a vector without requiring
+ * alignment, and byte swaps the result when XXH_VSX_BE is set.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
+{
+    xxh_u64x2 loaded;
+    XXH_memcpy(&loaded, ptr, sizeof(loaded));
+# if XXH_VSX_BE
+    return XXH_vec_revb(loaded);
+# else
+    return loaded;
+# endif
+}
+
+/*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a while,
+ * and they are endian-dependent. Also, their meanings swap depending on version.
+ * */
+# if defined(__s390x__)
+ /* s390x is always big endian, no issue on this platform */
+# define XXH_vec_mulo vec_mulo
+# define XXH_vec_mule vec_mule
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
+/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+ /* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */
+# define XXH_vec_mulo __builtin_altivec_vmulouw
+# define XXH_vec_mule __builtin_altivec_vmuleuw
+# else
+/* gcc needs inline assembly */
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+/*! Emits the `vmulouw` instruction on @p a and @p b through inline assembly. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 wide;
+    __asm__("vmulouw %0, %1, %2" : "=v" (wide) : "v" (a), "v" (b));
+    return wide;
+}
+/*! Emits the `vmuleuw` instruction on @p a and @p b through inline assembly. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 wide;
+    __asm__("vmuleuw %0, %1, %2" : "=v" (wide) : "v" (a), "v" (b));
+    return wide;
+}
+# endif /* XXH_vec_mulo, XXH_vec_mule */
+#endif /* XXH_VECTOR == XXH_VSX */
+
+#if XXH_VECTOR == XXH_SVE
+/*
+ * One SVE accumulation round on the elements at `offset`.
+ *
+ * Expands to a single statement. It relies on `mask`, `xinput`, `xsecret`
+ * and `kSwap` being in scope at the expansion site. Both macro arguments
+ * are parenthesized so that expression arguments bind as intended.
+ */
+#define ACCRND(acc, offset) \
+do { \
+    svuint64_t input_vec = svld1_u64(mask, xinput + (offset)); \
+    svuint64_t secret_vec = svld1_u64(mask, xsecret + (offset)); \
+    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
+    svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
+    svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
+    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
+    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+    (acc) = svadd_u64_x(mask, (acc), mul); \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
+
+/* prefetch
+ * can be disabled, by declaring XXH_NO_PREFETCH build macro */
+#if defined(XXH_NO_PREFETCH)
+# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
+#else
+# if XXH_SIZE_OPT >= 1
+# define XXH_PREFETCH(ptr) (void)(ptr)
+# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+# else
+# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
+# endif
+#endif /* XXH_NO_PREFETCH */
+
+
+/* ==========================================
+ * XXH3 default settings
+ * ========================================== */
+
+#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
+
+#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
+# error "default keyset is not large enough"
+#endif
+
+/*! Pseudorandom secret taken directly from FARSH. */
+XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
+ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+ 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+ 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+ 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+ 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+ 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+ 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+ 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+ 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+ 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+ 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+};
+
+static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
+static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
+
+#ifdef XXH_OLD_NAMES
+# define kSecret XXH3_kSecret
+#endif
+
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Calculates a 32-bit to 64-bit long multiply.
+ *
+ * Implemented as a macro.
+ *
+ * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
+ * need to (but it shouldn't need to anyways, it is about 7 instructions to do
+ * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
+ * use that instead of the normal method.
+ *
+ * If you are compiling for platforms like Thumb-1 and don't have a better option,
+ * you may also want to write your own long multiply routine here.
+ *
+ * @param x, y Numbers to be multiplied
+ * @return 64-bit product of the low 32 bits of @p x and @p y.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64(xxh_u64 x, xxh_u64 y)
+{
+    /* Only the low 32 bits of each operand take part in the product. */
+    xxh_u64 const x_lo32 = x & 0xFFFFFFFF;
+    xxh_u64 const y_lo32 = y & 0xFFFFFFFF;
+    return x_lo32 * y_lo32;
+}
+#elif defined(_MSC_VER) && defined(_M_IX86)
+# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
+#else
+/*
+ * Downcast + upcast is usually better than masking on older compilers like
+ * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
+ *
+ * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
+ * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
+ */
+# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
+#endif
+
+/*!
+ * @brief Calculates a 64->128-bit long multiply.
+ *
+ * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
+ * version.
+ *
+ * @param lhs , rhs The 64-bit integers to be multiplied
+ * @return The 128-bit result represented in an @ref XXH128_hash_t.
+ */
+static XXH128_hash_t
+XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
+{
+ /*
+ * GCC/Clang __uint128_t method.
+ *
+ * On most 64-bit targets, GCC and Clang define a __uint128_t type.
+ * This is usually the best way as it usually uses a native long 64-bit
+ * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
+ *
+ * Usually.
+ *
+ * On WebAssembly, a 32-bit platform, Clang (and emscripten) define this type
+ * despite not having the arithmetic for it. This results in a laggy
+ * compiler builtin call which calculates a full 128-bit multiply.
+ * In that case it is best to use the portable one, which is why __wasm__
+ * is excluded in the condition below.
+ * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
+ */
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
+ && defined(__SIZEOF_INT128__) \
+ || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+
+ __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
+ XXH128_hash_t r128;
+ r128.low64 = (xxh_u64)(product);
+ r128.high64 = (xxh_u64)(product >> 64);
+ return r128;
+
+ /*
+ * MSVC for x64's _umul128 method.
+ *
+ * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
+ *
+ * This compiles to single operand MUL on x64.
+ */
+#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
+
+#ifndef _MSC_VER /* NOTE(review): the pragma is only seen when _MSC_VER is NOT defined -- confirm this is intended */
+# pragma intrinsic(_umul128)
+#endif
+ xxh_u64 product_high;
+ xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
+ XXH128_hash_t r128;
+ r128.low64 = product_low;
+ r128.high64 = product_high;
+ return r128;
+
+ /*
+ * MSVC for ARM64's __umulh method.
+ *
+ * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
+ */
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+
+#ifndef _MSC_VER /* NOTE(review): same guard polarity as the _umul128 branch -- confirm this is intended */
+# pragma intrinsic(__umulh)
+#endif
+ XXH128_hash_t r128;
+ r128.low64 = lhs * rhs;
+ r128.high64 = __umulh(lhs, rhs);
+ return r128;
+
+#else
+ /*
+ * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
+ *
+ * This is a fast and simple grade school multiply, which is shown below
+ * with base 10 arithmetic instead of base 0x100000000.
+ *
+ * 9 3 // D2 lhs = 93
+ * x 7 5 // D2 rhs = 75
+ * ----------
+ * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
+ * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
+ * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
+ * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
+ * ---------
+ * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
+ * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
+ * ---------
+ * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
+ *
+ * The reasons for adding the products like this are:
+ * 1. It avoids manual carry tracking. Just like how
+ * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
+ * This avoids a lot of complexity.
+ *
+ * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
+ * instruction available in ARM's Digital Signal Processing extension
+ * in 32-bit ARMv6 and later, which is shown below:
+ *
+ * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
+ * {
+ * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
+ * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
+ * *RdHi = (xxh_u32)(product >> 32);
+ * }
+ *
+ * This instruction was designed for efficient long multiplication, and
+ * allows this to be calculated in only 4 instructions at speeds
+ * comparable to some 64-bit ALUs.
+ *
+ * 3. It isn't terrible on other platforms. Usually this will be a couple
+ * of 32-bit ADD/ADCs.
+ */
+
+ /* First calculate all of the cross products (each one is a 32x32->64 multiply). */
+ xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
+ xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
+ xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
+ xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
+
+ /* Now add the products together. These will never overflow. */
+ xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; /* same role as `cross` in the diagram */
+ xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; /* becomes high64 */
+ xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); /* becomes low64 */
+
+ XXH128_hash_t r128;
+ r128.low64 = lower;
+ r128.high64 = upper;
+ return r128;
+#endif
+}
+
+/*!
+ * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
+ *
+ * The reason for the separate function is to prevent passing too many structs
+ * around by value. This will hopefully inline the multiply, but we don't force it.
+ *
+ * @param lhs , rhs The 64-bit integers to multiply
+ * @return The low 64 bits of the product XOR'd by the high 64 bits.
+ * @see XXH_mult64to128()
+ */
+static xxh_u64
+XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
+{
+    /* Take the full 128-bit product, then XOR its two 64-bit halves together. */
+    XXH128_hash_t const wide = XXH_mult64to128(lhs, rhs);
+    xxh_u64 const folded = wide.high64 ^ wide.low64;
+    return folded;
+}
+
+/*!
+ * XORs @p v64 with itself shifted right by @p shift bits (0 <= shift < 64).
+ * Having this as a helper seems to produce slightly better code on GCC for
+ * some reason.
+ */
+XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+{
+    XXH_ASSERT(0 <= shift && shift < 64);
+    {   xxh_u64 const shifted = v64 >> shift;
+        return shifted ^ v64;
+    }
+}
+
+/*
+ * Fast avalanche stage: xorshift by 37, multiply by PRIME_MX1, xorshift by 32.
+ * Suitable when the input bits are already partially mixed.
+ */
+static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
+{
+    xxh_u64 const multiplied = XXH_xorshift64(h64, 37) * PRIME_MX1;
+    return XXH_xorshift64(multiplied, 32);
+}
+
+/*
+ * Stronger avalanche, inspired by Pelle Evensen's rrmxmx.
+ * Preferable when the input has not been previously mixed.
+ * @p len is folded into the state between the two multiplies.
+ */
+static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
+{
+    xxh_u64 state = h64 ^ XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
+    state *= PRIME_MX2;
+    state ^= (state >> 35) + len;
+    state *= PRIME_MX2;
+    return XXH_xorshift64(state, 28);
+}
+
+
+/* ==========================================
+ * Short keys
+ * ==========================================
+ * One of the shortcomings of XXH32 and XXH64 was that their performance was
+ * sub-optimal on short lengths. It used an iterative algorithm which strongly
+ * favored lengths that were a multiple of 4 or 8.
+ *
+ * Instead of iterating over individual inputs, we use a set of single shot
+ * functions which piece together a range of lengths and operate in constant time.
+ *
+ * Additionally, the number of multiplies has been significantly reduced. This
+ * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
+ *
+ * Depending on the platform, this may or may not be faster than XXH32, but it
+ * is almost guaranteed to be faster than XXH64.
+ */
+
+/*
+ * At very short lengths, there isn't enough input to fully hide secrets, or use
+ * the entire secret.
+ *
+ * There is also only a limited amount of mixing we can do before significantly
+ * impacting performance.
+ *
+ * Therefore, we use different sections of the secret and always mix two secret
+ * samples with an XOR. This should have no effect on performance on the
+ * seedless or withSeed variants because everything _should_ be constant folded
+ * by modern compilers.
+ *
+ * The XOR mixing hides individual parts of the secret and increases entropy.
+ *
+ * This adds an extra layer of strength for custom secrets.
+ */
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(1 <= len && len <= 3);
+    XXH_ASSERT(secret != NULL);
+    /*
+     * The bytes are packed into one 32-bit word, least significant byte first:
+     * len = 1: packed = { input[0], 0x01, input[0], input[0] }
+     * len = 2: packed = { input[1], 0x02, input[0], input[1] }
+     * len = 3: packed = { input[2], 0x03, input[0], input[1] }
+     */
+    {   xxh_u32 const first  = input[0];
+        xxh_u32 const middle = input[len >> 1];
+        xxh_u32 const last   = input[len - 1];
+        xxh_u32 const packed = (middle << 24) | (first << 16)
+                             | ((xxh_u32)len << 8) | last;
+        /* Two 32-bit secret samples XORed together, offset by the seed. */
+        xxh_u64 const secretMix = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
+        return XXH64_avalanche((xxh_u64)packed ^ secretMix);
+    }
+}
+
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(4 <= len && len <= 8);
+    {   /* XOR the byte-swapped low half of the seed into its upper 32 bits. */
+        xxh_u64 const seedHigh  = (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
+        xxh_u64 const mixedSeed = seed ^ seedHigh;
+        /* First and last 4 bytes of the input; they overlap when len < 8. */
+        xxh_u64 const head = XXH_readLE32(input);
+        xxh_u64 const tail = XXH_readLE32(input + len - 4);
+        xxh_u64 const secretMix = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - mixedSeed;
+        xxh_u64 const packed = (head << 32) + tail;
+        return XXH3_rrmxmx(packed ^ secretMix, len);
+    }
+}
+
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(9 <= len && len <= 16);
+    {   xxh_u64 const flipLo = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
+        xxh_u64 const flipHi = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
+        /* First and last 8 bytes of the input; they overlap when len < 16. */
+        xxh_u64 const lo = XXH_readLE64(input) ^ flipLo;
+        xxh_u64 const hi = XXH_readLE64(input + len - 8) ^ flipHi;
+        xxh_u64 acc = len;
+        acc += XXH_swap64(lo);
+        acc += hi;
+        acc += XXH3_mul128_fold64(lo, hi);
+        return XXH3_avalanche(acc);
+    }
+}
+
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(len <= 16);
+    /* Dispatch on length, longest range first. */
+    if (XXH_likely(len > 8)) {
+        return XXH3_len_9to16_64b(input, len, secret, seed);
+    }
+    if (XXH_likely(len >= 4)) {
+        return XXH3_len_4to8_64b(input, len, secret, seed);
+    }
+    if (len != 0) {
+        return XXH3_len_1to3_64b(input, len, secret, seed);
+    }
+    /* Empty input: the result depends only on the seed and two secret samples. */
+    {   xxh_u64 const secretMix = XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64);
+        return XXH64_avalanche(seed ^ secretMix);
+    }
+}
+
+/*
+ * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
+ * multiplication by zero, affecting hashes of lengths 17 to 240.
+ *
+ * However, they are very unlikely.
+ *
+ * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
+ * unseeded non-cryptographic hashes, it does not attempt to defend itself
+ * against specially crafted inputs, only random inputs.
+ *
+ * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
+ * cancelling out the secret is taken an arbitrary number of times (addressed
+ * in XXH3_accumulate_512), this collision is very unlikely with random inputs
+ * and/or proper seeding:
+ *
+ * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
+ * function that is only called up to 16 times per hash with up to 240 bytes of
+ * input.
+ *
+ * This is not too bad for a non-cryptographic hash function, especially with
+ * only 64 bit outputs.
+ *
+ * The 128-bit variant (which trades some speed for strength) is NOT affected
+ * by this, although it is always a good idea to use a proper seed if you care
+ * about strength.
+ */
+XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
+ const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable this hack, as with the XXH32 one */
+ /*
+ * UGLY HACK:
+ * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
+ * slower code.
+ *
+ * By forcing seed64 into a register, we disrupt the cost model and
+ * cause it to scalarize. See `XXH32_round()`
+ *
+ * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
+ * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
+ * GCC 9.2, despite both emitting scalar code.
+ *
+ * GCC generates much better scalar code than Clang for the rest of XXH3,
+ * which is why finding a more optimal codepath is an interest.
+ */
+ XXH_COMPILER_GUARD(seed64);
+#endif
+ { xxh_u64 const input_lo = XXH_readLE64(input); /* input bytes 0..7 */
+ xxh_u64 const input_hi = XXH_readLE64(input+8); /* input bytes 8..15 */
+ return XXH3_mul128_fold64(
+ input_lo ^ (XXH_readLE64(secret) + seed64), /* secret word 0, seed added */
+ input_hi ^ (XXH_readLE64(secret+8) - seed64) /* secret word 1, seed subtracted */
+ );
+ }
+}
+
+/*
+ * For mid range keys (17 to 128 bytes), XXH3 uses a Mum-hash variant.
+ *
+ * Each step mixes one 16-byte chunk counted from the front of the input and
+ * one counted from the back, each against its own 16 bytes of secret.
+ */
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                     XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(16 < len && len <= 128);
+
+    {   xxh_u64 acc = len * XXH_PRIME64_1;
+#if XXH_SIZE_OPT >= 1
+        /* Smaller and cleaner, but slightly slower. */
+        unsigned int pair = (unsigned int)(len - 1) / 32;
+        for (;;) {
+            acc += XXH3_mix16B(input+16 * pair, secret+32*pair, seed);
+            acc += XXH3_mix16B(input+len-16*(pair+1), secret+32*pair+16, seed);
+            if (pair == 0) break;
+            pair--;
+        }
+#else
+        /* Outermost chunk pairs are added last; longer inputs add inner pairs first. */
+        if (len > 96) {
+            acc += XXH3_mix16B(input+48, secret+96, seed);
+            acc += XXH3_mix16B(input+len-64, secret+112, seed);
+        }
+        if (len > 64) {
+            acc += XXH3_mix16B(input+32, secret+64, seed);
+            acc += XXH3_mix16B(input+len-48, secret+80, seed);
+        }
+        if (len > 32) {
+            acc += XXH3_mix16B(input+16, secret+32, seed);
+            acc += XXH3_mix16B(input+len-32, secret+48, seed);
+        }
+        acc += XXH3_mix16B(input+0, secret+0, seed);
+        acc += XXH3_mix16B(input+len-16, secret+16, seed);
+#endif
+        return XXH3_avalanche(acc);
+    }
+}
+
+#define XXH3_MIDSIZE_MAX 240
+
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+ XXH64_hash_t seed)
+{
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+ XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+ #define XXH3_MIDSIZE_STARTOFFSET 3 /* secret byte offset used by rounds 8 and up */
+ #define XXH3_MIDSIZE_LASTOFFSET 17 /* subtracted from XXH3_SECRET_SIZE_MIN for the final mix */
+
+ { xxh_u64 acc = len * XXH_PRIME64_1;
+ xxh_u64 acc_end;
+ unsigned int const nbRounds = (unsigned int)len / 16; /* number of whole 16-byte chunks */
+ unsigned int i;
+ XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); /* NOTE(review): repeats the assertion at the top of the function */
+ for (i=0; i<8; i++) { /* first 8 rounds: input bytes 0..127 against secret bytes 0..127 */
+ acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
+ }
+ /* last 16 bytes of input, mixed with the secret at XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET */
+ acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+ XXH_ASSERT(nbRounds >= 8);
+ acc = XXH3_avalanche(acc);
+#if defined(__clang__) /* Clang */ \
+ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
+ /*
+ * UGLY HACK:
+ * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
+ * Everywhere else, it uses scalar code.
+ *
+ * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
+ * would still be slower than UMAAL (see XXH_mult64to128).
+ *
+ * Unfortunately, Clang doesn't handle the long multiplies properly and
+ * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
+ * scalarized into an ugly mess of VMOV.32 instructions.
+ *
+ * This mess is difficult to avoid without turning autovectorization
+ * off completely, but they are usually relatively minor and/or not
+ * worth it to fix.
+ *
+ * This loop is the easiest to fix, as unlike XXH32, this pragma
+ * _actually works_ because it is a loop vectorization instead of an
+ * SLP vectorization.
+ */
+ #pragma clang loop vectorize(disable)
+#endif
+ for (i=8 ; i < nbRounds; i++) {
+ /*
+ * Prevents clang from unrolling the acc loop and interleaving with this one.
+ */
+ XXH_COMPILER_GUARD(acc);
+ acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+ }
+ return XXH3_avalanche(acc + acc_end);
+ }
+}
+
+
+/* ======= Long Keys ======= */
+
+#define XXH_STRIPE_LEN 64
+#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
+#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
+
+#ifdef XXH_OLD_NAMES
+# define STRIPE_LEN XXH_STRIPE_LEN
+# define ACC_NB XXH_ACC_NB
+#endif
+
+#ifndef XXH_PREFETCH_DIST
+# ifdef __clang__
+# define XXH_PREFETCH_DIST 320
+# else
+# if (XXH_VECTOR == XXH_AVX512)
+# define XXH_PREFETCH_DIST 512
+# else
+# define XXH_PREFETCH_DIST 384
+# endif
+# endif /* __clang__ */
+#endif /* XXH_PREFETCH_DIST */
+
+/*
+ * This macro generates an XXH3_accumulate() function.
+ * Its single argument selects the name suffix; any inline or target
+ * attributes are written in front of the macro invocation.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
+ */
+#define XXH3_ACCUMULATE_TEMPLATE(name)                                   \
+void                                                                     \
+XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,                        \
+                       const xxh_u8* XXH_RESTRICT input,                 \
+                       const xxh_u8* XXH_RESTRICT secret,                \
+                       size_t nbStripes)                                 \
+{                                                                        \
+    size_t stripe = 0;                                                   \
+    while (stripe < nbStripes) {                                         \
+        const xxh_u8* const stripeIn = input + stripe*XXH_STRIPE_LEN;    \
+        XXH_PREFETCH(stripeIn + XXH_PREFETCH_DIST);                      \
+        XXH3_accumulate_512_##name(                                      \
+                 acc,                                                    \
+                 stripeIn,                                               \
+                 secret + stripe*XXH_SECRET_CONSUME_RATE);               \
+        stripe++;                                                        \
+    }                                                                    \
+}
+
+
+/*! Writes @p v64 to @p dst in little-endian byte order, byte swapping first on big-endian CPUs. */
+XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
+{
+    xxh_u64 const leValue = XXH_CPU_LITTLE_ENDIAN ? v64 : XXH_swap64(v64);
+    XXH_memcpy(dst, &leValue, sizeof(leValue));
+}
+
+/* Several intrinsic functions below are supposed to accept __int64 as argument,
+ * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
+ * However, several environments do not define __int64 type,
+ * requiring a workaround.
+ */
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+ typedef int64_t xxh_i64;
+#else
+ /* the following type must have a width of 64-bit */
+ typedef long long xxh_i64;
+#endif
+
+
+/*
+ * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
+ *
+ * It is a hardened version of UMAC, based off of FARSH's implementation.
+ *
+ * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
+ * implementations, and it is ridiculously fast.
+ *
+ * We harden it by mixing the original input to the accumulators as well as the product.
+ *
+ * This means that in the (relatively likely) case of a multiply by zero, the
+ * original input is preserved.
+ *
+ * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
+ * cross-pollination, as otherwise the upper and lower halves would be
+ * essentially independent.
+ *
+ * This doesn't matter on 64-bit hashes since they all get merged together in
+ * the end, so we skip the extra step.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
+
+#if (XXH_VECTOR == XXH_AVX512) \
+ || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
+
+#ifndef XXH_TARGET_AVX512
+# define XXH_TARGET_AVX512 /* disable attribute target */
+#endif
+/*!
+ * @internal
+ * @brief AVX512 variant of the accumulate step: mixes one stripe into the accumulators.
+ *
+ * One __m512i is loaded from @p input and one from @p secret (both with
+ * unaligned loads). Each 64-bit accumulator lane then receives:
+ *   - the 32x32->64 product of the two 32-bit halves of (input ^ secret), and
+ *   - the input lane of its neighbour (64-bit pairs are swapped by the shuffle).
+ *
+ * @param acc    Accumulators, accessed as a single __m512i; asserted 64-byte aligned.
+ * @param input  Input stripe; XXH_STRIPE_LEN (== sizeof(__m512i)) bytes are read.
+ * @param secret Secret bytes for this stripe; sizeof(__m512i) bytes are read.
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ __m512i* const xacc = (__m512i *) acc;
+ XXH_ASSERT((((size_t)acc) & 63) == 0);
+ XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
+
+ {
+ /* data_vec = input[0]; */
+ __m512i const data_vec = _mm512_loadu_si512 (input);
+ /* key_vec = secret[0]; */
+ __m512i const key_vec = _mm512_loadu_si512 (secret);
+ /* data_key = data_vec ^ key_vec; */
+ __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; (despite its name, this holds the upper 32 bits of each lane) */
+ __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
+ /* xacc[0] += swap(data_vec); */
+ __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
+ __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
+ /* xacc[0] += product; */
+ *xacc = _mm512_add_epi64(product, sum);
+ }
+}
+XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) /* multi-stripe loop around XXH3_accumulate_512_avx512(), see XXH3_ACCUMULATE_TEMPLATE */
+
+/*
+ * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
+ *
+ * Multiplication isn't perfect, as explained by Google in HighwayHash:
+ *
+ * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
+ * // varying degrees. In descending order of goodness, bytes
+ * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
+ * // As expected, the upper and lower bytes are much worse.
+ *
+ * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
+ *
+ * Since our algorithm uses a pseudorandom secret to add some variance into the
+ * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
+ *
+ * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
+ * extraction.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
+/*!
+ * @internal
+ * @brief AVX512 variant of the scramble step, applied to all accumulators at once.
+ *
+ * For every 64-bit lane: acc = (acc ^ (acc >> 47) ^ secret) * XXH_PRIME32_1.
+ * The 64-bit multiply is assembled from two 32x32->64 multiplies.
+ *
+ * @param acc    Accumulators, accessed as a single __m512i; asserted 64-byte aligned.
+ * @param secret Secret bytes for this scramble; sizeof(__m512i) bytes are read unaligned.
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 63) == 0);
+ XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
+ { __m512i* const xacc = (__m512i*) acc;
+ const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
+
+ /* xacc[0] ^= (xacc[0] >> 47) */
+ __m512i const acc_vec = *xacc;
+ __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
+ /* xacc[0] ^= secret; (both XORs are fused into one ternary-logic operation) */
+ __m512i const key_vec = _mm512_loadu_si512 (secret);
+ __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
+
+ /* xacc[0] *= XXH_PRIME32_1; computed as lo*prime + ((hi*prime) << 32) */
+ __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
+ __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
+ __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
+ *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
+ }
+}
+/*!
+ * @internal
+ * @brief AVX512 derivation of a seeded secret from XXH3_kSecret.
+ *
+ * Writes XXH_SECRET_DEFAULT_SIZE bytes to @p customSecret: every 64-bit word of
+ * XXH3_kSecret with @p seed64 added (even-indexed words) or subtracted
+ * (odd-indexed words).
+ *
+ * @param customSecret Destination buffer; asserted 64-byte aligned.
+ * @param seed64       Seed combined with the default secret.
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+ XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
+ XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
+ XXH_ASSERT(((size_t)customSecret & 63) == 0);
+ (void)(&XXH_writeLE64); /* address taken and discarded; presumably silences an unused-function warning */
+ { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
+ __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
+ __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); /* mask 0xAA: odd lanes become 0 - seed64, even lanes keep +seed64 */
+
+ const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
+ __m512i* const dest = ( __m512i*) customSecret;
+ int i;
+ XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
+ XXH_ASSERT(((size_t)dest & 63) == 0);
+ for (i=0; i < nbRounds; ++i) {
+ dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
+ } }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_AVX2) \
+ || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
+
+#ifndef XXH_TARGET_AVX2
+# define XXH_TARGET_AVX2 /* disable attribute target */
+#endif
+/*!
+ * @internal
+ * @brief AVX2 variant of the accumulate step: mixes one stripe into the accumulators.
+ *
+ * Processes the stripe as XXH_STRIPE_LEN/sizeof(__m256i) vectors. For each
+ * 64-bit lane the accumulator receives the 32x32->64 product of the two halves
+ * of (input ^ secret), plus the input lane of its neighbour (64-bit pairs are
+ * swapped by the shuffle).
+ *
+ * @param acc    Accumulators, accessed as __m256i; asserted 32-byte aligned.
+ * @param input  Input stripe; read with unaligned loads.
+ * @param secret Secret bytes for this stripe; read with unaligned loads.
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 31) == 0);
+ { __m256i* const xacc = (__m256i *) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+ const __m256i* const xinput = (const __m256i *) input;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+ const __m256i* const xsecret = (const __m256i *) secret;
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
+ /* data_vec = xinput[i]; */
+ __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
+ /* key_vec = xsecret[i]; */
+ __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
+ /* data_key = data_vec ^ key_vec; */
+ __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; (despite its name, this holds the upper 32 bits of each lane) */
+ __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
+ /* xacc[i] += swap(data_vec); */
+ __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
+ __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
+ /* xacc[i] += product; */
+ xacc[i] = _mm256_add_epi64(product, sum);
+ } }
+}
+XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2) /* multi-stripe loop around XXH3_accumulate_512_avx2(), see XXH3_ACCUMULATE_TEMPLATE */
+/*!
+ * @internal
+ * @brief AVX2 variant of the scramble step.
+ *
+ * For every 64-bit lane: acc = (acc ^ (acc >> 47) ^ secret) * XXH_PRIME32_1.
+ * The 64-bit multiply is assembled from two 32x32->64 multiplies.
+ *
+ * @param acc    Accumulators, accessed as __m256i; asserted 32-byte aligned.
+ * @param secret Secret bytes for this scramble; read with unaligned loads.
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 31) == 0);
+ { __m256i* const xacc = (__m256i*) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+ const __m256i* const xsecret = (const __m256i *) secret;
+ const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
+ /* xacc[i] ^= (xacc[i] >> 47) */
+ __m256i const acc_vec = xacc[i];
+ __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
+ __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
+ /* xacc[i] ^= xsecret[i]; */
+ __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
+ __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
+
+ /* xacc[i] *= XXH_PRIME32_1; computed as lo*prime + ((hi*prime) << 32) */
+ __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
+ __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
+ __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
+ xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
+ }
+ }
+}
+/*!
+ * @internal
+ * @brief AVX2 derivation of a seeded secret from XXH3_kSecret.
+ *
+ * Writes six __m256i vectors (XXH_SECRET_DEFAULT_SIZE bytes) to @p customSecret:
+ * every 64-bit word of XXH3_kSecret with @p seed64 added (even-indexed words)
+ * or subtracted (odd-indexed words).
+ *
+ * @param customSecret Destination buffer; asserted 32-byte aligned.
+ * @param seed64       Seed combined with the default secret.
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+ XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
+ XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); /* the unrolled stores below rely on exactly 6 vectors */
+ XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
+ (void)(&XXH_writeLE64); /* address taken and discarded; presumably silences an unused-function warning */
+ XXH_PREFETCH(customSecret);
+ { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); /* arguments run from highest to lowest lane: even lanes +seed64, odd lanes -seed64 */
+
+ const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
+ __m256i* dest = ( __m256i*) customSecret;
+
+# if defined(__GNUC__) || defined(__clang__)
+ /*
+ * On GCC & Clang, marking 'dest' as modified will cause the compiler:
+ * - do not extract the secret from sse registers in the internal loop
+ * - use less common registers, and avoid pushing these reg into stack
+ */
+ XXH_COMPILER_GUARD(dest);
+# endif
+ XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
+ XXH_ASSERT(((size_t)dest & 31) == 0);
+
+ /* GCC -O2 need unroll loop manually */
+ dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
+ dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
+ dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
+ dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
+ dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
+ dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
+ }
+}
+
+#endif
+
+/* x86dispatch always generates SSE2 */
+#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
+
+#ifndef XXH_TARGET_SSE2
+# define XXH_TARGET_SSE2 /* disable attribute target */
+#endif
+/*!
+ * @internal
+ * @brief SSE2 variant of the accumulate step: mixes one stripe into the accumulators.
+ *
+ * Processes the stripe as XXH_STRIPE_LEN/sizeof(__m128i) vectors. For each
+ * 64-bit lane the accumulator receives the 32x32->64 product of the two halves
+ * of (input ^ secret), plus the other 64-bit input lane of the same vector.
+ *
+ * @param acc    Accumulators, accessed as __m128i; asserted 16-byte aligned.
+ * @param input  Input stripe; read with unaligned loads.
+ * @param secret Secret bytes for this stripe; read with unaligned loads.
+ */
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void
+XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ /* SSE2 is just a half-scale version of the AVX2 version. */
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+ { __m128i* const xacc = (__m128i *) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+ const __m128i* const xinput = (const __m128i *) input;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+ const __m128i* const xsecret = (const __m128i *) secret;
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
+ /* data_vec = xinput[i]; */
+ __m128i const data_vec = _mm_loadu_si128 (xinput+i);
+ /* key_vec = xsecret[i]; */
+ __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
+ /* data_key = data_vec ^ key_vec; */
+ __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; (done with a shuffle that moves each lane's upper
+ * 32 bits into its lower half; only those lower halves feed the multiply below) */
+ __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
+ /* xacc[i] += swap(data_vec); */
+ __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
+ __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
+ /* xacc[i] += product; */
+ xacc[i] = _mm_add_epi64(product, sum);
+ } }
+}
+XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) /* multi-stripe loop around XXH3_accumulate_512_sse2(), see XXH3_ACCUMULATE_TEMPLATE */
+/*!
+ * @internal
+ * @brief SSE2 variant of the scramble step.
+ *
+ * For every 64-bit lane: acc = (acc ^ (acc >> 47) ^ secret) * XXH_PRIME32_1.
+ * The 64-bit multiply is assembled from two 32x32->64 multiplies.
+ *
+ * @param acc    Accumulators, accessed as __m128i; asserted 16-byte aligned.
+ * @param secret Secret bytes for this scramble; read with unaligned loads.
+ */
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void
+XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+ { __m128i* const xacc = (__m128i*) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+ const __m128i* const xsecret = (const __m128i *) secret;
+ const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
+ /* xacc[i] ^= (xacc[i] >> 47) */
+ __m128i const acc_vec = xacc[i];
+ __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
+ __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
+ /* xacc[i] ^= xsecret[i]; */
+ __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
+ __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
+
+ /* xacc[i] *= XXH_PRIME32_1; computed as lo*prime + ((hi*prime) << 32) */
+ __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
+ __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
+ xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
+ }
+ }
+}
+/*!
+ * @internal
+ * @brief SSE2 derivation of a seeded secret from XXH3_kSecret.
+ *
+ * Writes XXH_SECRET_DEFAULT_SIZE bytes to @p customSecret: every 64-bit word of
+ * XXH3_kSecret with @p seed64 added (even-indexed words) or subtracted
+ * (odd-indexed words).
+ *
+ * @param customSecret Destination buffer; asserted 16-byte aligned.
+ * @param seed64       Seed combined with the default secret.
+ */
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+ XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+ (void)(&XXH_writeLE64); /* address taken and discarded; presumably silences an unused-function warning */
+ { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
+
+# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
+ /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
+ XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
+ __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
+# else
+ __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); /* high lane: -seed64, low lane: +seed64 */
+# endif
+ int i;
+
+ const void* const src16 = XXH3_kSecret;
+ __m128i* dst16 = (__m128i*) customSecret;
+# if defined(__GNUC__) || defined(__clang__)
+ /*
+ * On GCC & Clang, marking 'dest' as modified will cause the compiler:
+ * - do not extract the secret from sse registers in the internal loop
+ * - use less common registers, and avoid pushing these reg into stack
+ */
+ XXH_COMPILER_GUARD(dst16);
+# endif
+ XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
+ XXH_ASSERT(((size_t)dst16 & 15) == 0);
+
+ for (i=0; i < nbRounds; ++i) {
+ dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
+ } }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_NEON)
+
+/* Forward declarations for the scalar routines, which are defined further down
+ * in this file. The NEON code paths below call them for the accumulator lanes
+ * at index XXH3_NEON_LANES and above. */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
+ void const* XXH_RESTRICT secret, size_t lane);
+
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+ void const* XXH_RESTRICT secret, size_t lane);
+
+/*!
+ * @internal
+ * @brief The bulk processing loop for NEON and WASM SIMD128.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers instead of the other platforms which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
+ * there needs to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ *
+ * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
+ * nearly perfectly.
+ */
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+ XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
+ { /* GCC for darwin arm64 does not like aliasing here */
+ xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
+ /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
+ uint8_t const* xinput = (const uint8_t *) input;
+ uint8_t const* xsecret = (const uint8_t *) secret;
+
+ size_t i;
+#ifdef __wasm_simd128__
+ /*
+ * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
+ * is constant propagated, which results in it converting it to this
+ * inside the loop:
+ *
+ * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0)
+ * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
+ * ...
+ *
+ * This requires a full 32-bit address immediate (and therefore a 6 byte
+ * instruction) as well as an add for each offset.
+ *
+ * Putting an asm guard prevents it from folding (at the cost of losing
+ * the alignment hint), and uses the free offset in `v128.load` instead
+ * of adding secret_offset each time which overall reduces code size by
+ * about a kilobyte and improves performance.
+ */
+ XXH_COMPILER_GUARD(xsecret);
+#endif
+ /* Scalar lanes (index XXH3_NEON_LANES and above) use the normal scalarRound routine */
+ for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+ XXH3_scalarRound(acc, input, secret, i);
+ }
+ i = 0;
+ /* 4 NEON lanes at a time: each iteration updates the two vectors xacc[i] and xacc[i+1]. */
+ for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
+ /* data_vec = xinput[i]; */
+ uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
+ uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
+ /* key_vec = xsecret[i]; */
+ uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
+ uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
+ /* data_swap = swap(data_vec) */
+ uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
+ uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
+ /* data_key = data_vec ^ key_vec; */
+ uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
+ uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
+
+ /*
+ * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
+ * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
+ * get one vector with the low 32 bits of each lane, and one vector
+ * with the high 32 bits of each lane.
+ *
+ * The intrinsic returns a double vector because the original ARMv7-a
+ * instruction modified both arguments in place. AArch64 and SIMD128 emit
+ * two instructions from this intrinsic.
+ *
+ * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
+ * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
+ */
+ uint32x4x2_t unzipped = vuzpq_u32(
+ vreinterpretq_u32_u64(data_key_1),
+ vreinterpretq_u32_u64(data_key_2)
+ );
+ /* data_key_lo = data_key & 0xFFFFFFFF */
+ uint32x4_t data_key_lo = unzipped.val[0];
+ /* data_key_hi = data_key >> 32 */
+ uint32x4_t data_key_hi = unzipped.val[1];
+ /*
+ * Then, we can split the vectors horizontally and multiply which, as for most
+ * widening intrinsics, have a variant that works on both high half vectors
+ * for free on AArch64. A similar instruction is available on SIMD128.
+ *
+ * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
+ */
+ uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
+ uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
+ /*
+ * Clang reorders
+ * swap += dkl * dkh; // umlal swap.2d, dkl.2s, dkh.2s
+ * acc += swap; // add acc.2d, acc.2d, swap.2d
+ * to
+ * acc += swap; // add acc.2d, acc.2d, swap.2d
+ * acc += dkl * dkh; // umlal acc.2d, dkl.2s, dkh.2s
+ *
+ * While it would make sense in theory since the addition is faster,
+ * for reasons likely related to umlal being limited to certain NEON
+ * pipelines, this is worse. A compiler guard fixes this.
+ */
+ XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
+ XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
+ /* xacc[i] = acc_vec + sum; */
+ xacc[i] = vaddq_u64(xacc[i], sum_1);
+ xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
+ }
+ /* Operate on the remaining NEON lanes 2 at a time. */
+ for (; i < XXH3_NEON_LANES / 2; i++) {
+ /* data_vec = xinput[i]; */
+ uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
+ /* key_vec = xsecret[i]; */
+ uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
+ /* data_swap = swap(data_vec) */
+ uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
+ /* data_key = data_vec ^ key_vec; */
+ uint64x2_t data_key = veorq_u64(data_vec, key_vec);
+ /* For two lanes, just use VMOVN and VSHRN. */
+ /* data_key_lo = data_key & 0xFFFFFFFF; */
+ uint32x2_t data_key_lo = vmovn_u64(data_key);
+ /* data_key_hi = data_key >> 32; */
+ uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
+ /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
+ uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
+ /* Same Clang workaround as before */
+ XXH_COMPILER_GUARD_CLANG_NEON(sum);
+ /* xacc[i] = acc_vec + sum; */
+ xacc[i] = vaddq_u64 (xacc[i], sum);
+ }
+ }
+}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) /* multi-stripe loop around XXH3_accumulate_512_neon(), see XXH3_ACCUMULATE_TEMPLATE */
+/*!
+ * @internal
+ * @brief NEON / WASM SIMD128 variant of the scramble step.
+ *
+ * For every 64-bit lane: acc = (acc ^ (acc >> 47) ^ secret) * XXH_PRIME32_1.
+ * Lanes at index XXH3_NEON_LANES and above go through
+ * XXH3_scalarScrambleRound(); the lanes below that are processed two at a time
+ * as uint64x2_t vectors.
+ *
+ * @param acc    Accumulators; asserted 16-byte aligned.
+ * @param secret Secret bytes for this scramble; read through XXH_vld1q_u64().
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+
+ { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
+ uint8_t const* xsecret = (uint8_t const*) secret;
+
+ size_t i;
+ /* WASM uses operator overloads and doesn't need these. */
+#ifndef __wasm_simd128__
+ /* { prime32_1, prime32_1 } */
+ uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
+ /* { 0, prime32_1, 0, prime32_1 } */
+ uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
+#endif
+
+ /* AArch64 uses both scalar and neon at the same time */
+ for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+ XXH3_scalarScrambleRound(acc, secret, i);
+ }
+ for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+ /* xacc[i] ^= (xacc[i] >> 47); */
+ uint64x2_t acc_vec = xacc[i];
+ uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
+ uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
+
+ /* xacc[i] ^= xsecret[i]; */
+ uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
+ uint64x2_t data_key = veorq_u64(data_vec, key_vec);
+ /* xacc[i] *= XXH_PRIME32_1 */
+#ifdef __wasm_simd128__
+ /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
+ xacc[i] = data_key * XXH_PRIME32_1;
+#else
+ /*
+ * Expanded version with portable NEON intrinsics
+ *
+ * lo(x) * lo(y) + (hi(x) * lo(y) << 32)
+ *
+ * prod_hi = hi(data_key) * lo(prime) << 32
+ *
+ * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector
+ * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
+ * and avoid the shift.
+ */
+ uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
+ /* Extract low bits for vmlal_u32 */
+ uint32x2_t data_key_lo = vmovn_u64(data_key);
+ /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
+ xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
+#endif
+ }
+ }
+}
+#endif
+
+#if (XXH_VECTOR == XXH_VSX)
+/*!
+ * @internal
+ * @brief VSX variant of the accumulate step: mixes one stripe into the accumulators.
+ *
+ * Processes the stripe as XXH_STRIPE_LEN/sizeof(xxh_u64x2) vectors. For each
+ * 64-bit lane the accumulator receives the 32x32->64 product of the two halves
+ * of (input ^ secret), plus the other 64-bit input lane of the same vector.
+ * On __s390x__ the lane swap uses vec_permi(), elsewhere vec_xxpermdi().
+ *
+ * @param acc    Accumulators, accessed as xxh_u64x2; presumed aligned (not asserted here).
+ * @param input  Input stripe; read through XXH_vec_loadu().
+ * @param secret Secret bytes for this stripe; read through XXH_vec_loadu().
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ /* presumed aligned */
+ xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+ xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
+ xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
+ xxh_u64x2 const v32 = { 32, 32 };
+ size_t i;
+ for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
+ /* data_vec = xinput[i]; */
+ xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
+ /* key_vec = xsecret[i]; */
+ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
+ /* data_key = data_vec ^ key_vec; */
+ xxh_u64x2 const data_key = data_vec ^ key_vec;
+ /* shuffled = (data_key << 32) | (data_key >> 32); */
+ xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
+ /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
+ xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
+ /* acc_vec = xacc[i]; */
+ xxh_u64x2 acc_vec = xacc[i];
+ acc_vec += product;
+
+ /* swap high and low halves */
+#ifdef __s390x__
+ acc_vec += vec_permi(data_vec, data_vec, 2);
+#else
+ acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
+#endif
+ xacc[i] = acc_vec;
+ }
+}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) /* multi-stripe loop around XXH3_accumulate_512_vsx(), see XXH3_ACCUMULATE_TEMPLATE */
+/*!
+ * @internal
+ * @brief VSX variant of the scramble step.
+ *
+ * For every 64-bit lane: acc = (acc ^ (acc >> 47) ^ secret) * XXH_PRIME32_1.
+ * The 64-bit multiply is assembled from two widening 32-bit multiplies.
+ *
+ * @param acc    Accumulators, accessed as xxh_u64x2; asserted 16-byte aligned.
+ * @param secret Secret bytes for this scramble; read through XXH_vec_loadu().
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+
+ { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+ const xxh_u8* const xsecret = (const xxh_u8*) secret;
+ /* constants */
+ xxh_u64x2 const v32 = { 32, 32 };
+ xxh_u64x2 const v47 = { 47, 47 };
+ xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
+ size_t i;
+ for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
+ /* xacc[i] ^= (xacc[i] >> 47); */
+ xxh_u64x2 const acc_vec = xacc[i];
+ xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
+
+ /* xacc[i] ^= xsecret[i]; */
+ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
+ xxh_u64x2 const data_key = data_vec ^ key_vec;
+
+ /* xacc[i] *= XXH_PRIME32_1 */
+ /* prod_even: the partial product that is shifted left by 32 below, so presumably
+ * ((xxh_u64x2)data_key >> 32) * prime -- NOTE(review): confirm against XXH_vec_mule */
+ xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
+ /* prod_odd: the unshifted partial product, so presumably
+ * ((xxh_u64x2)data_key & 0xFFFFFFFF) * prime -- NOTE(review): confirm against XXH_vec_mulo */
+ xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
+ xacc[i] = prod_odd + (prod_even << v32);
+ } }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_SVE)
+/*!
+ * @internal
+ * @brief SVE variant of the accumulate step for a single stripe.
+ *
+ * The eight 64-bit accumulators are loaded into one, four or two SVE vectors
+ * depending on svcntd() (>= 8, == 2, or anything else respectively), updated
+ * with the ACCRND macro, and stored back.
+ *
+ * NOTE(review): ACCRND is defined outside this section; it presumably reads the
+ * locals xinput, xsecret and kSwap declared below, none of which is referenced
+ * directly in this function.
+ *
+ * @param acc    Accumulators, accessed as eight uint64_t.
+ * @param input  Input stripe.
+ * @param secret Secret bytes for this stripe.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ uint64_t *xacc = (uint64_t *)acc;
+ const uint64_t *xinput = (const uint64_t *)(const void *)input;
+ const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+ svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); /* indices {0,1,2,...} ^ 1 = {1,0,3,2,...} */
+ uint64_t element_count = svcntd();
+ if (element_count >= 8) { /* one vector holds all eight accumulators */
+ svbool_t mask = svptrue_pat_b64(SV_VL8);
+ svuint64_t vacc = svld1_u64(mask, xacc);
+ ACCRND(vacc, 0);
+ svst1_u64(mask, xacc, vacc);
+ } else if (element_count == 2) { /* sve128 */
+ svbool_t mask = svptrue_pat_b64(SV_VL2);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+ svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+ svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 2);
+ ACCRND(acc2, 4);
+ ACCRND(acc3, 6);
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 2, acc1);
+ svst1_u64(mask, xacc + 4, acc2);
+ svst1_u64(mask, xacc + 6, acc3);
+ } else { /* remaining vector lengths: two vectors of four accumulators each */
+ svbool_t mask = svptrue_pat_b64(SV_VL4);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 4);
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 4, acc1);
+ }
+}
+/*!
+ * @internal
+ * @brief SVE multi-stripe accumulate loop.
+ *
+ * Does nothing when @p nbStripes is 0. Otherwise the accumulators are loaded
+ * into SVE registers once, updated with ACCRND for each of the @p nbStripes
+ * stripes, and stored back once at the end. After each stripe the input
+ * pointer advances by 8 uint64_t and the secret pointer by 1 uint64_t.
+ *
+ * NOTE(review): ACCRND is defined outside this section; it presumably reads the
+ * locals xinput, xsecret and kSwap declared below.
+ *
+ * @param acc       Accumulators, accessed as eight uint64_t.
+ * @param input     Start of the input stripes.
+ * @param secret    Secret bytes for the first stripe.
+ * @param nbStripes Number of stripes to process.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
+ const xxh_u8* XXH_RESTRICT input,
+ const xxh_u8* XXH_RESTRICT secret,
+ size_t nbStripes)
+{
+ if (nbStripes != 0) { /* the do/while loops below always run at least once */
+ uint64_t *xacc = (uint64_t *)acc;
+ const uint64_t *xinput = (const uint64_t *)(const void *)input;
+ const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+ svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); /* indices {0,1,2,...} ^ 1 = {1,0,3,2,...} */
+ uint64_t element_count = svcntd();
+ if (element_count >= 8) { /* one vector holds all eight accumulators */
+ svbool_t mask = svptrue_pat_b64(SV_VL8);
+ svuint64_t vacc = svld1_u64(mask, xacc + 0);
+ do {
+ /* svprfd(svbool_t, void *, enum svfprop); */
+ svprfd(mask, xinput + 128, SV_PLDL1STRM); /* prefetch 128 uint64_t ahead of the current stripe */
+ ACCRND(vacc, 0);
+ xinput += 8;
+ xsecret += 1;
+ nbStripes--;
+ } while (nbStripes != 0);
+
+ svst1_u64(mask, xacc + 0, vacc);
+ } else if (element_count == 2) { /* sve128 */
+ svbool_t mask = svptrue_pat_b64(SV_VL2);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+ svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+ svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+ do {
+ svprfd(mask, xinput + 128, SV_PLDL1STRM);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 2);
+ ACCRND(acc2, 4);
+ ACCRND(acc3, 6);
+ xinput += 8;
+ xsecret += 1;
+ nbStripes--;
+ } while (nbStripes != 0);
+
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 2, acc1);
+ svst1_u64(mask, xacc + 4, acc2);
+ svst1_u64(mask, xacc + 6, acc3);
+ } else { /* remaining vector lengths: two vectors of four accumulators each */
+ svbool_t mask = svptrue_pat_b64(SV_VL4);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+ do {
+ svprfd(mask, xinput + 128, SV_PLDL1STRM);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 4);
+ xinput += 8;
+ xsecret += 1;
+ nbStripes--;
+ } while (nbStripes != 0);
+
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 4, acc1);
+ }
+ }
+}
+
+#endif
+
+/* scalar variants - universal */
+/*!
+ * @internal
+ * @brief Multiplies the low 32 bits of @p lhs and @p rhs into a 64-bit product
+ *        and adds @p acc to it.
+ *
+ * Two implementations follow: an inline-asm UMADDL form for AArch64 with
+ * GCC/Clang, and a portable form built on XXH_mult32to64() for everything else.
+ *
+ * @param lhs, rhs Operands; only their low 32 bits take part in the multiply.
+ * @param acc      64-bit addend.
+ * @return (xxh_u64)(xxh_u32)lhs * (xxh_u32)rhs + acc
+ */
+#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
+/*
+ * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
+ * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
+ *
+ * While this might not seem like much, as AArch64 is a 64-bit architecture, only
+ * big Cortex designs have a full 64-bit multiplier.
+ *
+ * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
+ * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
+ * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
+ *
+ * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
+ * not have this penalty and does the mask automatically.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+ xxh_u64 ret;
+ /* note: %x = 64-bit register, %w = 32-bit register */
+ __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
+ return ret;
+}
+#else
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+ return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc; /* casts drop the upper 32 bits of each operand */
+}
+#endif
+
+/*!
+ * @internal
+ * @brief One lane of the scalar accumulate step used by
+ *        @ref XXH3_accumulate_512_scalar().
+ *
+ * Kept as a standalone function because the NEON path mixes NEON and scalar
+ * processing and calls this routine for its scalar lanes.
+ *
+ * Reads 8 little-endian bytes of input and of secret at offset lane * 8, adds
+ * the raw input word to the neighbouring accumulator (lane ^ 1), and adds the
+ * 32x32->64 product of the two halves of (input ^ secret) to accumulator @p lane.
+ */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc,
+                 void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret,
+                 size_t lane)
+{
+    xxh_u64* const lanes = (xxh_u64*) acc;
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    {   xxh_u8 const* const in_bytes  = (xxh_u8 const*) input;
+        xxh_u8 const* const key_bytes = (xxh_u8 const*) secret;
+        size_t const byte_off = lane * 8;
+        xxh_u64 const data_val = XXH_readLE64(in_bytes + byte_off);
+        xxh_u64 const mixed    = data_val ^ XXH_readLE64(key_bytes + byte_off);
+        /* the plain input word goes to the adjacent lane */
+        lanes[lane ^ 1] += data_val;
+        /* lo32(mixed) * hi32(mixed) is accumulated into this lane */
+        lanes[lane] = XXH_mult32to64_add64(mixed /* & 0xFFFFFFFF */, mixed >> 32, lanes[lane]);
+    }
+}
+
+/*!
+ * @internal
+ * @brief Processes a 64 byte block of data using the scalar path.
+ *
+ * Calls XXH3_scalarRound() once for each of the XXH_ACC_NB lanes, in
+ * ascending lane order.
+ *
+ * @param acc    Accumulators; forwarded unchanged to XXH3_scalarRound().
+ * @param input  Input stripe.
+ * @param secret Secret bytes for this stripe.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ size_t i;
+ /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+ && (defined(__arm__) || defined(__thumb2__)) \
+ && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+ && XXH_SIZE_OPT <= 0
+# pragma GCC unroll 8
+#endif
+ for (i=0; i < XXH_ACC_NB; i++) { /* the pragma above, when active, applies to this loop */
+ XXH3_scalarRound(acc, input, secret, i);
+ }
+}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) /* multi-stripe loop around XXH3_accumulate_512_scalar(), see XXH3_ACCUMULATE_TEMPLATE */
+
+/*!
+ * @internal
+ * @brief One lane of the scalar scramble step used by
+ *        @ref XXH3_scrambleAcc_scalar().
+ *
+ * Kept as a standalone function because the NEON path mixes NEON and scalar
+ * processing and calls this routine for its scalar lanes.
+ *
+ * Computes acc[lane] = (xorshift64(acc[lane], 47) ^ key) * XXH_PRIME32_1, where
+ * key is the little-endian 64-bit word of @p secret at offset lane * 8.
+ */
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret,
+                         size_t lane)
+{
+    xxh_u64* const lanes = (xxh_u64*) acc;   /* presumed aligned */
+    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    {   /* the secret has no alignment restriction, hence the byte-wise read */
+        xxh_u64 const key64 = XXH_readLE64((const xxh_u8*) secret + lane * 8);
+        xxh_u64 mixed = XXH_xorshift64(lanes[lane], 47) ^ key64;
+        mixed *= XXH_PRIME32_1;
+        lanes[lane] = mixed;
+    }
+}
+
+/*!
+ * @internal
+ * @brief Scalar scramble step: applies XXH3_scalarScrambleRound() to each of
+ *        the XXH_ACC_NB accumulator lanes, in ascending order.
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    size_t lane = 0;
+    while (lane < XXH_ACC_NB) {
+        XXH3_scalarScrambleRound(acc, secret, lane);
+        lane++;
+    }
+}
+/*!
+ * @internal
+ * @brief Scalar derivation of a seeded secret from XXH3_kSecret.
+ *
+ * Writes XXH_SECRET_DEFAULT_SIZE bytes to @p customSecret in 16-byte rounds:
+ * the first 64-bit word of each round is the matching XXH3_kSecret word plus
+ * @p seed64, the second is the matching word minus @p seed64. Words are read
+ * and written in little-endian order.
+ *
+ * @param customSecret Destination buffer; written through XXH_writeLE64().
+ * @param seed64       Seed combined with the default secret.
+ */
+XXH_FORCE_INLINE void
+XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+ /*
+ * We need a separate pointer for the hack below,
+ * which requires a non-const pointer.
+ * Any decent compiler will optimize this out otherwise.
+ */
+ const xxh_u8* kSecretPtr = XXH3_kSecret;
+ XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+
+#if defined(__GNUC__) && defined(__aarch64__)
+ /*
+ * UGLY HACK:
+ * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
+ * placed sequentially, in order, at the top of the unrolled loop.
+ *
+ * While MOVK is great for generating constants (2 cycles for a 64-bit
+ * constant compared to 4 cycles for LDR), it fights for bandwidth with
+ * the arithmetic instructions.
+ *
+ * I L S
+ * MOVK
+ * MOVK
+ * MOVK
+ * MOVK
+ * ADD
+ * SUB STR
+ * STR
+ * By forcing loads from memory (as the asm line causes the compiler to assume
+ * that kSecretPtr has been changed), the pipelines are used more
+ * efficiently:
+ * I L S
+ * LDR
+ * ADD LDR
+ * SUB STR
+ * STR
+ *
+ * See XXH3_NEON_LANES for details on the pipeline.
+ *
+ * XXH3_64bits_withSeed, len == 256, Snapdragon 835
+ * without hack: 2654.4 MB/s
+ * with hack: 3202.9 MB/s
+ */
+ XXH_COMPILER_GUARD(kSecretPtr);
+#endif
+ { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
+ int i;
+ for (i=0; i < nbRounds; i++) {
+ /*
+ * The asm hack causes the compiler to assume that kSecretPtr aliases with
+ * customSecret, and on aarch64, this prevented LDP from merging two
+ * loads together for free. Putting the loads together before the stores
+ * properly generates LDP.
+ */
+ xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
+ xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
+ XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
+ XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
+ } }
+}
+
+
+/* Accumulate kernel; invoked in this file as f_acc(acc, input, secret, nbStripes). */
+typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
+/* Scramble kernel; invoked in this file as f_scramble(acc, secret). */
+typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
+/* Seeded-secret generator; invoked in this file as f_initSec(secret, seed). */
+typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
+
+
+/*
+ * Bind the four generic kernel names (XXH3_accumulate_512, XXH3_accumulate,
+ * XXH3_scrambleAcc, XXH3_initCustomSecret) to one implementation according
+ * to XXH_VECTOR. Any unlisted value falls back to the scalar kernels.
+ */
+#if (XXH_VECTOR == XXH_AVX512)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_accumulate XXH3_accumulate_avx512
+#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
+#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
+
+#elif (XXH_VECTOR == XXH_AVX2)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_accumulate XXH3_accumulate_avx2
+#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
+#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
+
+#elif (XXH_VECTOR == XXH_SSE2)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_accumulate XXH3_accumulate_sse2
+#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
+#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
+
+#elif (XXH_VECTOR == XXH_NEON)
+
+/* NEON: vector accumulate/scramble, scalar secret initialization */
+#define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_accumulate XXH3_accumulate_neon
+#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#elif (XXH_VECTOR == XXH_VSX)
+
+/* VSX: vector accumulate/scramble, scalar secret initialization */
+#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_accumulate XXH3_accumulate_vsx
+#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#elif (XXH_VECTOR == XXH_SVE)
+/* SVE: only the accumulate kernels are vectorized */
+#define XXH3_accumulate_512 XXH3_accumulate_512_sve
+#define XXH3_accumulate XXH3_accumulate_sve
+#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#else /* scalar */
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_accumulate XXH3_accumulate_scalar
+#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#endif
+
+/* Size-optimized builds always use the scalar secret generator, whatever XXH_VECTOR selected above. */
+#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
+# undef XXH3_initCustomSecret
+# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#endif
+
+/*
+ * One-shot core loop for long inputs.
+ *
+ * Splits the input into blocks of nbStripesPerBlock stripes (derived from
+ * secretSize). Each full block is accumulated and then scrambled. The stripes
+ * of the final, partial block are accumulated without scrambling, and finally
+ * one extra stripe made of the last XXH_STRIPE_LEN bytes of input is
+ * accumulated with a secret offset shifted by XXH_SECRET_LASTACC_START.
+ *
+ * Block and stripe counts are computed from (len - 1), so the last input byte
+ * is never consumed by the block loops and is left to the last stripe.
+ * Asserted preconditions: secretSize >= XXH3_SECRET_SIZE_MIN and len > XXH_STRIPE_LEN.
+ */
+XXH_FORCE_INLINE void
+XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
+ const xxh_u8* XXH_RESTRICT input, size_t len,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+ XXH3_f_accumulate f_acc,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
+ size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
+ size_t const nb_blocks = (len - 1) / block_len;
+
+ size_t n;
+
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+
+ /* full blocks: accumulate every stripe, then scramble with the last XXH_STRIPE_LEN bytes of the secret */
+ for (n = 0; n < nb_blocks; n++) {
+ f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
+ f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
+ }
+
+ /* last partial block */
+ XXH_ASSERT(len > XXH_STRIPE_LEN);
+ { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
+ XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
+ f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
+
+ /* last stripe: uses the XXH3_accumulate_512 macro directly, not f_acc */
+ { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
+#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
+ XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+ } }
+}
+
+/*
+ * Folds two adjacent accumulator lanes into 64 bits: each lane is XORed with
+ * 8 bytes of secret, then the pair goes through XXH3_mul128_fold64().
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
+{
+    xxh_u64 const keyedFirst  = acc[0] ^ XXH_readLE64(secret);
+    xxh_u64 const keyedSecond = acc[1] ^ XXH_readLE64(secret+8);
+    return XXH3_mul128_fold64(keyedFirst, keyedSecond);
+}
+
+/*
+ * Converges the 8 accumulator lanes into one 64-bit hash.
+ * Starting from `start`, adds XXH3_mix2Accs() of each of the 4 lane pairs
+ * (pair i uses secret bytes [16*i, 16*i+16)), then avalanches the sum.
+ * Reads 64 bytes of `secret`.
+ */
+static XXH64_hash_t
+XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
+{
+ xxh_u64 result64 = start;
+ size_t i = 0;
+
+ for (i = 0; i < 4; i++) {
+ result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
+#if defined(__clang__) /* Clang */ \
+ && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
+ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
+ /*
+ * UGLY HACK:
+ * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
+ * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
+ * XXH3_64bits, len == 256, Snapdragon 835:
+ * without hack: 2063.7 MB/s
+ * with hack: 2560.7 MB/s
+ */
+ XXH_COMPILER_GUARD(result64);
+#endif
+ }
+
+ return XXH3_avalanche(result64);
+}
+
+/* Brace initializer holding the starting values of the 8 accumulator lanes. */
+#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
+ XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
+
+/*
+ * 64-bit long-input hash: initializes a local accumulator array, runs
+ * XXH3_hashLong_internal_loop() over the whole input, then merges the
+ * accumulators using the secret at offset XXH_SECRET_MERGEACCS_START and a
+ * starting value of len * XXH_PRIME64_1.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
+ const void* XXH_RESTRICT secret, size_t secretSize,
+ XXH3_f_accumulate f_acc,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+ XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
+
+ /* converge into final hash */
+ XXH_STATIC_ASSERT(sizeof(acc) == 64);
+ /* do not align on 8, so that the secret is different from the accumulator */
+#define XXH_SECRET_MERGEACCS_START 11
+ /* the merge reads sizeof(acc) secret bytes starting at that offset */
+ XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+ return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
+}
+
+/*
+ * Long-input hash using a caller-supplied secret; the seed is ignored.
+ *
+ * Passing the secret's size through (when it is a compile-time constant)
+ * lets the compiler optimize the vectorized loop, which matters a lot for
+ * "medium" keys (<1 KB) with the AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and
+ * FORCE_INLINE breaks -Og, this is XXH_NO_INLINE.
+ */
+XXH3_WITH_SECRET_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
+                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    XXH64_hash_t hash;
+    (void)seed64;   /* only present to match the XXH3_hashLong64_f signature */
+    hash = XXH3_hashLong_64b_internal(input, len, secret, secretLen,
+                                      XXH3_accumulate, XXH3_scrambleAcc);
+    return hash;
+}
+
+/*
+ * Long-input hash with the built-in XXH3_kSecret; seed and secret arguments
+ * are ignored.
+ *
+ * Kept out of line on purpose: the small-input paths stay compact and
+ * friendlier to the instruction cache. Inside this function the internal loop
+ * is inlined with a statically known secret size so the vector loop can be
+ * optimized.
+ */
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
+XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
+                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    XXH64_hash_t hash;
+    /* unused: only present to match the XXH3_hashLong64_f signature */
+    (void)seed64;
+    (void)secret;
+    (void)secretLen;
+    hash = XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
+                                      XXH3_accumulate, XXH3_scrambleAcc);
+    return hash;
+}
+
+/*
+ * XXH3_hashLong_64b_withSeed_internal():
+ * Builds a custom secret by altering the default XXH3_kSecret with the seed
+ * (through f_initSec), then hashes the long input with that secret.
+ *
+ * Generating the secret is reasonably fast but not free, so when
+ * XXH_SIZE_OPT <= 0 a zero seed goes straight to XXH3_kSecret instead.
+ *
+ * The non-inlined wrapper around this helper is what callers should use;
+ * keeping XXH3_hashLong out of line measurably helps performance.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
+                                    XXH64_hash_t seed,
+                                    XXH3_f_accumulate f_acc,
+                                    XXH3_f_scrambleAcc f_scramble,
+                                    XXH3_f_initCustomSecret f_initSec)
+{
+#if XXH_SIZE_OPT <= 0
+    if (seed == 0) {
+        return XXH3_hashLong_64b_internal(input, len,
+                                          XXH3_kSecret, sizeof(XXH3_kSecret),
+                                          f_acc, f_scramble);
+    }
+#endif
+    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 seededSecret[XXH_SECRET_DEFAULT_SIZE];
+        f_initSec(seededSecret, seed);
+        return XXH3_hashLong_64b_internal(input, len,
+                                          seededSecret, sizeof(seededSecret),
+                                          f_acc, f_scramble);
+    }
+}
+
+/*
+ * Seeded long-input hash; the secret arguments are ignored.
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
+                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    XXH64_hash_t hash;
+    /* unused: only present to match the XXH3_hashLong64_f signature */
+    (void)secret;
+    (void)secretLen;
+    hash = XXH3_hashLong_64b_withSeed_internal(input, len, seed,
+                                               XXH3_accumulate,
+                                               XXH3_scrambleAcc,
+                                               XXH3_initCustomSecret);
+    return hash;
+}
+
+
+/* Long-input 64-bit hash routine; invoked in this file as f_hashLong(input, len, seed64, secret, secretLen). */
+typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
+ XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
+
+/*
+ * Length dispatcher shared by the one-shot 64-bit entry points:
+ *   len <= 16                -> XXH3_len_0to16_64b
+ *   len <= 128               -> XXH3_len_17to128_64b
+ *   len <= XXH3_MIDSIZE_MAX  -> XXH3_len_129to240_64b
+ *   otherwise                -> f_hashLong
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
+                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+                     XXH3_hashLong64_f f_hashLong)
+{
+    const xxh_u8* const inBytes = (const xxh_u8*)input;
+    const xxh_u8* const secretBytes = (const xxh_u8*)secret;
+
+    /*
+     * The minimum secret length is a contract pre-condition, only asserted.
+     * If some action has to be taken when it is violated, this is the place.
+     * A real check would add a branch to every hash, and the signature has
+     * no room to report an error anyway.
+     */
+    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+
+    if (len <= 16) {
+        return XXH3_len_0to16_64b(inBytes, len, secretBytes, seed64);
+    } else if (len <= 128) {
+        return XXH3_len_17to128_64b(inBytes, len, secretBytes, secretLen, seed64);
+    } else if (len <= XXH3_MIDSIZE_MAX) {
+        return XXH3_len_129to240_64b(inBytes, len, secretBytes, secretLen, seed64);
+    }
+    return f_hashLong(input, len, seed64, secretBytes, secretLen);
+}
+
+
+/* === Public entry point === */
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
+{
+    /* seed 0, default secret, long inputs go through the default long-hash routine */
+    XXH64_hash_t const hash = XXH3_64bits_internal(input, length,
+                                                   0,
+                                                   XXH3_kSecret, sizeof(XXH3_kSecret),
+                                                   XXH3_hashLong_64b_default);
+    return hash;
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+    /* seed 0, caller-provided secret at every input length */
+    XXH64_hash_t const hash = XXH3_64bits_internal(input, length,
+                                                   0,
+                                                   secret, secretSize,
+                                                   XXH3_hashLong_64b_withSecret);
+    return hash;
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
+{
+    /* short inputs mix the seed with the default secret; long inputs use the seeded long-hash routine */
+    XXH64_hash_t const hash = XXH3_64bits_internal(input, length,
+                                                   seed,
+                                                   XXH3_kSecret, sizeof(XXH3_kSecret),
+                                                   XXH3_hashLong_64b_withSeed);
+    return hash;
+}
+
+/*
+ * Inputs longer than XXH3_MIDSIZE_MAX are hashed with the caller's secret.
+ * Shorter inputs use the seed together with the default XXH3_kSecret.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (length > XXH3_MIDSIZE_MAX) {
+        return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
+    }
+    /* the long-hash callback is never reached at these lengths, hence NULL */
+    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+}
+
+
+/* === XXH3 streaming === */
+#ifndef XXH_NO_STREAM
+/*
+ * Allocates `s` bytes whose address is a multiple of `align`.
+ *
+ * The result must be released with `XXH_alignedFree()`.
+ *
+ * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
+ * alignment on 32-bit. That is not enough for the 32 byte aligned loads in
+ * AVX2 or, on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
+ *
+ * This underalignment previously caused a rather obvious crash which went
+ * completely unnoticed due to XXH3_createState() not actually being tested.
+ * Credit to RedSpah for noticing this bug.
+ *
+ * Alignment is done by hand rather than through posix_memalign or _mm_malloc:
+ * a portable fallback like this one would be needed anyway, and detecting
+ * library functions without external build tools is impossible.
+ *
+ * Method: over-allocate by `align` bytes, round the pointer up to the next
+ * multiple of `align` (always moving forward by at least one byte), and store
+ * the distance moved in the byte just before the returned pointer.
+ *
+ * `align` must be a power of 2 with 8 <= align <= 128.
+ * Returns NULL when the underlying allocation fails.
+ */
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
+{
+    xxh_u8* rawBlock;
+
+    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
+    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
+    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
+
+    /* room for the payload, the realignment slack and the offset byte */
+    rawBlock = (xxh_u8*)XXH_malloc(s + align);
+    if (rawBlock == NULL) {
+        return NULL;
+    }
+
+    {   /*
+         * Distance to the next multiple of `align`. This is in [1, align]:
+         * an already-aligned block still moves forward by a full `align`,
+         * which guarantees a byte in front of the result for the offset.
+         */
+        size_t const shift = align - ((size_t)rawBlock & (align - 1));
+        xxh_u8* const aligned = rawBlock + shift;
+
+        XXH_ASSERT((size_t)aligned % align == 0);
+
+        /* remember how far we moved, right below the returned pointer */
+        aligned[-1] = (xxh_u8)shift;
+        return aligned;
+    }
+}
+/*
+ * Releases a pointer returned by XXH_alignedMalloc(). NULL is ignored.
+ * Never pass a plain malloc'd pointer: the byte in front of `p` is read as
+ * the offset back to the start of the underlying allocation.
+ */
+static void XXH_alignedFree(void* p)
+{
+    xxh_u8* const aligned = (xxh_u8*)p;
+    if (aligned == NULL) {
+        return;
+    }
+    /* step back by the offset stored by XXH_alignedMalloc() and free the raw block */
+    XXH_free(aligned - aligned[-1]);
+}
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Allocate an @ref XXH3_state_t.
+ *
+ * The state is allocated on a 64-byte boundary and run through
+ * XXH3_INITSTATE() before being returned.
+ * Must be freed with XXH3_freeState().
+ * @return An allocated XXH3_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
+{
+    void* const mem = XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
+    XXH3_state_t* const state = (XXH3_state_t*)mem;
+    if (state != NULL) {
+        XXH3_INITSTATE(state);
+    }
+    return state;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Frees an @ref XXH3_state_t.
+ *
+ * Must be allocated with XXH3_createState().
+ * Passing `NULL` is allowed and does nothing (XXH_alignedFree() ignores it).
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
+{
+ XXH_alignedFree(statePtr);
+ return XXH_OK;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Copies the whole content of one state into another with a single memcpy.
+ *
+ * Neither pointer is checked for `NULL`.
+ * @param dst_state The state to overwrite.
+ * @param src_state The state to copy from.
+ */
+XXH_PUBLIC_API void
+XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
+{
+ XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
+}
+
+/*
+ * Common reset path for every XXH3 reset variant.
+ *
+ * Zeroes the state members laid out from bufferedSize up to (but excluding)
+ * nbStripesPerBlock, reloads the 8 accumulators with their initial constants,
+ * records seed / useSeed / extSecret, and derives secretLimit and
+ * nbStripesPerBlock from secretSize.
+ *
+ * customSecret is not touched here. statePtr must be non-NULL and secretSize
+ * must be >= XXH3_SECRET_SIZE_MIN; both are only asserted.
+ */
+static void
+XXH3_reset_internal(XXH3_state_t* statePtr,
+ XXH64_hash_t seed,
+ const void* secret, size_t secretSize)
+{
+ size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
+ size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
+ XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
+ XXH_ASSERT(statePtr != NULL);
+ /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
+ memset((char*)statePtr + initStart, 0, initLength);
+ statePtr->acc[0] = XXH_PRIME32_3;
+ statePtr->acc[1] = XXH_PRIME64_1;
+ statePtr->acc[2] = XXH_PRIME64_2;
+ statePtr->acc[3] = XXH_PRIME64_3;
+ statePtr->acc[4] = XXH_PRIME64_4;
+ statePtr->acc[5] = XXH_PRIME32_2;
+ statePtr->acc[6] = XXH_PRIME64_5;
+ statePtr->acc[7] = XXH_PRIME32_1;
+ statePtr->seed = seed;
+ statePtr->useSeed = (seed != 0);
+ /* a NULL extSecret makes update/digest fall back to state->customSecret */
+ statePtr->extSecret = (const unsigned char*)secret;
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+ /* unsigned subtraction: wraps around if secretSize < XXH_STRIPE_LEN */
+ statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
+ statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Resets a state for an unseeded hash using the default secret XXH3_kSecret.
+ * @param statePtr The state to reset.
+ * @return XXH_OK on success, XXH_ERROR if @p statePtr is `NULL`.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Resets a state for an unseeded hash using a caller-provided secret.
+ *
+ * The state keeps a reference to @p secret (it is not copied), so the secret
+ * must stay valid for as long as the state is used.
+ *
+ * @param statePtr   The state to reset.
+ * @param secret     The secret to use.
+ * @param secretSize Size of @p secret in bytes, at least XXH3_SECRET_SIZE_MIN.
+ * @return XXH_OK on success. XXH_ERROR if @p statePtr or @p secret is `NULL`,
+ *         or if @p secretSize is below XXH3_SECRET_SIZE_MIN; in that case the
+ *         state is left untouched.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+    /*
+     * Validate everything before touching the state, as
+     * XXH3_64bits_reset_withSecretandSeed() does. XXH3_reset_internal()
+     * asserts on the secret size and computes secretSize - XXH_STRIPE_LEN,
+     * so it must never see an undersized secret.
+     */
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_reset_internal(statePtr, 0, secret, secretSize);
+    return XXH_OK;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Resets a state for a seeded hash.
+ *
+ * A zero seed is handled exactly like XXH3_64bits_reset(). Otherwise the
+ * state's customSecret is derived from the seed and used as the secret.
+ * @param statePtr The state to reset.
+ * @param seed     The 64-bit seed.
+ * @return XXH_OK on success, XXH_ERROR if @p statePtr is `NULL`.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ if (seed==0) return XXH3_64bits_reset(statePtr);
+ /* regenerate customSecret unless the state already carries this seed and no external secret */
+ if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
+ XXH3_initCustomSecret(statePtr->customSecret, seed);
+ /* NULL extSecret: hashing will read state->customSecret */
+ XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Resets a state to use both a caller-provided secret and a seed.
+ *
+ * All arguments are validated before the state is modified.
+ * @param statePtr   The state to reset.
+ * @param secret     The secret to reference (not copied).
+ * @param secretSize Size of @p secret in bytes, at least XXH3_SECRET_SIZE_MIN.
+ * @param seed64     The 64-bit seed.
+ * @return XXH_OK on success, XXH_ERROR on a `NULL` pointer or undersized secret.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ if (secret == NULL) return XXH_ERROR;
+ if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, seed64, secret, secretSize);
+ /* XXH3_reset_internal() set useSeed to (seed64 != 0); force it on */
+ statePtr->useSeed = 1; /* always, even if seed64==0 */
+ return XXH_OK;
+}
+
+/*!
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
+ *
+ * @param acc Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr In/out pointer to the number of stripes already consumed in the current block
+ * @param nbStripesPerBlock Number of stripes in a block
+ * @param input Input pointer
+ * @param nbStripes Number of stripes to process
+ * @param secret Secret pointer
+ * @param secretLimit Offset in @p secret of the bytes passed to the scrambler
+ * @param f_acc Pointer to an XXH3_accumulate implementation
+ * @param f_scramble Pointer to an XXH3_scrambleAcc implementation
+ * @return Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8 *
+XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
+ size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
+ const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
+ XXH3_f_accumulate f_acc,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ /* resume in the secret where the current block left off */
+ const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+ /* Process full blocks */
+ if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
+ /* Process the initial partial block... */
+ size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+ do {
+ /* Accumulate and scramble */
+ f_acc(acc, input, initialSecret, nbStripesThisIter);
+ f_scramble(acc, secret + secretLimit);
+ input += nbStripesThisIter * XXH_STRIPE_LEN;
+ nbStripes -= nbStripesThisIter;
+ /* Then continue the loop with the full block size */
+ nbStripesThisIter = nbStripesPerBlock;
+ initialSecret = secret;
+ } while (nbStripes >= nbStripesPerBlock);
+ /* every completed block ended with a scramble: a fresh block starts here */
+ *nbStripesSoFarPtr = 0;
+ }
+ /* Process a partial block */
+ if (nbStripes > 0) {
+ f_acc(acc, input, initialSecret, nbStripes);
+ input += nbStripes * XXH_STRIPE_LEN;
+ *nbStripesSoFarPtr += nbStripes;
+ }
+ /* Return end pointer */
+ return input;
+}
+
+/*
+ * XXH3_STREAM_USE_STACK: when >= 1, XXH3_update() operates on a stack copy of
+ * the accumulators instead of writing into the state directly.
+ * Unless already defined, it is set to 1 when XXH_SIZE_OPT <= 0 and the
+ * compiler is not clang; otherwise it is left undefined.
+ */
+#ifndef XXH3_STREAM_USE_STACK
+# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
+# define XXH3_STREAM_USE_STACK 1
+# endif
+#endif
+/*
+ * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+ *
+ * Feeds `len` bytes of `input` into the streaming state:
+ * - input that fits in the remaining room of state->buffer is only buffered;
+ * - otherwise a partially filled buffer is completed and consumed, then the
+ *   bulk of the input is consumed directly, and the tail is buffered.
+ * At least one byte always stays buffered after consuming, so a later digest
+ * has a last stripe to work with.
+ *
+ * A NULL input returns XXH_OK without touching the state (len is asserted to be 0).
+ * Always returns XXH_OK.
+ */
+XXH_FORCE_INLINE XXH_errorcode
+XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+ const xxh_u8* XXH_RESTRICT input, size_t len,
+ XXH3_f_accumulate f_acc,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ if (input==NULL) {
+ XXH_ASSERT(len == 0);
+ return XXH_OK;
+ }
+
+ XXH_ASSERT(state != NULL);
+ { const xxh_u8* const bEnd = input + len;
+ /* an external secret takes precedence over the state's own customSecret */
+ const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+ /* For some reason, gcc and MSVC seem to suffer greatly
+ * when operating accumulators directly into state.
+ * Operating into stack space seems to enable proper optimization.
+ * clang, on the other hand, doesn't seem to need this trick */
+ XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
+ XXH_memcpy(acc, state->acc, sizeof(acc));
+#else
+ xxh_u64* XXH_RESTRICT const acc = state->acc;
+#endif
+ state->totalLen += len;
+ XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
+
+ /* small input : just fill in tmp buffer */
+ /* (accumulators are untouched on this path, so the stack copy needs no write-back) */
+ if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
+ XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+ state->bufferedSize += (XXH32_hash_t)len;
+ return XXH_OK;
+ }
+
+ /* total input is now > XXH3_INTERNALBUFFER_SIZE */
+ #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
+ XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
+
+ /*
+ * Internal buffer is partially filled (always, except at beginning)
+ * Complete it, then consume it.
+ */
+ if (state->bufferedSize) {
+ size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
+ XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
+ input += loadSize;
+ XXH3_consumeStripes(acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc, f_scramble);
+ state->bufferedSize = 0;
+ }
+ XXH_ASSERT(input < bEnd);
+ /* more than one buffer's worth left: consume straight from the caller's memory */
+ if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
+ /* (bEnd - 1 - input): leaves at least one byte unconsumed for the tail */
+ size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
+ input = XXH3_consumeStripes(acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ input, nbStripes,
+ secret, state->secretLimit,
+ f_acc, f_scramble);
+ /* keep the last consumed stripe at the end of the buffer: XXH3_digest_long()
+ * reads it back when fewer than XXH_STRIPE_LEN bytes are buffered */
+ XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+
+ }
+ /* Some remaining input (always) : buffer it */
+ XXH_ASSERT(input < bEnd);
+ XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
+ XXH_ASSERT(state->bufferedSize == 0);
+ XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
+ state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+ /* save stack accumulators into state */
+ XXH_memcpy(state->acc, acc, sizeof(acc));
+#endif
+ }
+
+ return XXH_OK;
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Feeds @p len bytes of @p input into a streaming state.
+ *
+ * Thin wrapper around XXH3_update() using the compile-time selected
+ * XXH3_accumulate and XXH3_scrambleAcc kernels.
+ * @return The XXH_errorcode returned by XXH3_update().
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+ return XXH3_update(state, (const xxh_u8*)input, len,
+ XXH3_accumulate, XXH3_scrambleAcc);
+}
+
+
+/*
+ * Finishes accumulation for a long streamed input without modifying `state`.
+ *
+ * Copies the state's accumulators into `acc`, consumes the complete stripes
+ * still sitting in the buffer, then accumulates one last stripe made of the
+ * final XXH_STRIPE_LEN bytes of the stream. `acc` receives the lanes, ready
+ * to be merged by the caller.
+ */
+XXH_FORCE_INLINE void
+XXH3_digest_long (XXH64_hash_t* acc,
+ const XXH3_state_t* state,
+ const unsigned char* secret)
+{
+ xxh_u8 lastStripe[XXH_STRIPE_LEN];
+ const xxh_u8* lastStripePtr;
+
+ /*
+ * Digest on a local copy. This way, the state remains unaltered, and it can
+ * continue ingesting more input afterwards.
+ */
+ XXH_memcpy(acc, state->acc, sizeof(state->acc));
+ if (state->bufferedSize >= XXH_STRIPE_LEN) {
+ /* Consume remaining stripes then point to remaining data in buffer */
+ size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
+ /* local copy of the counter, so the state's own progress is not advanced */
+ size_t nbStripesSoFar = state->nbStripesSoFar;
+ XXH3_consumeStripes(acc,
+ &nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, nbStripes,
+ secret, state->secretLimit,
+ XXH3_accumulate, XXH3_scrambleAcc);
+ lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
+ } else { /* bufferedSize < XXH_STRIPE_LEN */
+ /* Copy to temp buffer */
+ /* the missing leading bytes are taken from the end of state->buffer */
+ size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
+ XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
+ XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+ XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+ lastStripePtr = lastStripe;
+ }
+ /* Last stripe */
+ XXH3_accumulate_512(acc,
+ lastStripePtr,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+}
+
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Returns the 64-bit hash of everything fed to @p state so far.
+ *
+ * The state is only read, so more input can be added afterwards.
+ * Streams of at most XXH3_MIDSIZE_MAX bytes are still entirely in the
+ * internal buffer and are hashed with the matching one-shot function.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
+{
+    const unsigned char* const secret =
+        (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+
+    if (state->totalLen <= XXH3_MIDSIZE_MAX) {
+        /* short input: hash the buffered bytes in one shot */
+        if (state->useSeed) {
+            return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+        }
+        return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
+                                      secret, state->secretLimit + XXH_STRIPE_LEN);
+    }
+
+    /* long input: finish accumulation on a local copy, then merge the lanes */
+    {   XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t lanes[XXH_ACC_NB];
+        XXH3_digest_long(lanes, state, secret);
+        return XXH3_mergeAccs(lanes,
+                              secret + XXH_SECRET_MERGEACCS_START,
+                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
+    }
+}
+#endif /* !XXH_NO_STREAM */
+
+
+/* ==========================================
+ * XXH3 128 bits (a.k.a XXH128)
+ * ==========================================
+ * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
+ * even without counting the significantly larger output size.
+ *
+ * For example, extra steps are taken to avoid the seed-dependent collisions
+ * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
+ *
+ * This strength naturally comes at the cost of some speed, especially on short
+ * lengths. Note that longer hashes are about as fast as the 64-bit version
+ * due to it using only a slight modification of the 64-bit loop.
+ *
+ * XXH128 is also more oriented towards 64-bit machines. It is still extremely
+ * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
+ */
+
+/*
+ * 128-bit hash of a 1 to 3 byte input.
+ * Packs the input bytes and the length into one 32-bit word, derives a second
+ * word from it (byte swap + rotate), XORs each with a seed-adjusted value read
+ * from the first 16 bytes of the secret, and avalanches both halves.
+ */
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+ /* A doubled version of 1to3_64b with different constants. */
+ XXH_ASSERT(input != NULL);
+ XXH_ASSERT(1 <= len && len <= 3);
+ XXH_ASSERT(secret != NULL);
+ /*
+ * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
+ * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
+ * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
+ */
+ { xxh_u8 const c1 = input[0];
+ xxh_u8 const c2 = input[len >> 1];
+ xxh_u8 const c3 = input[len - 1];
+ xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
+ | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
+ xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
+ /* the seed is added for the low half and subtracted for the high half */
+ xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
+ xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
+ xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
+ xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
+ XXH128_hash_t h128;
+ h128.low64 = XXH64_avalanche(keyed_lo);
+ h128.high64 = XXH64_avalanche(keyed_hi);
+ return h128;
+ }
+}
+
+/*
+ * 128-bit hash of a 4 to 8 byte input.
+ * Reads the first and last 4 bytes (they overlap when len < 8), combines them
+ * into one 64-bit value keyed with secret bytes [16, 32) and the seed, then
+ * runs a 64x64->128 multiply followed by extra mixing of both halves.
+ */
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+ XXH_ASSERT(input != NULL);
+ XXH_ASSERT(secret != NULL);
+ XXH_ASSERT(4 <= len && len <= 8);
+ /* XOR the byte-swapped low 32 bits of the seed into its high 32 bits */
+ seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
+ { xxh_u32 const input_lo = XXH_readLE32(input);
+ xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
+ xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
+ xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
+ xxh_u64 const keyed = input_64 ^ bitflip;
+
+ /* Shift len to the left to ensure it is even, this avoids even multiplies. */
+ XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
+
+ m128.high64 += (m128.low64 << 1);
+ m128.low64 ^= (m128.high64 >> 3);
+
+ m128.low64 = XXH_xorshift64(m128.low64, 35);
+ m128.low64 *= PRIME_MX2;
+ m128.low64 = XXH_xorshift64(m128.low64, 28);
+ m128.high64 = XXH3_avalanche(m128.high64);
+ return m128;
+ }
+}
+
+/*
+ * 128-bit hash of a 9 to 16 byte input.
+ * Reads the first and last 8 bytes (they overlap when len < 16), keys them
+ * with secret bytes [32, 64) and the seed, and mixes them through two
+ * 64x64->128 multiplies before avalanching both halves.
+ */
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+ XXH_ASSERT(input != NULL);
+ XXH_ASSERT(secret != NULL);
+ XXH_ASSERT(9 <= len && len <= 16);
+ { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
+ xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
+ xxh_u64 const input_lo = XXH_readLE64(input);
+ xxh_u64 input_hi = XXH_readLE64(input + len - 8);
+ XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
+ /*
+ * Put len in the middle of m128 to ensure that the length gets mixed to
+ * both the low and high bits in the 128x64 multiply below.
+ */
+ m128.low64 += (xxh_u64)(len - 1) << 54;
+ input_hi ^= bitfliph;
+ /*
+ * Add the high 32 bits of input_hi to the high 32 bits of m128, then
+ * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
+ * the high 64 bits of m128.
+ *
+ * The best approach to this operation is different on 32-bit and 64-bit.
+ */
+ if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
+ /*
+ * 32-bit optimized version, which is more readable.
+ *
+ * On 32-bit, it removes an ADC and delays a dependency between the two
+ * halves of m128.high64, but it generates an extra mask on 64-bit.
+ */
+ m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
+ } else {
+ /*
+ * 64-bit optimized (albeit more confusing) version.
+ *
+ * Uses some properties of addition and multiplication to remove the mask:
+ *
+ * Let:
+ * a = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
+ * b = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
+ * c = XXH_PRIME32_2
+ *
+ * a + (b * c)
+ * Inverse Property: x + y - x == y
+ * a + (b * (1 + c - 1))
+ * Distributive Property: x * (y + z) == (x * y) + (x * z)
+ * a + (b * 1) + (b * (c - 1))
+ * Identity Property: x * 1 == x
+ * a + b + (b * (c - 1))
+ *
+ * Substitute a, b, and c:
+ * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
+ *
+ * Since input_hi.hi + input_hi.lo == input_hi, we get this:
+ * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
+ */
+ m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
+ }
+ /* m128 ^= XXH_swap64(m128 >> 64); */
+ m128.low64 ^= XXH_swap64(m128.high64);
+
+ { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
+ XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
+ h128.high64 += m128.high64 * XXH_PRIME64_2;
+
+ h128.low64 = XXH3_avalanche(h128.low64);
+ h128.high64 = XXH3_avalanche(h128.high64);
+ return h128;
+ } }
+}
+
+/*
+ * 128-bit hash of a 0 to 16 byte input: dispatches on len to the 9-16, 4-8
+ * and 1-3 byte routines, and handles the empty input itself.
+ *
+ * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
+ */
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(len <= 16);
+
+    if (len > 8) {
+        return XXH3_len_9to16_128b(input, len, secret, seed);
+    }
+    if (len >= 4) {
+        return XXH3_len_4to8_128b(input, len, secret, seed);
+    }
+    if (len != 0) {
+        return XXH3_len_1to3_128b(input, len, secret, seed);
+    }
+
+    /* empty input: the hash depends only on the seed and secret bytes [64, 96) */
+    {   xxh_u64 const flipLow  = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
+        xxh_u64 const flipHigh = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
+        XXH128_hash_t emptyHash;
+        emptyHash.low64  = XXH64_avalanche(seed ^ flipLow);
+        emptyHash.high64 = XXH64_avalanche(seed ^ flipHigh);
+        return emptyHash;
+    }
+}
+
+/*
+ * Mixes two 16-byte inputs into a 128-bit accumulator, using 32 bytes of secret.
+ * Each half receives XXH3_mix16B() of one input and is then XORed with the
+ * plain sum of the two 64-bit words of the *other* input.
+ *
+ * A bit slower than XXH3_mix16B, but handles multiply by zero better.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
+              const xxh_u8* secret, XXH64_hash_t seed)
+{
+    xxh_u64 const wordSum1 = XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
+    xxh_u64 const wordSum2 = XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
+
+    acc.low64  = (acc.low64  + XXH3_mix16B(input_1, secret+0,  seed)) ^ wordSum2;
+    acc.high64 = (acc.high64 + XXH3_mix16B(input_2, secret+16, seed)) ^ wordSum1;
+    return acc;
+}
+
+
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+ XXH64_hash_t seed)
+{ /*
+ * 128-bit hash for inputs of 17 to 128 bytes (see the assertion below).
+ *
+ * The accumulator starts at { len * XXH_PRIME64_1, 0 } and absorbs one to
+ * four XXH128_mix32B() rounds. Each round pairs 16 bytes counted from the
+ * start of the input with 16 bytes counted from its end, and the rounds
+ * run from the innermost pair outwards, finishing with the first and last
+ * 16 bytes. Both preprocessor branches issue the same rounds in the same
+ * order. `secretSize` is only checked by the assertion.
+ */
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+ XXH_ASSERT(16 < len && len <= 128);
+
+ { XXH128_hash_t acc;
+ acc.low64 = len * XXH_PRIME64_1;
+ acc.high64 = 0;
+
+#if XXH_SIZE_OPT >= 1
+ {
+ /* Smaller, but slightly slower. */
+ unsigned int i = (unsigned int)(len - 1) / 32; /* index of the innermost round: 0..3 for len 17..128 */
+ do {
+ acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+ } while (i-- != 0); /* counts down so that round 0 (outermost pair) runs last */
+ }
+#else
+ if (len > 32) {
+ if (len > 64) {
+ if (len > 96) {
+ acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
+ }
+ acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
+ }
+ acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
+ }
+ acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); /* always executed: first and last 16 bytes */
+#endif
+ { XXH128_hash_t h128;
+ h128.low64 = acc.low64 + acc.high64; /* low half: plain sum of both accumulator halves */
+ h128.high64 = (acc.low64 * XXH_PRIME64_1)
+ + (acc.high64 * XXH_PRIME64_4)
+ + ((len - seed) * XXH_PRIME64_2); /* high half: weighted halves plus a length/seed term */
+ h128.low64 = XXH3_avalanche(h128.low64);
+ h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); /* negated (two's complement) after the avalanche */
+ return h128;
+ }
+ }
+}
+
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+ XXH64_hash_t seed)
+{ /*
+ * 128-bit hash for inputs of 129 to XXH3_MIDSIZE_MAX bytes (see the
+ * assertion below).
+ *
+ * Three phases, all built on XXH128_mix32B():
+ *  1. the first 128 input bytes, four rounds, secret read from offset 0;
+ *  2. after an avalanche of both accumulator halves, every further
+ *     32-byte block that ends at or before `len`, with the secret read
+ *     from XXH3_MIDSIZE_STARTOFFSET onwards;
+ *  3. the last 32 input bytes, passed with the two 16-byte halves swapped
+ *     and with the seed negated.
+ * `secretSize` is only checked by the assertion.
+ */
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+ XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+ { XXH128_hash_t acc;
+ unsigned i;
+ acc.low64 = len * XXH_PRIME64_1;
+ acc.high64 = 0;
+ /*
+ * We set `i` as offset + 32. We do this so that unchanged
+ * `len` can be used as upper bound. This reaches a sweet spot
+ * where both x86 and aarch64 get simple agen and good codegen
+ * for the loop.
+ */
+ for (i = 32; i < 160; i += 32) { /* i = 32, 64, 96, 128: input bytes 0..127 */
+ acc = XXH128_mix32B(acc,
+ input + i - 32,
+ input + i - 16,
+ secret + i - 32,
+ seed);
+ }
+ acc.low64 = XXH3_avalanche(acc.low64);
+ acc.high64 = XXH3_avalanche(acc.high64);
+ /*
+ * NB: `i <= len` will duplicate the last 32-bytes if
+ * len % 32 was zero. This is an unfortunate necessity to keep
+ * the hash result stable.
+ */
+ for (i=160; i <= len; i += 32) {
+ acc = XXH128_mix32B(acc,
+ input + i - 32,
+ input + i - 16,
+ secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
+ seed);
+ }
+ /* last bytes */
+ acc = XXH128_mix32B(acc,
+ input + len - 16,
+ input + len - 32,
+ secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
+ (XXH64_hash_t)0 - seed); /* negated seed, halves in swapped order */
+
+ { XXH128_hash_t h128;
+ h128.low64 = acc.low64 + acc.high64; /* low half: plain sum of both accumulator halves */
+ h128.high64 = (acc.low64 * XXH_PRIME64_1)
+ + (acc.high64 * XXH_PRIME64_4)
+ + ((len - seed) * XXH_PRIME64_2); /* high half: weighted halves plus a length/seed term */
+ h128.low64 = XXH3_avalanche(h128.low64);
+ h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); /* negated (two's complement) after the avalanche */
+ return h128;
+ }
+ }
+}
+
+/*
+ * Core of the 128-bit long-input path.
+ * Runs XXH3_hashLong_internal_loop() over the whole input with the given
+ * accumulate/scramble callbacks, then merges the accumulators twice:
+ * once against the start of the secret for the low half, and once against
+ * its end (with a complemented length term) for the high half.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
+                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                            XXH3_f_accumulate f_acc,
+                            XXH3_f_scrambleAcc f_scramble)
+{
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    {   const xxh_u8* const lowSecret  = secret + XXH_SECRET_MERGEACCS_START;
+        const xxh_u8* const highSecret = secret + secretSize
+                                       - sizeof(acc) - XXH_SECRET_MERGEACCS_START;
+        xxh_u64 const lowStart  = (xxh_u64)len * XXH_PRIME64_1;
+        xxh_u64 const highStart = ~((xxh_u64)len * XXH_PRIME64_2);
+        XXH128_hash_t merged;
+        merged.low64  = XXH3_mergeAccs(acc, lowSecret, lowStart);
+        merged.high64 = XXH3_mergeAccs(acc, highSecret, highStart);
+        return merged;
+    }
+}
+
+/*
+ * Long-input 128-bit hash with the built-in secret XXH3_kSecret.
+ * The `seed64`, `secret` and `secretLen` parameters are ignored; they exist
+ * so that the function matches the common long-hash signature used by
+ * XXH3_128bits_internal().
+ *
+ * It's important for performance that XXH3_hashLong() is not inlined.
+ */
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
+XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
+ XXH64_hash_t seed64,
+ const void* XXH_RESTRICT secret, size_t secretLen)
+{
+ (void)seed64; (void)secret; (void)secretLen;
+ return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
+ XXH3_accumulate, XXH3_scrambleAcc);
+}
+
+/*
+ * Long-input 128-bit hash with a caller-supplied secret of `secretLen`
+ * bytes. The `seed64` parameter is ignored.
+ *
+ * It's important for performance to pass @p secretLen (when it's static)
+ * to the compiler, so that it can properly optimize the vectorized loop.
+ *
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
+ */
+XXH3_WITH_SECRET_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
+ XXH64_hash_t seed64,
+ const void* XXH_RESTRICT secret, size_t secretLen)
+{
+ (void)seed64;
+ return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
+ XXH3_accumulate, XXH3_scrambleAcc);
+}
+
+/*
+ * Seeded long-input 128-bit hash.
+ * A non-zero seed is expanded by `f_initSec` into a temporary secret of
+ * XXH_SECRET_DEFAULT_SIZE bytes, which is then used for hashing.
+ * A zero seed hashes directly with the built-in XXH3_kSecret.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
+                                     XXH64_hash_t seed64,
+                                     XXH3_f_accumulate f_acc,
+                                     XXH3_f_scrambleAcc f_scramble,
+                                     XXH3_f_initCustomSecret f_initSec)
+{
+    if (seed64 != 0) {
+        XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 seededSecret[XXH_SECRET_DEFAULT_SIZE];
+        f_initSec(seededSecret, seed64);
+        return XXH3_hashLong_128b_internal(input, len,
+                                           (const xxh_u8*)seededSecret, sizeof(seededSecret),
+                                           f_acc, f_scramble);
+    }
+    /* seed of 0: the default secret is used as-is */
+    return XXH3_hashLong_128b_internal(input, len,
+                                       XXH3_kSecret, sizeof(XXH3_kSecret),
+                                       f_acc, f_scramble);
+}
+
+/*
+ * Seeded long-input 128-bit hash with the default accumulate, scramble and
+ * secret-initialisation routines. The `secret` and `secretLen` parameters
+ * are ignored; they exist so that the function matches the common
+ * long-hash signature used by XXH3_128bits_internal().
+ *
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSeed(const void* input, size_t len,
+ XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
+{
+ (void)secret; (void)secretLen;
+ return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
+ XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
+}
+
+typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
+ XXH64_hash_t, const void* XXH_RESTRICT, size_t); /* long-input hash callback: (input, len, seed64, secret, secretLen) */
+
+/*
+ * Common entry point of the one-shot 128-bit variants: selects the hashing
+ * routine from the input length.
+ *   len <= 16                 : XXH3_len_0to16_128b
+ *   len <= 128                : XXH3_len_17to128_128b
+ *   len <= XXH3_MIDSIZE_MAX   : XXH3_len_129to240_128b
+ *   otherwise                 : the long-input callback `f_hl128`
+ * `f_hl128` is only called for inputs longer than XXH3_MIDSIZE_MAX.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_128bits_internal(const void* input, size_t len,
+                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+                      XXH3_hashLong128_f f_hl128)
+{
+    const xxh_u8* const inputBytes  = (const xxh_u8*)input;
+    const xxh_u8* const secretBytes = (const xxh_u8*)secret;
+
+    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+    /*
+     * The size of `secret` is a contract pre-condition: it is not checked
+     * at run time, because a test and a branch here would cost performance
+     * at every hash. Should a violation ever need handling, this is the
+     * place to do it.
+     */
+    if (len > XXH3_MIDSIZE_MAX)
+        return f_hl128(input, len, seed64, secret, secretLen);
+    if (len > 128)
+        return XXH3_len_129to240_128b(inputBytes, len, secretBytes, secretLen, seed64);
+    if (len > 16)
+        return XXH3_len_17to128_128b(inputBytes, len, secretBytes, secretLen, seed64);
+    return XXH3_len_0to16_128b(inputBytes, len, secretBytes, seed64);
+}
+
+
+/* === Public XXH128 API === */
+
+/*!
+ * @ingroup XXH3_family
+ * One-shot 128-bit XXH3 hash of the `len` bytes at `input`, computed with a
+ * seed of 0 and the built-in secret XXH3_kSecret. Inputs longer than
+ * XXH3_MIDSIZE_MAX are handed to XXH3_hashLong_128b_default().
+ */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
+{
+ return XXH3_128bits_internal(input, len, 0,
+ XXH3_kSecret, sizeof(XXH3_kSecret),
+ XXH3_hashLong_128b_default);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * One-shot 128-bit XXH3 hash of the `len` bytes at `input`, computed with a
+ * seed of 0 and the caller-supplied `secret` of `secretSize` bytes.
+ * The secret size is not validated here: XXH3_128bits_internal() treats
+ * `secretSize >= XXH3_SECRET_SIZE_MIN` as a pre-condition (assert only).
+ */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+ return XXH3_128bits_internal(input, len, 0,
+ (const xxh_u8*)secret, secretSize,
+ XXH3_hashLong_128b_withSecret);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * One-shot 128-bit XXH3 hash of the `len` bytes at `input`, computed with
+ * `seed` and the built-in secret XXH3_kSecret. Inputs longer than
+ * XXH3_MIDSIZE_MAX are handed to XXH3_hashLong_128b_withSeed().
+ */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+ return XXH3_128bits_internal(input, len, seed,
+ XXH3_kSecret, sizeof(XXH3_kSecret),
+ XXH3_hashLong_128b_withSeed);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * One-shot 128-bit XXH3 hash taking both a secret and a seed.
+ * Inputs longer than XXH3_MIDSIZE_MAX are hashed with `secret` (the seed is
+ * ignored by the long-input routine). Shorter inputs are hashed with
+ * `seed` and the built-in secret XXH3_kSecret.
+ */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len > XXH3_MIDSIZE_MAX) {
+        return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+    }
+    /* the long-hash callback is never reached for these lengths, hence NULL */
+    return XXH3_128bits_internal(input, len, seed,
+                                 XXH3_kSecret, sizeof(XXH3_kSecret),
+                                 NULL);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Forwards to XXH3_128bits_withSeed() with the same arguments and returns
+ * its result.
+ */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+ return XXH3_128bits_withSeed(input, len, seed);
+}
+
+
+/* === XXH3 128-bit streaming === */
+#ifndef XXH_NO_STREAM
+/*
+ * All initialization and update functions are identical to 64-bit streaming variant.
+ * The only difference is the finalization routine.
+ */
+
+/*!
+ * @ingroup XXH3_family
+ * Resets `statePtr` for a new 128-bit streaming hash.
+ * Forwards to XXH3_64bits_reset() and returns its error code.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
+{
+ return XXH3_64bits_reset(statePtr);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Resets `statePtr` for a new 128-bit streaming hash using `secret`.
+ * Forwards to XXH3_64bits_reset_withSecret() and returns its error code.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+ return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Resets `statePtr` for a new 128-bit streaming hash using `seed`.
+ * Forwards to XXH3_64bits_reset_withSeed() and returns its error code.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+ return XXH3_64bits_reset_withSeed(statePtr, seed);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Resets `statePtr` for a new 128-bit streaming hash using both `secret`
+ * and `seed`. Forwards to XXH3_64bits_reset_withSecretandSeed() and returns
+ * its error code.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+ return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Feeds `len` bytes from `input` into the streaming state.
+ * Forwards to XXH3_64bits_update() and returns its error code.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+ return XXH3_64bits_update(state, input, len);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Produces the 128-bit hash of everything fed to `state` so far.
+ * The state is taken as const. The secret used is `extSecret` when it is
+ * set, `customSecret` otherwise.
+ *
+ * Totals of at most XXH3_MIDSIZE_MAX bytes are hashed in one shot from
+ * `state->buffer`. Larger totals finish the accumulation with
+ * XXH3_digest_long() and merge the accumulators into the two halves.
+ */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
+{
+    const unsigned char* const secret = (state->extSecret != NULL) ? state->extSecret : state->customSecret;
+
+    if (state->totalLen <= XXH3_MIDSIZE_MAX) {
+        /* short input: the whole message is still held in the state buffer */
+        if (state->seed)
+            return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+        return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
+                                       secret, state->secretLimit + XXH_STRIPE_LEN);
+    }
+
+    {   XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH128_hash_t digest;
+        XXH3_digest_long(acc, state, secret);
+        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+        /* low half: merge against the start of the secret */
+        digest.low64  = XXH3_mergeAccs(acc,
+                                       secret + XXH_SECRET_MERGEACCS_START,
+                                       (xxh_u64)state->totalLen * XXH_PRIME64_1);
+        /* high half: merge against the end of the secret */
+        digest.high64 = XXH3_mergeAccs(acc,
+                                       secret + state->secretLimit + XXH_STRIPE_LEN
+                                              - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                                       ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
+        return digest;
+    }
+}
+#endif /* !XXH_NO_STREAM */
+/* 128-bit utility functions */
+
+#include <string.h> /* memcmp, memcpy */
+
+/* return : 1 if equal, 0 if different */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
+{
+    /* XXH128_hash_t is compact (no padding byte), so a raw byte comparison is exact */
+    return memcmp(&h1, &h2, sizeof(h1)) == 0;
+}
+
+/* Three-way comparison of two XXH128_hash_t values, ordered by the high
+ * 64 bits first and by the low 64 bits on a tie.
+ * This prototype is compatible with stdlib's qsort().
+ * @return :  1 if *h128_1 > *h128_2
+ *           -1 if *h128_1 < *h128_2
+ *            0 if *h128_1 == *h128_2 */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
+{
+    const XXH128_hash_t* const lhs = (const XXH128_hash_t*)h128_1;
+    const XXH128_hash_t* const rhs = (const XXH128_hash_t*)h128_2;
+
+    /* hash values are expected to differ most of the time: test the high half first */
+    if (lhs->high64 != rhs->high64)
+        return (lhs->high64 > rhs->high64) ? 1 : -1;
+    if (lhs->low64 != rhs->low64)
+        return (lhs->low64 > rhs->low64) ? 1 : -1;
+    return 0;
+}
+
+
+/*====== Canonical representation ======*/
+/*!
+ * @ingroup XXH3_family
+ * Writes `hash` into `dst` in canonical form: the high 64 bits first, then
+ * the low 64 bits. On little-endian CPUs each half is byte-swapped before
+ * being stored.
+ */
+XXH_PUBLIC_API void
+XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
+{
+    XXH64_hash_t upper = hash.high64;
+    XXH64_hash_t lower = hash.low64;
+    char* const out = (char*)dst;
+
+    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        upper = XXH_swap64(upper);
+        lower = XXH_swap64(lower);
+    }
+    XXH_memcpy(out, &upper, sizeof(upper));
+    XXH_memcpy(out + sizeof(upper), &lower, sizeof(lower));
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Rebuilds an XXH128_hash_t from its canonical form: the high 64 bits are
+ * read big-endian from the start of `src`, the low 64 bits from byte 8 of
+ * `src->digest`.
+ */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
+{
+ XXH128_hash_t h;
+ h.high64 = XXH_readBE64(src);
+ h.low64 = XXH_readBE64(src->digest + 8);
+ return h;
+}
+
+
+
+/* ==========================================
+ * Secret generators
+ * ==========================================
+ */
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) /* smaller of x and y; the selected argument is evaluated twice */
+
+/*
+ * XORs a 128-bit hash into the 16 bytes at `dst`, in little-endian order:
+ * the low half goes into bytes 0..7, the high half into bytes 8..15.
+ */
+XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
+{
+    char* const firstHalf  = (char*)dst;
+    char* const secondHalf = firstHalf + 8;
+    xxh_u64 const mixedLow  = XXH_readLE64(firstHalf)  ^ h128.low64;
+    xxh_u64 const mixedHigh = XXH_readLE64(secondHalf) ^ h128.high64;
+
+    XXH_writeLE64(firstHalf,  mixedLow);
+    XXH_writeLE64(secondHalf, mixedHigh);
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Derives a secret of `secretSize` bytes from `customSeed` and writes it to
+ * `secretBuffer`.
+ *
+ * The buffer is first filled with repeated copies of the seed material.
+ * Each 16-byte segment is then XORed with an XXH128 hash derived from the
+ * seed and the segment index, and the final 16 bytes get one more XOR.
+ * When `customSeedSize` is 0, XXH3_kSecret is used as seed material.
+ *
+ * @return XXH_OK on success. When XXH_DEBUGLEVEL is below 1, XXH_ERROR is
+ *         returned if `secretBuffer` is NULL, if `secretSize` is below
+ *         XXH3_SECRET_SIZE_MIN, or if `customSeed` is NULL with a non-zero
+ *         size; with XXH_DEBUGLEVEL >= 1 these conditions are asserted.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
+{
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(secretBuffer != NULL);
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+#else
+    /* production mode, assert() are disabled */
+    if ((secretBuffer == NULL) || (secretSize < XXH3_SECRET_SIZE_MIN))
+        return XXH_ERROR;
+#endif
+
+    if (customSeedSize == 0) {
+        customSeed = XXH3_kSecret;
+        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+    }
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(customSeed != NULL);
+#else
+    if (customSeed == NULL) return XXH_ERROR;
+#endif
+
+    {   char* const out = (char*)secretBuffer;
+        size_t const segCount = secretSize / 16;
+        size_t filled = 0;
+        size_t seg;
+        XXH128_canonical_t scrambler;
+
+        /* Tile the seed material over the whole buffer, truncating the last copy */
+        while (filled < secretSize) {
+            size_t const chunk = XXH_MIN((secretSize - filled), customSeedSize);
+            memcpy(out + filled, customSeed, chunk);
+            filled += chunk;
+        }
+
+        /* Scramble every full 16-byte segment, using its index as hash seed */
+        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+        for (seg = 0; seg < segCount; seg++) {
+            XXH3_combine16(out + seg*16, XXH128(&scrambler, sizeof(scrambler), seg));
+        }
+        /* last segment */
+        XXH3_combine16(out + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
+    }
+    return XXH_OK;
+}
+
+/*!
+ * @ingroup XXH3_family
+ * Builds the secret that XXH3_initCustomSecret() derives from `seed` and
+ * copies its XXH_SECRET_DEFAULT_SIZE bytes into `secretBuffer`.
+ * `secretBuffer` must not be NULL (assert only).
+ */
+XXH_PUBLIC_API void
+XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
+{
+    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 seededSecret[XXH_SECRET_DEFAULT_SIZE];
+
+    /* generate into an aligned scratch buffer, then copy to the caller's buffer */
+    XXH3_initCustomSecret(seededSecret, seed);
+    XXH_ASSERT(secretBuffer != NULL);
+    memcpy(secretBuffer, seededSecret, sizeof(seededSecret));
+}
+
+
+
+/* Pop our optimization override from above */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
+# pragma GCC pop_options
+#endif
+
+#endif /* XXH_NO_LONG_LONG */
+
+#endif /* XXH_NO_XXH3 */
+
+/*!
+ * @}
+ */
+#endif /* XXH_IMPLEMENTATION */
+
+
+#if defined (__cplusplus)
+} /* extern "C" */
+#endif