Diffstat
-rw-r--r--  src/zstd/doc/README.md                              |   20
-rw-r--r--  src/zstd/doc/educational_decoder/Makefile           |   34
-rw-r--r--  src/zstd/doc/educational_decoder/README.md          |   29
-rw-r--r--  src/zstd/doc/educational_decoder/harness.c          |  125
-rw-r--r--  src/zstd/doc/educational_decoder/zstd_decompress.c  | 2303
-rw-r--r--  src/zstd/doc/educational_decoder/zstd_decompress.h  |   58
-rw-r--r--  src/zstd/doc/images/Cspeed4.png                     |  bin 0 -> 71276 bytes
-rw-r--r--  src/zstd/doc/images/DCspeed5.png                    |  bin 0 -> 69278 bytes
-rw-r--r--  src/zstd/doc/images/Dspeed4.png                     |  bin 0 -> 24692 bytes
-rw-r--r--  src/zstd/doc/images/dict-cr.png                     |  bin 0 -> 90047 bytes
-rw-r--r--  src/zstd/doc/images/dict-cs.png                     |  bin 0 -> 93837 bytes
-rw-r--r--  src/zstd/doc/images/dict-ds.png                     |  bin 0 -> 89590 bytes
-rw-r--r--  src/zstd/doc/images/ldmCspeed.png                   |  bin 0 -> 72251 bytes
-rw-r--r--  src/zstd/doc/images/ldmDspeed.png                   |  bin 0 -> 27594 bytes
-rw-r--r--  src/zstd/doc/zstd_compression_format.md             | 1535
-rw-r--r--  src/zstd/doc/zstd_manual.html                       | 1200
16 files changed, 5304 insertions, 0 deletions
diff --git a/src/zstd/doc/README.md b/src/zstd/doc/README.md
new file mode 100644
index 00000000..47cfe361
--- /dev/null
+++ b/src/zstd/doc/README.md
@@ -0,0 +1,20 @@
+Zstandard Documentation
+=======================
+
+This directory contains material defining the Zstandard format,
+as well as material to help with using the `zstd` library.
+
+__`zstd_compression_format.md`__ : This document defines the Zstandard compression format.
+Compliant decoders must adhere to this document,
+and compliant encoders must generate data that follows it.
+
+__`educational_decoder`__ : This directory contains an implementation of a Zstandard decoder,
+compliant with the Zstandard compression format.
+It can be used, for example, to better understand the format,
+or as the basis for a separate implementation of a Zstandard decoder/encoder.
+
+__`zstd_manual.html`__ : Documentation on the functions found in `zstd.h`.
+See [http://zstd.net/zstd_manual.html](http://zstd.net/zstd_manual.html) for
+the manual released with the latest official `zstd` release.
+
+
diff --git a/src/zstd/doc/educational_decoder/Makefile b/src/zstd/doc/educational_decoder/Makefile
new file mode 100644
index 00000000..ace1294f
--- /dev/null
+++ b/src/zstd/doc/educational_decoder/Makefile
@@ -0,0 +1,34 @@
+HARNESS_FILES=*.c
+
+MULTITHREAD_LDFLAGS = -pthread
+DEBUGFLAGS= -g -DZSTD_DEBUG=1
+CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
+            -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
+CFLAGS ?= -O3
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+          -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
+          -Wstrict-prototypes -Wundef -Wformat-security \
+          -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+          -Wredundant-decls
+CFLAGS += $(DEBUGFLAGS)
+CFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
+
+harness: $(HARNESS_FILES)
+	$(CC) $(FLAGS) $^ -o $@
+
+clean:
+	@$(RM) -f harness
+	@$(RM) -rf harness.dSYM
+
+test: harness
+	@zstd README.md -o tmp.zst
+	@./harness tmp.zst tmp
+	@diff -s tmp README.md
+	@$(RM) -f tmp*
+	@zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md
+	@zstd -D dictionary README.md -o tmp.zst
+	@./harness tmp.zst tmp dictionary
+	@diff -s tmp README.md
+	@$(RM) -f tmp* dictionary
+	@make clean
diff --git a/src/zstd/doc/educational_decoder/README.md b/src/zstd/doc/educational_decoder/README.md
new file mode 100644
index 00000000..e3b9bf58
--- /dev/null
+++ b/src/zstd/doc/educational_decoder/README.md
@@ -0,0 +1,29 @@
+Educational Decoder
+===================
+
+`zstd_decompress.c` is a self-contained C99 implementation of a decoder,
+written according to the [Zstandard format specification].
+While it does not implement as many features as the reference decoder,
+such as the streaming API or content checksums, it is written to be easy
+to follow, to help explain how the Zstandard format works.
+It's laid out to match the [format specification],
+so it can be used to understand how complex segments could be implemented.
+It also contains implementations of Huffman and FSE table decoding.
+
+[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
+[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
+
+`harness.c` provides a simple test harness around the decoder:
+
+    harness <input-file> <output-file> [dictionary]
+
+As an additional resource to be used with this decoder,
+see the `decodecorpus` tool in the [tests] directory.
+It generates valid Zstandard frames that can be used to verify
+a Zstandard decoder implementation.
+Note that to use the tool to verify this decoder implementation,
+the `--content-size` flag should be set,
+as this decoder does not handle streaming decoding,
+and so it must know the decompressed size in advance.
+
+[tests]: https://github.com/facebook/zstd/blob/dev/tests/
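The decoder exposes a small API, visible through `zstd_decompress.h` in this patch. A minimal sketch of how a caller might drive it, assuming a buffer `frame` holding one complete frame (the function name and the fallback bound are illustrative, mirroring `harness.c` below):

```c
#include <stdlib.h>
#include "zstd_decompress.h"

// Decode one frame from `frame`/`frame_len` into a freshly allocated buffer.
// Returns the decompressed size; the caller owns and frees *out.
size_t decode_one_frame(const unsigned char *frame, size_t frame_len,
                        unsigned char **out) {
    // The frame header may carry the decompressed size; (size_t)-1 means
    // it doesn't, and the caller must fall back on an upper bound.
    size_t out_len = ZSTD_get_decompressed_size(frame, frame_len);
    if (out_len == (size_t)-1)
        out_len = 16 * frame_len; // assumed worst-case ratio, as in harness.c
    *out = malloc(out_len);
    if (!*out)
        return 0;
    // The educational decoder calls exit(1) on malformed input, so a
    // return here means the decode succeeded.
    return ZSTD_decompress(*out, out_len, frame, frame_len);
}
```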
diff --git a/src/zstd/doc/educational_decoder/harness.c b/src/zstd/doc/educational_decoder/harness.c
new file mode 100644
index 00000000..47882b16
--- /dev/null
+++ b/src/zstd/doc/educational_decoder/harness.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "zstd_decompress.h"
+
+typedef unsigned char u8;
+
+// If the data doesn't carry its decompressed size, fall back on assuming the
+// compression ratio is at most 16
+#define MAX_COMPRESSION_RATIO (16)
+
+// Protect against allocating too much memory for output
+#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
+
+u8 *input;
+u8 *output;
+u8 *dict;
+
+size_t read_file(const char *path, u8 **ptr) {
+    FILE *f = fopen(path, "rb");
+    if (!f) {
+        fprintf(stderr, "failed to open file %s\n", path);
+        exit(1);
+    }
+
+    fseek(f, 0L, SEEK_END);
+    size_t size = ftell(f);
+    rewind(f);
+
+    *ptr = malloc(size);
+    if (!*ptr) {
+        fprintf(stderr, "failed to allocate memory to hold %s\n", path);
+        exit(1);
+    }
+
+    size_t pos = 0;
+    while (!feof(f)) {
+        size_t read = fread(&(*ptr)[pos], 1, size - pos, f);
+        if (ferror(f)) {
+            fprintf(stderr, "error while reading file %s\n", path);
+            exit(1);
+        }
+        pos += read;
+    }
+
+    fclose(f);
+
+    return pos;
+}
+
+void write_file(const char *path, const u8 *ptr, size_t size) {
+    FILE *f = fopen(path, "wb");
+    if (!f) {
+        fprintf(stderr, "failed to open file %s\n", path);
+        exit(1);
+    }
+
+    size_t written = 0;
+    while (written < size) {
+        written += fwrite(&ptr[written], 1, size - written, f);
+        if (ferror(f)) {
+            fprintf(stderr, "error while writing file %s\n", path);
+            exit(1);
+        }
+    }
+
+    fclose(f);
+}
+
+int main(int argc, char **argv) {
+    if (argc < 3) {
+        fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
+                argv[0]);
+
+        return 1;
+    }
+
+    size_t input_size = read_file(argv[1], &input);
+    size_t dict_size = 0;
+    if (argc >= 4) {
+        dict_size = read_file(argv[3], &dict);
+    }
+
+    size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
+    if (decompressed_size == (size_t)-1) {
+        decompressed_size = MAX_COMPRESSION_RATIO * input_size;
+        fprintf(stderr, "WARNING: Compressed data does not contain "
+                        "decompressed size, going to assume the compression "
+                        "ratio is at most %d (decompressed size of at most "
+                        "%zu)\n",
+                MAX_COMPRESSION_RATIO, decompressed_size);
+    }
+    if (decompressed_size > MAX_OUTPUT_SIZE) {
+        fprintf(stderr,
+                "Required output size too large for this implementation\n");
+        return 1;
+    }
+    output = malloc(decompressed_size);
+    if (!output) {
+        fprintf(stderr, "failed to allocate memory\n");
+        return 1;
+    }
+
+    dictionary_t* const parsed_dict = create_dictionary();
+    if (dict) {
+        parse_dictionary(parsed_dict, dict, dict_size);
+    }
+    size_t decompressed =
+        ZSTD_decompress_with_dict(output, decompressed_size,
+                                  input, input_size, parsed_dict);
+
+    free_dictionary(parsed_dict);
+
+    write_file(argv[2], output, decompressed);
+
+    free(input);
+    free(output);
+    free(dict);
+    input = output = dict = NULL;
+}
diff --git a/src/zstd/doc/educational_decoder/zstd_decompress.c b/src/zstd/doc/educational_decoder/zstd_decompress.c
new file mode 100644
index 00000000..bea0e0ce
--- /dev/null
+++ b/src/zstd/doc/educational_decoder/zstd_decompress.c
@@ -0,0 +1,2303 @@
+/*
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/// Zstandard educational decoder implementation
+/// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zstd_decompress.h"
+
+/******* UTILITY MACROS AND TYPES *********************************************/
+// The maximum decompressed size of a block is 128 KB, and literal blocks
+// can't be larger than their block
+#define MAX_LITERALS_SIZE ((size_t)128 * 1024)
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/// This decoder calls exit(1) when it encounters an error; a production
+/// library should instead propagate error codes
+#define ERROR(s)                           \
+    do {                                   \
+        fprintf(stderr, "Error: %s\n", s); \
+        exit(1);                           \
+    } while (0)
+#define INP_SIZE()                                               \
+    ERROR("Input buffer smaller than it should be or input is " \
+          "corrupted")
+#define OUT_SIZE() ERROR("Output buffer too small for output")
+#define CORRUPTION() ERROR("Corruption detected while decompressing")
+#define BAD_ALLOC() ERROR("Memory allocation error")
+#define IMPOSSIBLE() ERROR("An impossibility has occurred")
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+/******* END UTILITY MACROS AND TYPES *****************************************/
+
+/******* IMPLEMENTATION PRIMITIVE PROTOTYPES **********************************/
+/// The implementations for these functions can be found at the bottom of this
+/// file. They implement low-level functionality needed for the higher-level
+/// decompression functions.
+
+/*** IO STREAM OPERATIONS *************/
+
+/// ostream_t/istream_t are used to wrap the pointers/length data passed into
+/// ZSTD_decompress, so that all IO operations are safely bounds checked.
+/// They are written/read forward, and reads are treated as little-endian.
+/// They should be used opaquely to ensure safety.
+typedef struct {
+    u8 *ptr;
+    size_t len;
+} ostream_t;
+
+typedef struct {
+    const u8 *ptr;
+    size_t len;
+
+    // Input often reads a few bits at a time, so maintain an internal offset
+    int bit_offset;
+} istream_t;
+
+/// The following two functions are the only ones that allow the istream to be
+/// non-byte aligned
+
+/// Reads `num` bits from a bitstream, and updates the internal offset
+static inline u64 IO_read_bits(istream_t *const in, const int num_bits);
+/// Backs up the stream by `num` bits so they can be read again
+static inline void IO_rewind_bits(istream_t *const in, const int num_bits);
+/// If the remaining bits in a byte will be unused, advance to the end of the
+/// byte
+static inline void IO_align_stream(istream_t *const in);
+
+/// Write the given byte into the output stream
+static inline void IO_write_byte(ostream_t *const out, u8 symb);
+
+/// Returns the number of bytes left to be read in this stream. The stream must
+/// be byte aligned.
+static inline size_t IO_istream_len(const istream_t *const in);
+
+/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
+/// was skipped. The stream must be byte aligned.
+static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len);
+/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
+/// was skipped so it can be written to.
+static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len);
+
+/// Advance the inner state by `len` bytes. The stream must be byte aligned.
+static inline void IO_advance_input(istream_t *const in, size_t len);
+
+/// Returns an `ostream_t` constructed from the given pointer and length.
+static inline ostream_t IO_make_ostream(u8 *out, size_t len);
+/// Returns an `istream_t` constructed from the given pointer and length.
+static inline istream_t IO_make_istream(const u8 *in, size_t len);
+
+/// Returns an `istream_t` with the same base as `in`, and length `len`.
+/// Then, advance `in` to account for the consumed bytes.
+/// `in` must be byte aligned.
+static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len);
+/*** END IO STREAM OPERATIONS *********/
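A sketch of the usage pattern these streams are built for (illustrative only; `buf` is a placeholder): wrap the raw buffer once, then perform every read through the `IO_*` helpers so that an overrun becomes a reported error instead of undefined behavior.

```c
static void istream_usage_sketch(const u8 *buf, size_t len) {
    istream_t in = IO_make_istream(buf, len);

    u64 flags = IO_read_bits(&in, 3); // bit-level read; stream now unaligned
    (void)flags;
    IO_align_stream(&in);             // discard the rest of the partial byte

    if (IO_istream_len(&in) >= 4) {   // byte-level reads require alignment
        const u8 *chunk = IO_get_read_ptr(&in, 4);
        (void)chunk;
    }
}
```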
+
+/*** BITSTREAM OPERATIONS *************/
+/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits,
+/// and return them interpreted as a little-endian unsigned integer.
+static inline u64 read_bits_LE(const u8 *src, const int num_bits,
+                               const size_t offset);
+
+/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so
+/// it updates `offset` to `offset - bits`, and then reads `bits` bits from
+/// `src + offset`. If the offset becomes negative, the extra bits at the
+/// bottom are filled in with `0` bits instead of reading from before `src`.
+static inline u64 STREAM_read_bits(const u8 *src, const int bits,
+                                   i64 *const offset);
+/*** END BITSTREAM OPERATIONS *********/
+
+/*** BIT COUNTING OPERATIONS **********/
+/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0`
+static inline int highest_set_bit(const u64 num);
+/*** END BIT COUNTING OPERATIONS ******/
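A worked illustration of the backward-read convention (the numbers are made up): each call moves `offset` down first, then reads at the new position, so the stream is consumed from its end toward its start.

```c
static void backward_read_sketch(const u8 *src) {
    i64 offset = 10;                           // 10 bits remain in the stream
    u64 a = STREAM_read_bits(src, 6, &offset); // offset is now 4
    u64 b = STREAM_read_bits(src, 6, &offset); // offset is now -2: the two
    // "missing" low bits of `b` are zero-filled rather than read from
    // before `src`
    (void)a; (void)b;
}
```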
+
+/*** HUFFMAN PRIMITIVES ***************/
+// The table decode method uses exponential memory, so we need to limit depth
+#define HUF_MAX_BITS (16)
+
+// Limit the maximum number of symbols to 256 so we can store a symbol in a byte
+#define HUF_MAX_SYMBS (256)
+
+/// Structure containing all tables necessary for efficient Huffman decoding
+typedef struct {
+    u8 *symbols;
+    u8 *num_bits;
+    int max_bits;
+} HUF_dtable;
+
+/// Decode a single symbol and read in enough bits to refresh the state
+static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,
+                                   u16 *const state, const u8 *const src,
+                                   i64 *const offset);
+/// Read in a full state's worth of bits to initialize it
+static inline void HUF_init_state(const HUF_dtable *const dtable,
+                                  u16 *const state, const u8 *const src,
+                                  i64 *const offset);
+
+/// Decompresses a single Huffman stream, returns the number of bytes decoded.
+/// `src_len` must be the exact length of the Huffman-coded block.
+static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
+                                     ostream_t *const out, istream_t *const in);
+/// Same as above, but decodes 4 streams, formatted as in the Zstandard
+/// specification.
+/// `src_len` must be the exact length of the Huffman-coded block.
+static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,
+                                     ostream_t *const out, istream_t *const in);
+
+/// Initialize a Huffman decoding table using the table of bit counts provided
+static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,
+                            const int num_symbs);
+/// Initialize a Huffman decoding table using the table of weights provided.
+/// Weights follow the definition provided in the Zstandard specification.
+static void HUF_init_dtable_usingweights(HUF_dtable *const table,
+                                         const u8 *const weights,
+                                         const int num_symbs);
+
+/// Free the malloc'ed parts of a decoding table
+static void HUF_free_dtable(HUF_dtable *const dtable);
+
+/// Deep copy a decoding table, so that it can be used and free'd without
+/// impacting the source table
+static void HUF_copy_dtable(HUF_dtable *const dst, const HUF_dtable *const src);
+/*** END HUFFMAN PRIMITIVES ***********/
+
+/*** FSE PRIMITIVES *******************/
+/// For more description of FSE see
+/// https://github.com/Cyan4973/FiniteStateEntropy/
+
+// FSE table decoding uses exponential memory, so limit the maximum accuracy
+#define FSE_MAX_ACCURACY_LOG (15)
+// Limit the maximum number of symbols so they can be stored in a single byte
+#define FSE_MAX_SYMBS (256)
+
+/// The tables needed to decode FSE encoded streams
+typedef struct {
+    u8 *symbols;
+    u8 *num_bits;
+    u16 *new_state_base;
+    int accuracy_log;
+} FSE_dtable;
+
+/// Return the symbol for the current state
+static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,
+                                 const u16 state);
+/// Read the number of bits necessary to update the state, update it, and
+/// shift the offset back to reflect the bits read
+static inline void FSE_update_state(const FSE_dtable *const dtable,
+                                    u16 *const state, const u8 *const src,
+                                    i64 *const offset);
+
+/// Combine peek and update: decode a symbol and update the state
+static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,
+                                   u16 *const state, const u8 *const src,
+                                   i64 *const offset);
+
+/// Read bits from the stream to initialize the state and shift the offset back
+static inline void FSE_init_state(const FSE_dtable *const dtable,
+                                  u16 *const state, const u8 *const src,
+                                  i64 *const offset);
+
+/// Decompress two interleaved bitstreams (e.g. compressed Huffman weights)
+/// using an FSE decoding table. `src_len` must be the exact length of the
+/// block.
+static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
+                                          ostream_t *const out,
+                                          istream_t *const in);
+
+/// Initialize a decoding table using normalized frequencies.
+static void FSE_init_dtable(FSE_dtable *const dtable,
+                            const i16 *const norm_freqs, const int num_symbs,
+                            const int accuracy_log);
+
+/// Decode an FSE header as defined in the Zstandard format specification and
+/// use the decoded frequencies to initialize a decoding table.
+static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
+                              const int max_accuracy_log);
+
+/// Initialize an FSE table that will always return the same symbol and consume
+/// 0 bits per symbol, to be used for RLE mode in sequence commands
+static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb);
+
+/// Free the malloc'ed parts of a decoding table
+static void FSE_free_dtable(FSE_dtable *const dtable);
+
+/// Deep copy a decoding table, so that it can be used and free'd without
+/// impacting the source table
+static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src);
+/*** END FSE PRIMITIVES ***************/
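Taken together, these FSE primitives are meant to be driven in a peek/update loop; a sketch of that pattern (illustrative, mirroring how the sequence decoder later in this file drives its three states):

```c
static void fse_decode_sketch(const FSE_dtable *dtable, const u8 *src,
                              i64 bit_offset, size_t count, u8 *out) {
    u16 state;
    FSE_init_state(dtable, &state, src, &bit_offset);
    for (size_t i = 0; i < count; i++) {
        out[i] = FSE_peek_symbol(dtable, state); // symbol for current state
        FSE_update_state(dtable, &state, src, &bit_offset); // read next bits
    }
    // FSE_decode_symbol folds the peek and update steps into one call
}
```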
+
+/******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/
+
+/******* ZSTD HELPER STRUCTS AND PROTOTYPES ***********************************/
+
+/// A small structure that can be reused in various places that need to access
+/// frame header information
+typedef struct {
+    // The size of window that we need to be able to contiguously store for
+    // references
+    size_t window_size;
+    // The total output size of this compressed frame
+    size_t frame_content_size;
+
+    // The dictionary id if this frame uses one
+    u32 dictionary_id;
+
+    // Whether or not the content of this frame has a checksum
+    int content_checksum_flag;
+    // Whether or not the output for this frame is in a single segment
+    int single_segment_flag;
+} frame_header_t;
+
+/// The context needed to decode blocks in a frame
+typedef struct {
+    frame_header_t header;
+
+    // The total amount of data available for backreferences, used to
+    // determine if an offset is too large to be correct
+    size_t current_total_output;
+
+    const u8 *dict_content;
+    size_t dict_content_len;
+
+    // Entropy decoding tables, kept so they can be repeated by future blocks
+    // instead of retransmitting them
+    HUF_dtable literals_dtable;
+    FSE_dtable ll_dtable;
+    FSE_dtable ml_dtable;
+    FSE_dtable of_dtable;
+
+    // The last 3 offsets for the special "repeat offsets"
+    u64 previous_offsets[3];
+} frame_context_t;
+
+/// The decoded contents of a dictionary so that it doesn't have to be repeated
+/// for each frame that uses it
+struct dictionary_s {
+    // Entropy tables
+    HUF_dtable literals_dtable;
+    FSE_dtable ll_dtable;
+    FSE_dtable ml_dtable;
+    FSE_dtable of_dtable;
+
+    // Raw content for backreferences
+    u8 *content;
+    size_t content_size;
+
+    // Offset history to prepopulate the frame's history
+    u64 previous_offsets[3];
+
+    u32 dictionary_id;
+};
+
+/// A tuple containing the parts necessary to decode and execute a ZSTD
+/// sequence command
+typedef struct {
+    u32 literal_length;
+    u32 match_length;
+    u32 offset;
+} sequence_command_t;
+
+/// The decoder works top-down, starting at the high level like Zstd frames,
+/// and working down to lower, more technical levels such as blocks, literals,
+/// and sequences. The high-level functions roughly follow the outline of the
+/// format specification:
+/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
+
+/// Before the implementation of each high-level function declared here, the
+/// prototypes for their helper functions are defined and explained
+
+/// Decode a single Zstd frame, or error if the input is not a valid frame.
+/// Accepts a dict argument, which may be NULL indicating no dictionary.
+/// See
+/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame-concatenation
+static void decode_frame(ostream_t *const out, istream_t *const in,
+                         const dictionary_t *const dict);
+
+// Decode data in a compressed block
+static void decompress_block(frame_context_t *const ctx, ostream_t *const out,
+                             istream_t *const in);
+
+// Decode the literals section of a block
+static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
+                              u8 **const literals);
+
+// Decode the sequences part of a block
+static size_t decode_sequences(frame_context_t *const ctx, istream_t *const in,
+                               sequence_command_t **const sequences);
+
+// Execute the decoded sequences on the literals block
+static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
+                              const u8 *const literals,
+                              const size_t literals_len,
+                              const sequence_command_t *const sequences,
+                              const size_t num_sequences);
+
+// Copies literals and returns the total literal length that was copied
+static u32 copy_literals(const size_t seq, istream_t *litstream,
+                         ostream_t *const out);
+
+// Given an offset code from a sequence command (either an actual offset value
+// or an index for a previous offset), computes the correct offset and updates
+// the offset history
+static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);
+
+// Given an offset, match length, and total output, as well as the frame
+// context for the dictionary, determines if the dictionary is used and
+// executes the copy operation
+static void execute_match_copy(frame_context_t *const ctx, size_t offset,
+                               size_t match_length, size_t total_output,
+                               ostream_t *const out);
+
+/******* END ZSTD HELPER STRUCTS AND PROTOTYPES *******************************/
+
+size_t ZSTD_decompress(void *const dst, const size_t dst_len,
+                       const void *const src, const size_t src_len) {
+    dictionary_t* uninit_dict = create_dictionary();
+    size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,
+                                                         src_len, uninit_dict);
+    free_dictionary(uninit_dict);
+    return decomp_size;
+}
+
+size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
+                                 const void *const src, const size_t src_len,
+                                 dictionary_t* parsed_dict) {
+
+    istream_t in = IO_make_istream(src, src_len);
+    ostream_t out = IO_make_ostream(dst, dst_len);
+
+    // "A content compressed by Zstandard is transformed into a Zstandard frame.
+    // Multiple frames can be appended into a single file or stream. A frame is
+    // totally independent, has a defined beginning and end, and a set of
+    // parameters which tells the decoder how to decompress it."
+
+    /* this decoder assumes decompression of a single frame */
+    decode_frame(&out, &in, parsed_dict);
+
+    return out.ptr - (u8 *)dst;
+}
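Since the format allows concatenated frames but this decoder deliberately stops after one, a fuller implementation might loop instead. A sketch of that extension (not part of this patch; note a production decoder would also have to skip skippable frames, which `decode_frame` rejects):

```c
size_t decompress_all_frames(void *dst, size_t dst_len,
                             const void *src, size_t src_len,
                             dictionary_t *parsed_dict) {
    istream_t in = IO_make_istream(src, src_len);
    ostream_t out = IO_make_ostream(dst, dst_len);

    // Decode frames back to back until the input is exhausted
    while (IO_istream_len(&in) > 0) {
        decode_frame(&out, &in, parsed_dict);
    }

    return (size_t)(out.ptr - (u8 *)dst);
}
```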
+
+/******* FRAME DECODING ******************************************************/
+
+static void decode_data_frame(ostream_t *const out, istream_t *const in,
+                              const dictionary_t *const dict);
+static void init_frame_context(frame_context_t *const context,
+                               istream_t *const in,
+                               const dictionary_t *const dict);
+static void free_frame_context(frame_context_t *const context);
+static void parse_frame_header(frame_header_t *const header,
+                               istream_t *const in);
+static void frame_context_apply_dict(frame_context_t *const ctx,
+                                     const dictionary_t *const dict);
+
+static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
+                            istream_t *const in);
+
+static void decode_frame(ostream_t *const out, istream_t *const in,
+                         const dictionary_t *const dict) {
+    const u32 magic_number = IO_read_bits(in, 32);
+    // Zstandard frame
+    //
+    // "Magic_Number
+    //
+    // 4 Bytes, little-endian format. Value : 0xFD2FB528"
+    if (magic_number == 0xFD2FB528U) {
+        // ZSTD frame
+        decode_data_frame(out, in, dict);
+
+        return;
+    }
+
+    // not a real frame or a skippable frame
+    ERROR("Tried to decode non-ZSTD frame");
+}
+
+/// Decode a frame that contains compressed data. Not all frames do as there
+/// are skippable frames.
+/// See
+/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#general-structure-of-zstandard-frame-format
+static void decode_data_frame(ostream_t *const out, istream_t *const in,
+                              const dictionary_t *const dict) {
+    frame_context_t ctx;
+
+    // Initialize the context that needs to be carried from block to block
+    init_frame_context(&ctx, in, dict);
+
+    if (ctx.header.frame_content_size != 0 &&
+        ctx.header.frame_content_size > out->len) {
+        OUT_SIZE();
+    }
+
+    decompress_data(&ctx, out, in);
+
+    free_frame_context(&ctx);
+}
+
+/// Takes the information provided in the header and dictionary, and
+/// initializes the context for this frame
+static void init_frame_context(frame_context_t *const context,
+                               istream_t *const in,
+                               const dictionary_t *const dict) {
+    // Most fields in context are correct when initialized to 0
+    memset(context, 0, sizeof(frame_context_t));
+
+    // Parse data from the frame header
+    parse_frame_header(&context->header, in);
+
+    // Set up the offset history for the repeat offset commands
+    context->previous_offsets[0] = 1;
+    context->previous_offsets[1] = 4;
+    context->previous_offsets[2] = 8;
+
+    // Apply details from the dict if it exists
+    frame_context_apply_dict(context, dict);
+}
+
+static void free_frame_context(frame_context_t *const context) {
+    HUF_free_dtable(&context->literals_dtable);
+
+    FSE_free_dtable(&context->ll_dtable);
+    FSE_free_dtable(&context->ml_dtable);
+    FSE_free_dtable(&context->of_dtable);
+
+    memset(context, 0, sizeof(frame_context_t));
+}
+
+static void parse_frame_header(frame_header_t *const header,
+                               istream_t *const in) {
+    // "The first header's byte is called the Frame_Header_Descriptor. It
+    // tells which other fields are present. Decoding this byte is enough to
+    // tell the size of Frame_Header.
+    //
+    // Bit number   Field name
+    // 7-6          Frame_Content_Size_flag
+    // 5            Single_Segment_flag
+    // 4            Unused_bit
+    // 3            Reserved_bit
+    // 2            Content_Checksum_flag
+    // 1-0          Dictionary_ID_flag"
+    const u8 descriptor = IO_read_bits(in, 8);
+
+    // decode frame header descriptor into flags
+    const u8 frame_content_size_flag = descriptor >> 6;
+    const u8 single_segment_flag = (descriptor >> 5) & 1;
+    const u8 reserved_bit = (descriptor >> 3) & 1;
+    const u8 content_checksum_flag = (descriptor >> 2) & 1;
+    const u8 dictionary_id_flag = descriptor & 3;
+
+    if (reserved_bit != 0) {
+        CORRUPTION();
+    }
+
+    header->single_segment_flag = single_segment_flag;
+    header->content_checksum_flag = content_checksum_flag;
+
+    // decode window size
+    if (!single_segment_flag) {
+        // "Provides guarantees on maximum back-reference distance that will
+        // be used within compressed data. This information is important for
+        // decoders to allocate enough memory.
+        //
+        // Bit numbers  7-3       2-0
+        // Field name   Exponent  Mantissa"
+        u8 window_descriptor = IO_read_bits(in, 8);
+        u8 exponent = window_descriptor >> 3;
+        u8 mantissa = window_descriptor & 7;
+
+        // Use the algorithm from the specification to compute window size
+        // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
+        size_t window_base = (size_t)1 << (10 + exponent);
+        size_t window_add = (window_base / 8) * mantissa;
+        header->window_size = window_base + window_add;
+    }
+
+    // decode dictionary id if it exists
+    if (dictionary_id_flag) {
+        // "This is a variable size field, which contains the ID of the
+        // dictionary required to properly decode the frame. Note that this
+        // field is optional. When it's not present, it's up to the caller to
+        // make sure it uses the correct dictionary. Format is little-endian."
+        const int bytes_array[] = {0, 1, 2, 4};
+        const int bytes = bytes_array[dictionary_id_flag];
+
+        header->dictionary_id = IO_read_bits(in, bytes * 8);
+    } else {
+        header->dictionary_id = 0;
+    }
+
+    // decode frame content size if it exists
+    if (single_segment_flag || frame_content_size_flag) {
+        // "This is the original (uncompressed) size. This information is
+        // optional. The Field_Size is provided according to value of
+        // Frame_Content_Size_flag. The Field_Size can be equal to 0 (not
+        // present), 1, 2, 4 or 8 bytes. Format is little-endian."
+        //
+        // if frame_content_size_flag == 0 but single_segment_flag is set, we
+        // still have a 1 byte field
+        const int bytes_array[] = {1, 2, 4, 8};
+        const int bytes = bytes_array[frame_content_size_flag];
+
+        header->frame_content_size = IO_read_bits(in, bytes * 8);
+        if (bytes == 2) {
+            // "When Field_Size is 2, the offset of 256 is added."
+            header->frame_content_size += 256;
+        }
+    } else {
+        header->frame_content_size = 0;
+    }
+
+    if (single_segment_flag) {
+        // "The Window_Descriptor byte is optional. It is absent when
+        // Single_Segment_flag is set. In this case, the maximum back-reference
+        // distance is the content size itself, which can be any value from 1
+        // to 2^64-1 bytes (16 EB)."
+        header->window_size = header->frame_content_size;
+    }
+}
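To make the Window_Descriptor math concrete, a worked example with an invented descriptor byte of 0x0B: exponent 1 and mantissa 3 give a window of 2048 + 768 = 2816 bytes.

```c
// Same computation as in parse_frame_header above, factored out for clarity
static size_t window_size_from_descriptor(u8 descriptor) {
    u8 exponent = descriptor >> 3;                      // 0x0B -> 1
    u8 mantissa = descriptor & 7;                       // 0x0B -> 3
    size_t window_base = (size_t)1 << (10 + exponent);  // 1 << 11 = 2048
    size_t window_add = (window_base / 8) * mantissa;   // 256 * 3  = 768
    return window_base + window_add;                    // 2816
}
```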
+
+/// A dictionary acts as initializing values for the frame context before
+/// decompression, so we implement it by applying its predetermined
+/// tables and content to the context before beginning decompression
+static void frame_context_apply_dict(frame_context_t *const ctx,
+                                     const dictionary_t *const dict) {
+    // If the content pointer is NULL then it must be an empty dict
+    if (!dict || !dict->content)
+        return;
+
+    // If the requested dictionary_id is non-zero, the correct dictionary must
+    // be present
+    if (ctx->header.dictionary_id != 0 &&
+        ctx->header.dictionary_id != dict->dictionary_id) {
+        ERROR("Wrong dictionary provided");
+    }
+
+    // Copy the dict content to the context for references during sequence
+    // execution
+    ctx->dict_content = dict->content;
+    ctx->dict_content_len = dict->content_size;
+
+    // If it's a formatted dict, copy the precomputed tables in so they can
+    // be used in the table repeat modes
+    if (dict->dictionary_id != 0) {
+        // Deep copy the entropy tables so they can be freed independently of
+        // the dictionary struct
+        HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
+        FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
+        FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
+        FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
+
+        // Copy the repeated offsets
+        memcpy(ctx->previous_offsets, dict->previous_offsets,
+               sizeof(ctx->previous_offsets));
+    }
+}
+
+/// Decompress the data from a frame block by block
+static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
+                            istream_t *const in) {
+    // "A frame encapsulates one or multiple blocks. Each block can be
+    // compressed or not, and has a guaranteed maximum content size, which
+    // depends on frame parameters. Unlike frames, each block depends on
+    // previous blocks for proper decoding. However, each block can be
+    // decompressed without waiting for its successor, allowing streaming
+    // operations."
+    int last_block = 0;
+    do {
+        // "Last_Block
+        //
+        // The lowest bit signals if this block is the last one. Frame ends
+        // right after this block.
+        //
+        // Block_Type and Block_Size
+        //
+        // The next 2 bits represent the Block_Type, while the remaining 21
+        // bits represent the Block_Size. Format is little-endian."
+        last_block = IO_read_bits(in, 1);
+        const int block_type = IO_read_bits(in, 2);
+        const size_t block_len = IO_read_bits(in, 21);
+
+        switch (block_type) {
+        case 0: {
+            // "Raw_Block - this is an uncompressed block. Block_Size is the
+            // number of bytes to read and copy."
+            const u8 *const read_ptr = IO_get_read_ptr(in, block_len);
+            u8 *const write_ptr = IO_get_write_ptr(out, block_len);
+
+            // Copy the raw data into the output
+            memcpy(write_ptr, read_ptr, block_len);
+
+            ctx->current_total_output += block_len;
+            break;
+        }
+        case 1: {
+            // "RLE_Block - this is a single byte, repeated N times. In which
+            // case, Block_Size is the size to regenerate, while the
+            // "compressed" block is just 1 byte (the byte to repeat)."
+            const u8 *const read_ptr = IO_get_read_ptr(in, 1);
+            u8 *const write_ptr = IO_get_write_ptr(out, block_len);
+
+            // Copy `block_len` copies of `read_ptr[0]` to the output
+            memset(write_ptr, read_ptr[0], block_len);
+
+            ctx->current_total_output += block_len;
+            break;
+        }
+        case 2: {
+            // "Compressed_Block - this is a Zstandard compressed block,
+            // detailed in another section of this specification. Block_Size
+            // is the compressed size."
+
+            // Create a sub-stream for the block
+            istream_t block_stream = IO_make_sub_istream(in, block_len);
+            decompress_block(ctx, out, &block_stream);
+            break;
+        }
+        case 3:
+            // "Reserved - this is not a block. This value cannot be used with
+            // current version of this specification."
+            CORRUPTION();
+            break;
+        default:
+            IMPOSSIBLE();
+        }
+    } while (!last_block);
+
+    if (ctx->header.content_checksum_flag) {
+        // This program does not support checking the checksum, so skip over
+        // it if it's present
+        IO_advance_input(in, 4);
+    }
+}
+/******* END FRAME DECODING ***************************************************/
+
+/******* BLOCK DECOMPRESSION **************************************************/
+static void decompress_block(frame_context_t *const ctx, ostream_t *const out,
+                             istream_t *const in) {
+    // "A compressed block consists of 2 sections :
+    //
+    // Literals_Section
+    // Sequences_Section"
+
+    // Part 1: decode the literals block
+    u8 *literals = NULL;
+    const size_t literals_size = decode_literals(ctx, in, &literals);
+
+    // Part 2: decode the sequences block
+    sequence_command_t *sequences = NULL;
+    const size_t num_sequences =
+        decode_sequences(ctx, in, &sequences);
+
+    // Part 3: combine literals and sequence commands to generate output
+    execute_sequences(ctx, out, literals, literals_size, sequences,
+                      num_sequences);
+    free(literals);
+    free(sequences);
+}
+/******* END BLOCK DECOMPRESSION **********************************************/
+
+/******* LITERALS DECODING ****************************************************/
+static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
+                                     const int block_type,
+                                     const int size_format);
+static size_t decode_literals_compressed(frame_context_t *const ctx,
+                                         istream_t *const in,
+                                         u8 **const literals,
+                                         const int block_type,
+                                         const int size_format);
+static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in);
+static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
+                                  int *const num_symbs);
+
+static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
+                              u8 **const literals) {
+    // "Literals can be stored uncompressed or compressed using Huffman prefix
+    // codes. When compressed, an optional tree description can be present,
+    // followed by 1 or 4 streams."
+    //
+    // "Literals_Section_Header
+    //
+    // Header is in charge of describing how literals are packed. It's a
+    // byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using
+    // little-endian convention."
+    //
+    // "Literals_Block_Type
+    //
+    // This field uses 2 lowest bits of first byte, describing 4 different
+    // block types"
+    //
+    // size_format takes between 1 and 2 bits
+    int block_type = IO_read_bits(in, 2);
+    int size_format = IO_read_bits(in, 2);
+
+    if (block_type <= 1) {
+        // Raw or RLE literals block
+        return decode_literals_simple(in, literals, block_type,
+                                      size_format);
+    } else {
+        // Huffman compressed literals
+        return decode_literals_compressed(ctx, in, literals, block_type,
+                                          size_format);
+    }
+}
+
+/// Decodes literals blocks in raw or RLE form
+static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
+                                     const int block_type,
+                                     const int size_format) {
+    size_t size;
+    switch (size_format) {
+    // These cases are in the form ?0
+    // In this case, the ? bit is actually part of the size field
+    case 0:
+    case 2:
+        // "Size_Format uses 1 bit. Regenerated_Size uses 5 bits (0-31)."
+        IO_rewind_bits(in, 1);
+        size = IO_read_bits(in, 5);
+        break;
+    case 1:
+        // "Size_Format uses 2 bits. Regenerated_Size uses 12 bits (0-4095)."
+        size = IO_read_bits(in, 12);
+        break;
+    case 3:
+        // "Size_Format uses 2 bits. Regenerated_Size uses 20 bits (0-1048575)."
+        size = IO_read_bits(in, 20);
+        break;
+    default:
+        // Size format is in range 0-3
+        IMPOSSIBLE();
+    }
+
+    if (size > MAX_LITERALS_SIZE) {
+        CORRUPTION();
+    }
+
+    *literals = malloc(size);
+    if (!*literals) {
+        BAD_ALLOC();
+    }
+
+    switch (block_type) {
+    case 0: {
+        // "Raw_Literals_Block - Literals are stored uncompressed."
+        const u8 *const read_ptr = IO_get_read_ptr(in, size);
+        memcpy(*literals, read_ptr, size);
+        break;
+    }
+    case 1: {
+        // "RLE_Literals_Block - Literals consist of a single byte value
+        // repeated N times."
+        const u8 *const read_ptr = IO_get_read_ptr(in, 1);
+        memset(*literals, read_ptr[0], size);
+        break;
+    }
+    default:
+        IMPOSSIBLE();
+    }
+
+    return size;
+}
+
+/// Decodes Huffman compressed literals
+static size_t decode_literals_compressed(frame_context_t *const ctx,
+                                         istream_t *const in,
+                                         u8 **const literals,
+                                         const int block_type,
+                                         const int size_format) {
+    size_t regenerated_size, compressed_size;
+    // Only size_format == 0 has 1 stream, so default to 4
+    int num_streams = 4;
+    switch (size_format) {
+    case 0:
+        // "A single stream. Both Compressed_Size and Regenerated_Size use 10
+        // bits (0-1023)."
+        num_streams = 1;
+        // Fall through as it has the same size format
+    case 1:
+        // "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
+        // (0-1023)."
+        regenerated_size = IO_read_bits(in, 10);
+        compressed_size = IO_read_bits(in, 10);
+        break;
+    case 2:
+        // "4 streams. Both Compressed_Size and Regenerated_Size use 14 bits
+        // (0-16383)."
+        regenerated_size = IO_read_bits(in, 14);
+        compressed_size = IO_read_bits(in, 14);
+        break;
+    case 3:
+        // "4 streams. Both Compressed_Size and Regenerated_Size use 18 bits
+        // (0-262143)."
+        regenerated_size = IO_read_bits(in, 18);
+        compressed_size = IO_read_bits(in, 18);
+        break;
+    default:
+        // Size format is in range 0-3
+        IMPOSSIBLE();
+    }
+    if (regenerated_size > MAX_LITERALS_SIZE ||
+        compressed_size >= regenerated_size) {
+        CORRUPTION();
+    }
+
+    *literals = malloc(regenerated_size);
+    if (!*literals) {
+        BAD_ALLOC();
+    }
+
+    ostream_t lit_stream = IO_make_ostream(*literals, regenerated_size);
+    istream_t huf_stream = IO_make_sub_istream(in, compressed_size);
+
+    if (block_type == 2) {
+        // Decode the provided Huffman table
+        // "This section is only present when Literals_Block_Type type is
+        // Compressed_Literals_Block (2)."
+
+        HUF_free_dtable(&ctx->literals_dtable);
+        decode_huf_table(&ctx->literals_dtable, &huf_stream);
+    } else {
+        // If the previous Huffman table is being repeated, ensure it exists
+        if (!ctx->literals_dtable.symbols) {
+            CORRUPTION();
+        }
+    }
+
+    size_t symbols_decoded;
+    if (num_streams == 1) {
+        symbols_decoded = HUF_decompress_1stream(&ctx->literals_dtable,
+                                                 &lit_stream, &huf_stream);
+    } else {
+        symbols_decoded = HUF_decompress_4stream(&ctx->literals_dtable,
+                                                 &lit_stream, &huf_stream);
+    }
+
+    if (symbols_decoded != regenerated_size) {
+        CORRUPTION();
+    }
+
+    return regenerated_size;
+}
+ + // "This is a single byte value (0-255), which describes how to decode the list of weights." + const u8 header = IO_read_bits(in, 8); + + u8 weights[HUF_MAX_SYMBS]; + memset(weights, 0, sizeof(weights)); + + int num_symbs; + + if (header >= 128) { + // "This is a direct representation, where each Weight is written + // directly as a 4 bits field (0-15). The full representation occupies + // ((Number_of_Symbols+1)/2) bytes, meaning it uses a last full byte + // even if Number_of_Symbols is odd. Number_of_Symbols = headerByte - + // 127" + num_symbs = header - 127; + const size_t bytes = (num_symbs + 1) / 2; + + const u8 *const weight_src = IO_get_read_ptr(in, bytes); + + for (int i = 0; i < num_symbs; i++) { + // "They are encoded forward, 2 + // weights to a byte with the first weight taking the top four bits + // and the second taking the bottom four (e.g. the following + // operations could be used to read the weights: Weight[0] = + // (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf), etc.)." + if (i % 2 == 0) { + weights[i] = weight_src[i / 2] >> 4; + } else { + weights[i] = weight_src[i / 2] & 0xf; + } + } + } else { + // The weights are FSE encoded, decode them before we can construct the + // table + istream_t fse_stream = IO_make_sub_istream(in, header); + ostream_t weight_stream = IO_make_ostream(weights, HUF_MAX_SYMBS); + fse_decode_hufweights(&weight_stream, &fse_stream, &num_symbs); + } + + // Construct the table using the decoded weights + HUF_init_dtable_usingweights(dtable, weights, num_symbs); +} + +static void fse_decode_hufweights(ostream_t *weights, istream_t *const in, + int *const num_symbs) { + const int MAX_ACCURACY_LOG = 7; + + FSE_dtable dtable; + + // "An FSE bitstream starts by a header, describing probabilities + // distribution. It will create a Decoding Table. For a list of Huffman + // weights, maximum accuracy is 7 bits." 
+
+/******* SEQUENCE DECODING ****************************************************/
+/// The combination of FSE states needed to decode sequences
+typedef struct {
+    FSE_dtable ll_table;
+    FSE_dtable of_table;
+    FSE_dtable ml_table;
+
+    u16 ll_state;
+    u16 of_state;
+    u16 ml_state;
+} sequence_states_t;
+
+/// Different modes to signal to decode_seq_tables what to do
+typedef enum {
+    seq_literal_length = 0,
+    seq_offset = 1,
+    seq_match_length = 2,
+} seq_part_t;
+
+typedef enum {
+    seq_predefined = 0,
+    seq_rle = 1,
+    seq_fse = 2,
+    seq_repeat = 3,
+} seq_mode_t;
+
+/// The predefined FSE distribution tables for `seq_predefined` mode
+static const i16 SEQ_LITERAL_LENGTH_DEFAULT_DIST[36] = {
+    4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
+static const i16 SEQ_OFFSET_DEFAULT_DIST[29] = {
+    1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
+static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
+    1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
+
+/// The sequence decoding baselines and number of additional bits to read/add
+/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
+static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
+    0,  1,  2,   3,   4,   5,    6,    7,    8,    9,     10,    11,
+    12, 13, 14,  15,  16,  18,   20,   22,   24,   28,    32,    40,
+    48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
+static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  1,  1,
+    1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+static const u32 SEQ_MATCH_LENGTH_BASELINES[53] = {
+    3,  4,   5,   6,   7,    8,    9,    10,   11,    12,    13,    14, 15, 16,
+    17, 18,  19,  20,  21,   22,   23,   24,   25,    26,    27,    28, 29, 30,
+    31, 32,  33,  34,  35,   37,   39,   41,   43,    47,    51,    59, 67, 83,
+    99, 131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539};
+static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  1,  1,  1,  1,
+    2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+/// Offset decoding is simpler so we just need a maximum code value
+static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
+
+static void decompress_sequences(frame_context_t *const ctx,
+                                 istream_t *const in,
+                                 sequence_command_t *const sequences,
+                                 const size_t num_sequences);
+static sequence_command_t decode_sequence(sequence_states_t *const state,
+                                          const u8 *const src,
+                                          i64 *const offset);
+static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
+                             const seq_part_t type, const seq_mode_t mode);
+
+static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
+                               sequence_command_t **const sequences) {
+    // "A compressed block is a succession of sequences. A sequence is a
+    // literal copy command, followed by a match copy command. A literal copy
+    // command specifies a length. It is the number of bytes to be copied (or
+    // extracted) from the literal section. A match copy command specifies an
+    // offset and a length. The offset gives the position to copy from, which
+    // can be within a previous block."
+
+    size_t num_sequences;
+
+    // "Number_of_Sequences
+    //
+    // This is a variable size field using between 1 and 3 bytes. Let's call
+    // its first byte byte0."
+    u8 header = IO_read_bits(in, 8);
+    if (header == 0) {
+        // "There are no sequences. The sequence section stops there.
+        // Regenerated content is defined entirely by literals section."
+        *sequences = NULL;
+        return 0;
+    } else if (header < 128) {
+        // "Number_of_Sequences = byte0. Uses 1 byte."
+        num_sequences = header;
+    } else if (header < 255) {
+        // "Number_of_Sequences = ((byte0-128) << 8) + byte1. Uses 2 bytes."
+        num_sequences = ((header - 128) << 8) + IO_read_bits(in, 8);
+    } else {
+        // "Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00. Uses 3 bytes."
+        num_sequences = IO_read_bits(in, 16) + 0x7F00;
+    }
+
+    *sequences = malloc(num_sequences * sizeof(sequence_command_t));
+    if (!*sequences) {
+        BAD_ALLOC();
+    }
+
+    decompress_sequences(ctx, in, *sequences, num_sequences);
+    return num_sequences;
+}
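The three Number_of_Sequences encodings can be condensed into one helper with worked values (this function is illustrative, not part of the patch):

```c
// E.g. {0x7F}             -> 127
//      {0x85, 0x10}       -> ((0x85 - 128) << 8) + 0x10 = 0x0510 = 1296
//      {0xFF, 0x34, 0x12} -> 0x1234 + 0x7F00 = 0x9134 = 37172
static size_t num_sequences_sketch(const u8 *p) {
    if (p[0] == 0)  return 0;                          // no sequences
    if (p[0] < 128) return p[0];                       // 1-byte form
    if (p[0] < 255) return ((p[0] - 128) << 8) + p[1]; // 2-byte form
    return p[1] + (p[2] << 8) + 0x7F00;                // 3-byte form
}
```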
+
+/// Decompress the FSE encoded sequence commands
+static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
+                                 sequence_command_t *const sequences,
+                                 const size_t num_sequences) {
+    // "The Sequences_Section regroup all symbols required to decode commands.
+    // There are 3 symbol types : literals lengths, offsets and match lengths.
+    // They are encoded together, interleaved, in a single bitstream."
+
+    // "Symbol compression modes
+    //
+    // This is a single byte, defining the compression mode of each symbol
+    // type."
+    //
+    // Bit number : Field name
+    // 7-6        : Literals_Lengths_Mode
+    // 5-4        : Offsets_Mode
+    // 3-2        : Match_Lengths_Mode
+    // 1-0        : Reserved
+    u8 compression_modes = IO_read_bits(in, 8);
+
+    if ((compression_modes & 3) != 0) {
+        // Reserved bits set
+        CORRUPTION();
+    }
+
+    // "Following the header, up to 3 distribution tables can be described.
+    // When present, they are in this order :
+    //
+    // Literals lengths
+    // Offsets
+    // Match Lengths"
+    // Update the tables we have stored in the context
+    decode_seq_table(&ctx->ll_dtable, in, seq_literal_length,
+                     (compression_modes >> 6) & 3);
+
+    decode_seq_table(&ctx->of_dtable, in, seq_offset,
+                     (compression_modes >> 4) & 3);
+
+    decode_seq_table(&ctx->ml_dtable, in, seq_match_length,
+                     (compression_modes >> 2) & 3);
+
+    sequence_states_t states;
+
+    // Initialize the decoding tables
+    {
+        states.ll_table = ctx->ll_dtable;
+        states.of_table = ctx->of_dtable;
+        states.ml_table = ctx->ml_dtable;
+    }
+
+    const size_t len = IO_istream_len(in);
+    const u8 *const src = IO_get_read_ptr(in, len);
+
+    // "After writing the last bit containing information, the compressor
+    // writes a single 1-bit and then fills the byte with 0-7 0 bits of
+    // padding."
+    const int padding = 8 - highest_set_bit(src[len - 1]);
+    // The offset starts at the end because FSE streams are read backwards
+    i64 bit_offset = len * 8 - padding;
+
+    // "The bitstream starts with initial state values, each using the
+    // required number of bits in their respective accuracy, decoded
+    // previously from their normalized distribution.
+    //
+    // It starts by Literals_Length_State, followed by Offset_State, and
+    // finally Match_Length_State."
+    FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset);
+    FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset);
+    FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset);
+
+    for (size_t i = 0; i < num_sequences; i++) {
+        // Decode sequences one by one
+        sequences[i] = decode_sequence(&states, src, &bit_offset);
+    }
+
+    if (bit_offset != 0) {
+        CORRUPTION();
+    }
+}
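A worked example of the padding computation (the byte value is invented): if the final byte of the stream is 0x50, the first backward read skips one zero pad bit plus the mandatory 1-bit sentinel.

```c
static int padding_sketch(void) {
    const u8 last_byte = 0x50;                      // 0b01010000 (hypothetical)
    const int pad = 8 - highest_set_bit(last_byte); // 8 - 6 = 2
    // The first backward read then happens at bit len * 8 - 2, just below
    // the sentinel 1-bit
    return pad;
}
```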
+
+// Decode a single sequence and update the state
+static sequence_command_t decode_sequence(sequence_states_t *const states,
+                                          const u8 *const src,
+                                          i64 *const offset) {
+    // "Each symbol is a code in its own context, which specifies Baseline and
+    // Number_of_Bits to add. Codes are FSE compressed, and interleaved with
+    // raw additional bits in the same bitstream."
+
+    // Decode symbols, but don't update states
+    const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state);
+    const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state);
+    const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state);
+
+    // Offset doesn't need a max value as it's not decoded using a table
+    if (ll_code > SEQ_MAX_CODES[seq_literal_length] ||
+        ml_code > SEQ_MAX_CODES[seq_match_length]) {
+        CORRUPTION();
+    }
+
+    // Read the interleaved bits
+    sequence_command_t seq;
+    // "Decoding starts by reading the Number_of_Bits required to decode
+    // Offset. It then does the same for Match_Length, and then for
+    // Literals_Length."
+    seq.offset = ((u32)1 << of_code) + STREAM_read_bits(src, of_code, offset);
+
+    seq.match_length =
+        SEQ_MATCH_LENGTH_BASELINES[ml_code] +
+        STREAM_read_bits(src, SEQ_MATCH_LENGTH_EXTRA_BITS[ml_code], offset);
+
+    seq.literal_length =
+        SEQ_LITERAL_LENGTH_BASELINES[ll_code] +
+        STREAM_read_bits(src, SEQ_LITERAL_LENGTH_EXTRA_BITS[ll_code], offset);
+
+    // "If it is not the last sequence in the block, the next operation is to
+    // update states. Using the rules pre-calculated in the decoding tables,
+    // Literals_Length_State is updated, followed by Match_Length_State, and
+    // then Offset_State."
+    // If the stream is complete don't read bits to update state
+    if (*offset != 0) {
+        FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
+        FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
+        FSE_update_state(&states->of_table, &states->of_state, src, offset);
+    }
+
+    return seq;
+}
+
+/// Given a sequence part and table mode, decode the FSE distribution
+/// Errors if the mode is `seq_repeat` without a pre-existing table in `table`
+static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
+                             const seq_part_t type, const seq_mode_t mode) {
+    // Constant arrays indexed by seq_part_t
+    const i16 *const default_distributions[] = {
+        SEQ_LITERAL_LENGTH_DEFAULT_DIST,
+        SEQ_OFFSET_DEFAULT_DIST,
+        SEQ_MATCH_LENGTH_DEFAULT_DIST};
+    const size_t default_distribution_lengths[] = {36, 29, 53};
+    const size_t default_distribution_accuracies[] = {6, 5, 6};
+
+    const size_t max_accuracies[] = {9, 8, 9};
+
+    if (mode != seq_repeat) {
+        // Free the old table before overwriting it
+        FSE_free_dtable(table);
+    }
+
+    switch (mode) {
+    case seq_predefined: {
+        // "Predefined_Mode : uses a predefined distribution table."
+        const i16 *distribution = default_distributions[type];
+        const size_t symbs = default_distribution_lengths[type];
+        const size_t accuracy_log = default_distribution_accuracies[type];
+
+        FSE_init_dtable(table, distribution, symbs, accuracy_log);
+        break;
+    }
+    case seq_rle: {
+        // "RLE_Mode : it's a single code, repeated Number_of_Sequences times."
+        const u8 symb = IO_get_read_ptr(in, 1)[0];
+        FSE_init_dtable_rle(table, symb);
+        break;
+    }
+    case seq_fse: {
+        // "FSE_Compressed_Mode : standard FSE compression. A distribution
+        // table will be present."
+        FSE_decode_header(table, in, max_accuracies[type]);
+        break;
+    }
+    case seq_repeat:
+        // "Repeat_Mode : re-use distribution table from previous compressed
+        // block."
+        // Nothing to do here, table will be unchanged
+        if (!table->symbols) {
+            // This mode is invalid if we don't already have a table
+            CORRUPTION();
+        }
+        break;
+    default:
+        // Impossible, as mode is from 0-3
+        IMPOSSIBLE();
+        break;
+    }
+}
+/******* END SEQUENCE DECODING ************************************************/
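To make the offset read concrete, a worked example with invented values: an `of_code` of 5 with the next 5 backward bits decoding to 9 yields a raw offset value of 41.

```c
static u32 offset_value_sketch(void) {
    const u8 of_code = 5; // FSE-decoded offset code (hypothetical)
    const u32 extra = 9;  // the next 5 bits read from the backward stream
    const u32 offset_value = ((u32)1 << of_code) + extra; // 32 + 9 = 41
    // offset_value > 3, so compute_offset below maps it to a real match
    // offset of 41 - 3 = 38
    return offset_value;
}
```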
+
+/******* SEQUENCE EXECUTION ***************************************************/
+static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
+                              const u8 *const literals,
+                              const size_t literals_len,
+                              const sequence_command_t *const sequences,
+                              const size_t num_sequences) {
+    istream_t litstream = IO_make_istream(literals, literals_len);
+
+    u64 *const offset_hist = ctx->previous_offsets;
+    size_t total_output = ctx->current_total_output;
+
+    for (size_t i = 0; i < num_sequences; i++) {
+        const sequence_command_t seq = sequences[i];
+        {
+            const u32 literals_size =
+                copy_literals(seq.literal_length, &litstream, out);
+            total_output += literals_size;
+        }
+
+        size_t const offset = compute_offset(seq, offset_hist);
+
+        size_t const match_length = seq.match_length;
+
+        execute_match_copy(ctx, offset, match_length, total_output, out);
+
+        total_output += match_length;
+    }
+
+    // Copy any leftover literals
+    {
+        size_t len = IO_istream_len(&litstream);
+        copy_literals(len, &litstream, out);
+        total_output += len;
+    }
+
+    ctx->current_total_output = total_output;
+}
+
+static u32 copy_literals(const size_t literal_length, istream_t *litstream,
+                         ostream_t *const out) {
+    // If the sequence asks for more literals than are left, the
+    // sequence must be corrupted
+    if (literal_length > IO_istream_len(litstream)) {
+        CORRUPTION();
+    }
+
+    u8 *const write_ptr = IO_get_write_ptr(out, literal_length);
+    const u8 *const read_ptr =
+        IO_get_read_ptr(litstream, literal_length);
+    // Copy literals to output
+    memcpy(write_ptr, read_ptr, literal_length);
+
+    return literal_length;
+}
+
+static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist) {
+    size_t offset;
+    // Offsets are special, we need to handle the repeat offsets
+    if (seq.offset <= 3) {
+        // "The first 3 values define a repeated offset and we will call
+        // them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3.
+        // They are sorted in recency order, with Repeated_Offset1 meaning
+        // 'most recent one'".
+
+        // Use 0 indexing for the array
+        u32 idx = seq.offset - 1;
+        if (seq.literal_length == 0) {
+            // "There is an exception though, when current sequence's
+            // literals length is 0. In this case, repeated offsets are
+            // shifted by one, so Repeated_Offset1 becomes Repeated_Offset2,
+            // Repeated_Offset2 becomes Repeated_Offset3, and
+            // Repeated_Offset3 becomes Repeated_Offset1 - 1_byte."
+            idx++;
+        }
+
+        if (idx == 0) {
+            offset = offset_hist[0];
+        } else {
+            // If idx == 3 then literal length was 0 and the offset was 3,
+            // as per the exception listed above
+            offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
+
+            // If idx == 1 we don't need to modify offset_hist[2], since
+            // we're using the second-most recent code
+            if (idx > 1) {
+                offset_hist[2] = offset_hist[1];
+            }
+            offset_hist[1] = offset_hist[0];
+            offset_hist[0] = offset;
+        }
+    } else {
+        // When it's not a repeat offset:
+        // "if (Offset_Value > 3) offset = Offset_Value - 3;"
+        offset = seq.offset - 3;
+
+        // Shift back history
+        offset_hist[2] = offset_hist[1];
+        offset_hist[1] = offset_hist[0];
+        offset_hist[0] = offset;
+    }
+    return offset;
+}
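A worked trace of the repeat-offset rules (each case starts from the initial history {1, 4, 8}):

```c
// offset_value 2, literal_length > 0:  idx 1 -> offset 4, history {4, 1, 8}
// offset_value 2, literal_length == 0: idx 2 -> offset 8, history {8, 1, 4}
// offset_value 7 (> 3):                offset 7 - 3 = 4,  history {4, 1, 4}
```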
+
+static void execute_match_copy(frame_context_t *const ctx, size_t offset,
+                               size_t match_length, size_t total_output,
+                               ostream_t *const out) {
+    u8 *write_ptr = IO_get_write_ptr(out, match_length);
+    if (total_output <= ctx->header.window_size) {
+        // In this case offset might go back into the dictionary
+        if (offset > total_output + ctx->dict_content_len) {
+            // The offset goes beyond even the dictionary
+            CORRUPTION();
+        }
+
+        if (offset > total_output) {
+            // "The rest of the dictionary is its content. The content act
+            // as a "past" in front of data to compress or decompress, so it
+            // can be referenced in sequence commands."
+            const size_t dict_copy =
+                MIN(offset - total_output, match_length);
+            const size_t dict_offset =
+                ctx->dict_content_len - (offset - total_output);
+
+            memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy);
+            write_ptr += dict_copy;
+            match_length -= dict_copy;
+        }
+    } else if (offset > ctx->header.window_size) {
+        CORRUPTION();
+    }
+
+    // We must copy byte by byte because the match length might be larger
+    // than the offset
+    // ex: if the output so far was "abc", a command with offset=3 and
+    // match_length=6 would produce "abcabcabc" as the new output
+    for (size_t j = 0; j < match_length; j++) {
+        *write_ptr = *(write_ptr - offset);
+        write_ptr++;
+    }
+}
+/******* END SEQUENCE EXECUTION ***********************************************/
+
+/******* OUTPUT SIZE COUNTING *************************************************/
+/// Get the decompressed size of an input stream so memory can be allocated in
+/// advance.
+/// This implementation assumes `src` points to a single ZSTD-compressed frame +size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) { + istream_t in = IO_make_istream(src, src_len); + + // get decompressed size from ZSTD frame header + { + const u32 magic_number = IO_read_bits(&in, 32); + + if (magic_number == 0xFD2FB528U) { + // ZSTD frame + frame_header_t header; + parse_frame_header(&header, &in); + + if (header.frame_content_size == 0 && !header.single_segment_flag) { + // Content size not provided, we can't tell + return -1; + } + + return header.frame_content_size; + } else { + // not a real frame or skippable frame + ERROR("ZSTD frame magic number did not match"); + } + } +} +/******* END OUTPUT SIZE COUNTING *********************************************/ + +/******* DICTIONARY PARSING ***************************************************/ +#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes") +#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src"); + +dictionary_t* create_dictionary() { + dictionary_t* dict = calloc(1, sizeof(dictionary_t)); + if (!dict) { + BAD_ALLOC(); + } + return dict; +} + +static void init_dictionary_content(dictionary_t *const dict, + istream_t *const in); + +void parse_dictionary(dictionary_t *const dict, const void *src, + size_t src_len) { + const u8 *byte_src = (const u8 *)src; + memset(dict, 0, sizeof(dictionary_t)); + if (src == NULL) { /* cannot initialize dictionary with null src */ + NULL_SRC(); + } + if (src_len < 8) { + DICT_SIZE_ERROR(); + } + + istream_t in = IO_make_istream(byte_src, src_len); + + const u32 magic_number = IO_read_bits(&in, 32); + if (magic_number != 0xEC30A437) { + // raw content dict + IO_rewind_bits(&in, 32); + init_dictionary_content(dict, &in); + return; + } + + dict->dictionary_id = IO_read_bits(&in, 32); + + // "Entropy_Tables : following the same format as the tables in compressed + // blocks. They are stored in following order : Huffman tables for literals, + // FSE table for offsets, FSE table for match lengths, and FSE table for + // literals lengths. It's finally followed by 3 offset values, populating + // recent offsets (instead of using {1,4,8}), stored in order, 4-bytes + // little-endian each, for a total of 12 bytes. Each recent offset must have + // a value < dictionary size." + decode_huf_table(&dict->literals_dtable, &in); + decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse); + decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse); + decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse); + + // Read in the previous offset history + dict->previous_offsets[0] = IO_read_bits(&in, 32); + dict->previous_offsets[1] = IO_read_bits(&in, 32); + dict->previous_offsets[2] = IO_read_bits(&in, 32); + + // Ensure the provided offsets aren't too large + // "Each recent offset must have a value < dictionary size." + for (int i = 0; i < 3; i++) { + if (dict->previous_offsets[i] > src_len) { + ERROR("Dictionary corrupted"); + } + } + + // "Content : The rest of the dictionary is its content. The content act as + // a "past" in front of data to compress or decompress, so it can be + // referenced in sequence commands." 
+    init_dictionary_content(dict, &in);
+}
+
+static void init_dictionary_content(dictionary_t *const dict,
+                                    istream_t *const in) {
+    // Copy in the content
+    dict->content_size = IO_istream_len(in);
+    dict->content = malloc(dict->content_size);
+    if (!dict->content) {
+        BAD_ALLOC();
+    }
+
+    const u8 *const content = IO_get_read_ptr(in, dict->content_size);
+
+    memcpy(dict->content, content, dict->content_size);
+}
+
+/// Free an allocated dictionary
+void free_dictionary(dictionary_t *const dict) {
+    HUF_free_dtable(&dict->literals_dtable);
+    FSE_free_dtable(&dict->ll_dtable);
+    FSE_free_dtable(&dict->of_dtable);
+    FSE_free_dtable(&dict->ml_dtable);
+
+    free(dict->content);
+
+    memset(dict, 0, sizeof(dictionary_t));
+
+    free(dict);
+}
+/******* END DICTIONARY PARSING ***********************************************/
+
+/******* IO STREAM OPERATIONS *************************************************/
+#define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream")
+/// Reads `num_bits` bits from a bitstream, and updates the internal offset
+static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
+    if (num_bits > 64 || num_bits <= 0) {
+        ERROR("Attempt to read an invalid number of bits");
+    }
+
+    const size_t bytes = (num_bits + in->bit_offset + 7) / 8;
+    const size_t full_bytes = (num_bits + in->bit_offset) / 8;
+    if (bytes > in->len) {
+        INP_SIZE();
+    }
+
+    const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset);
+
+    in->bit_offset = (num_bits + in->bit_offset) % 8;
+    in->ptr += full_bytes;
+    in->len -= full_bytes;
+
+    return result;
+}
+
+/// Rewinds the stream by `num_bits` bits, moving the internal offset backwards
+static inline void IO_rewind_bits(istream_t *const in, int num_bits) {
+    if (num_bits < 0) {
+        ERROR("Attempting to rewind stream by a negative number of bits");
+    }
+
+    // move the offset back by `num_bits` bits
+    const int new_offset = in->bit_offset - num_bits;
+    // determine the number of whole bytes we have to rewind, rounding up to an
+    // integer number (e.g. if `new_offset == -5`, `bytes == 1`)
+    const i64 bytes = -(new_offset - 7) / 8;
+
+    in->ptr -= bytes;
+    in->len += bytes;
+    // make sure the resulting `bit_offset` is positive, as mod in C does not
+    // convert numbers from negative to positive (e.g. -22 % 8 == -6)
+    in->bit_offset = ((new_offset % 8) + 8) % 8;
+}
+
+/// If the remaining bits in a byte will be unused, advance to the end of the
+/// byte
+static inline void IO_align_stream(istream_t *const in) {
+    if (in->bit_offset != 0) {
+        if (in->len == 0) {
+            INP_SIZE();
+        }
+        in->ptr++;
+        in->len--;
+        in->bit_offset = 0;
+    }
+}
+
+/// Write the given byte into the output stream
+static inline void IO_write_byte(ostream_t *const out, u8 symb) {
+    if (out->len == 0) {
+        OUT_SIZE();
+    }
+
+    out->ptr[0] = symb;
+    out->ptr++;
+    out->len--;
+}
+
+/// Returns the number of bytes left to be read in this stream. The stream must
+/// be byte aligned.
+static inline size_t IO_istream_len(const istream_t *const in) {
+    return in->len;
+}
+
+/// Returns a pointer where `len` bytes can be read, and advances the internal
+/// state. The stream must be byte aligned.
+static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) { + if (len > in->len) { + INP_SIZE(); + } + if (in->bit_offset != 0) { + UNALIGNED(); + } + const u8 *const ptr = in->ptr; + in->ptr += len; + in->len -= len; + + return ptr; +} +/// Returns a pointer to write `len` bytes to, and advances the internal state +static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len) { + if (len > out->len) { + OUT_SIZE(); + } + u8 *const ptr = out->ptr; + out->ptr += len; + out->len -= len; + + return ptr; +} + +/// Advance the inner state by `len` bytes +static inline void IO_advance_input(istream_t *const in, size_t len) { + if (len > in->len) { + INP_SIZE(); + } + if (in->bit_offset != 0) { + UNALIGNED(); + } + + in->ptr += len; + in->len -= len; +} + +/// Returns an `ostream_t` constructed from the given pointer and length +static inline ostream_t IO_make_ostream(u8 *out, size_t len) { + return (ostream_t) { out, len }; +} + +/// Returns an `istream_t` constructed from the given pointer and length +static inline istream_t IO_make_istream(const u8 *in, size_t len) { + return (istream_t) { in, len, 0 }; +} + +/// Returns an `istream_t` with the same base as `in`, and length `len` +/// Then, advance `in` to account for the consumed bytes +/// `in` must be byte aligned +static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) { + // Consume `len` bytes of the parent stream + const u8 *const ptr = IO_get_read_ptr(in, len); + + // Make a substream using the pointer to those `len` bytes + return IO_make_istream(ptr, len); +} +/******* END IO STREAM OPERATIONS *********************************************/ + +/******* BITSTREAM OPERATIONS *************************************************/ +/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits +static inline u64 read_bits_LE(const u8 *src, const int num_bits, + const size_t offset) { + if (num_bits > 64) { + ERROR("Attempt to read an invalid number of bits"); + } + + // Skip over bytes that aren't in range + src += offset / 8; + size_t bit_offset = offset % 8; + u64 res = 0; + + int shift = 0; + int left = num_bits; + while (left > 0) { + u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1); + // Read the next byte, shift it to account for the offset, and then mask + // out the top part if we don't need all the bits + res += (((u64)*src++ >> bit_offset) & mask) << shift; + shift += 8 - bit_offset; + left -= 8 - bit_offset; + bit_offset = 0; + } + + return res; +} + +/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so +/// it updates `offset` to `offset - bits`, and then reads `bits` bits from +/// `src + offset`. If the offset becomes negative, the extra bits at the +/// bottom are filled in with `0` bits instead of reading from before `src`. +static inline u64 STREAM_read_bits(const u8 *const src, const int bits, + i64 *const offset) { + *offset = *offset - bits; + size_t actual_off = *offset; + size_t actual_bits = bits; + // Don't actually read bits from before the start of src, so if `*offset < + // 0` fix actual_off and actual_bits to reflect the quantity to read + if (*offset < 0) { + actual_bits += *offset; + actual_off = 0; + } + u64 res = read_bits_LE(src, actual_bits, actual_off); + + if (*offset < 0) { + // Fill in the bottom "overflowed" bits with 0's + res = -*offset >= 64 ? 
0 : (res << -*offset); + } + return res; +} +/******* END BITSTREAM OPERATIONS *********************************************/ + +/******* BIT COUNTING OPERATIONS **********************************************/ +/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to +/// `num`, or `-1` if `num == 0`. +static inline int highest_set_bit(const u64 num) { + for (int i = 63; i >= 0; i--) { + if (((u64)1 << i) <= num) { + return i; + } + } + return -1; +} +/******* END BIT COUNTING OPERATIONS ******************************************/ + +/******* HUFFMAN PRIMITIVES ***************************************************/ +static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable, + u16 *const state, const u8 *const src, + i64 *const offset) { + // Look up the symbol and number of bits to read + const u8 symb = dtable->symbols[*state]; + const u8 bits = dtable->num_bits[*state]; + const u16 rest = STREAM_read_bits(src, bits, offset); + // Shift `bits` bits out of the state, keeping the low order bits that + // weren't necessary to determine this symbol. Then add in the new bits + // read from the stream. + *state = ((*state << bits) + rest) & (((u16)1 << dtable->max_bits) - 1); + + return symb; +} + +static inline void HUF_init_state(const HUF_dtable *const dtable, + u16 *const state, const u8 *const src, + i64 *const offset) { + // Read in a full `dtable->max_bits` bits to initialize the state + const u8 bits = dtable->max_bits; + *state = STREAM_read_bits(src, bits, offset); +} + +static size_t HUF_decompress_1stream(const HUF_dtable *const dtable, + ostream_t *const out, + istream_t *const in) { + const size_t len = IO_istream_len(in); + if (len == 0) { + INP_SIZE(); + } + const u8 *const src = IO_get_read_ptr(in, len); + + // "Each bitstream must be read backward, that is starting from the end down + // to the beginning. Therefore it's necessary to know the size of each + // bitstream. + // + // It's also necessary to know exactly which bit is the latest. This is + // detected by a final bit flag : the highest bit of latest byte is a + // final-bit-flag. Consequently, a last byte of 0 is not possible. And the + // final-bit-flag itself is not part of the useful bitstream. Hence, the + // last byte contains between 0 and 7 useful bits." + const int padding = 8 - highest_set_bit(src[len - 1]); + + // Offset starts at the end because HUF streams are read backwards + i64 bit_offset = len * 8 - padding; + u16 state; + + HUF_init_state(dtable, &state, src, &bit_offset); + + size_t symbols_written = 0; + while (bit_offset > -dtable->max_bits) { + // Iterate over the stream, decoding one symbol at a time + IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset)); + symbols_written++; + } + // "The process continues up to reading the required number of symbols per + // stream. If a bitstream is not entirely and exactly consumed, hence + // reaching exactly its beginning position with all bits consumed, the + // decoding process is considered faulty." 
+ + // When all symbols have been decoded, the final state value shouldn't have + // any data from the stream, so it should have "read" dtable->max_bits from + // before the start of `src` + // Therefore `offset`, the edge to start reading new bits at, should be + // dtable->max_bits before the start of the stream + if (bit_offset != -dtable->max_bits) { + CORRUPTION(); + } + + return symbols_written; +} + +static size_t HUF_decompress_4stream(const HUF_dtable *const dtable, + ostream_t *const out, istream_t *const in) { + // "Compressed size is provided explicitly : in the 4-streams variant, + // bitstreams are preceded by 3 unsigned little-endian 16-bits values. Each + // value represents the compressed size of one stream, in order. The last + // stream size is deducted from total compressed size and from previously + // decoded stream sizes" + const size_t csize1 = IO_read_bits(in, 16); + const size_t csize2 = IO_read_bits(in, 16); + const size_t csize3 = IO_read_bits(in, 16); + + istream_t in1 = IO_make_sub_istream(in, csize1); + istream_t in2 = IO_make_sub_istream(in, csize2); + istream_t in3 = IO_make_sub_istream(in, csize3); + istream_t in4 = IO_make_sub_istream(in, IO_istream_len(in)); + + size_t total_output = 0; + // Decode each stream independently for simplicity + // If we wanted to we could decode all 4 at the same time for speed, + // utilizing more execution units + total_output += HUF_decompress_1stream(dtable, out, &in1); + total_output += HUF_decompress_1stream(dtable, out, &in2); + total_output += HUF_decompress_1stream(dtable, out, &in3); + total_output += HUF_decompress_1stream(dtable, out, &in4); + + return total_output; +} + +/// Initializes a Huffman table using canonical Huffman codes +/// For more explanation on canonical Huffman codes see +/// http://www.cs.uofs.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html +/// Codes within a level are allocated in symbol order (i.e. smaller symbols get +/// earlier codes) +static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits, + const int num_symbs) { + memset(table, 0, sizeof(HUF_dtable)); + if (num_symbs > HUF_MAX_SYMBS) { + ERROR("Too many symbols for Huffman"); + } + + u8 max_bits = 0; + u16 rank_count[HUF_MAX_BITS + 1]; + memset(rank_count, 0, sizeof(rank_count)); + + // Count the number of symbols for each number of bits, and determine the + // depth of the tree + for (int i = 0; i < num_symbs; i++) { + if (bits[i] > HUF_MAX_BITS) { + ERROR("Huffman table depth too large"); + } + max_bits = MAX(max_bits, bits[i]); + rank_count[bits[i]]++; + } + + const size_t table_size = 1 << max_bits; + table->max_bits = max_bits; + table->symbols = malloc(table_size); + table->num_bits = malloc(table_size); + + if (!table->symbols || !table->num_bits) { + free(table->symbols); + free(table->num_bits); + BAD_ALLOC(); + } + + // "Symbols are sorted by Weight. Within same Weight, symbols keep natural + // order. Symbols with a Weight of zero are removed. Then, starting from + // lowest weight, prefix codes are distributed in order." 
+
+    u32 rank_idx[HUF_MAX_BITS + 1];
+    // Initialize the starting codes for each rank (number of bits)
+    rank_idx[max_bits] = 0;
+    for (int i = max_bits; i >= 1; i--) {
+        rank_idx[i - 1] = rank_idx[i] + rank_count[i] * (1 << (max_bits - i));
+        // The entire range takes the same number of bits so we can memset it
+        memset(&table->num_bits[rank_idx[i]], i, rank_idx[i - 1] - rank_idx[i]);
+    }
+
+    if (rank_idx[0] != table_size) {
+        CORRUPTION();
+    }
+
+    // Allocate codes and fill in the table
+    for (int i = 0; i < num_symbs; i++) {
+        if (bits[i] != 0) {
+            // Allocate a code for this symbol and set its range in the table
+            const u16 code = rank_idx[bits[i]];
+            // Since the code doesn't care about the bottom `max_bits - bits[i]`
+            // bits of state, it gets a range that spans all possible values of
+            // the lower bits
+            const u16 len = 1 << (max_bits - bits[i]);
+            memset(&table->symbols[code], i, len);
+            rank_idx[bits[i]] += len;
+        }
+    }
+}
+
+static void HUF_init_dtable_usingweights(HUF_dtable *const table,
+                                         const u8 *const weights,
+                                         const int num_symbs) {
+    // +1 because the last weight is not transmitted in the header
+    if (num_symbs + 1 > HUF_MAX_SYMBS) {
+        ERROR("Too many symbols for Huffman");
+    }
+
+    u8 bits[HUF_MAX_SYMBS];
+
+    u64 weight_sum = 0;
+    for (int i = 0; i < num_symbs; i++) {
+        // Weights are in the same range as bit count
+        if (weights[i] > HUF_MAX_BITS) {
+            CORRUPTION();
+        }
+        weight_sum += weights[i] > 0 ? (u64)1 << (weights[i] - 1) : 0;
+    }
+
+    // Find the first power of 2 larger than the sum
+    const int max_bits = highest_set_bit(weight_sum) + 1;
+    const u64 left_over = ((u64)1 << max_bits) - weight_sum;
+    // If the left over isn't a power of 2, the weights are invalid
+    if (left_over & (left_over - 1)) {
+        CORRUPTION();
+    }
+
+    // left_over is used to find the last weight, as it's not transmitted;
+    // by inverting 2^(weight - 1) we can determine the value of last_weight
+    const int last_weight = highest_set_bit(left_over) + 1;
+
+    for (int i = 0; i < num_symbs; i++) {
+        // "Number_of_Bits = Weight ? Max_Number_of_Bits + 1 - Weight : 0"
+        bits[i] = weights[i] > 0 ?
(max_bits + 1 - weights[i]) : 0; + } + bits[num_symbs] = + max_bits + 1 - last_weight; // Last weight is always non-zero + + HUF_init_dtable(table, bits, num_symbs + 1); +} + +static void HUF_free_dtable(HUF_dtable *const dtable) { + free(dtable->symbols); + free(dtable->num_bits); + memset(dtable, 0, sizeof(HUF_dtable)); +} + +static void HUF_copy_dtable(HUF_dtable *const dst, + const HUF_dtable *const src) { + if (src->max_bits == 0) { + memset(dst, 0, sizeof(HUF_dtable)); + return; + } + + const size_t size = (size_t)1 << src->max_bits; + dst->max_bits = src->max_bits; + + dst->symbols = malloc(size); + dst->num_bits = malloc(size); + if (!dst->symbols || !dst->num_bits) { + BAD_ALLOC(); + } + + memcpy(dst->symbols, src->symbols, size); + memcpy(dst->num_bits, src->num_bits, size); +} +/******* END HUFFMAN PRIMITIVES ***********************************************/ + +/******* FSE PRIMITIVES *******************************************************/ +/// For more description of FSE see +/// https://github.com/Cyan4973/FiniteStateEntropy/ + +/// Allow a symbol to be decoded without updating state +static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable, + const u16 state) { + return dtable->symbols[state]; +} + +/// Consumes bits from the input and uses the current state to determine the +/// next state +static inline void FSE_update_state(const FSE_dtable *const dtable, + u16 *const state, const u8 *const src, + i64 *const offset) { + const u8 bits = dtable->num_bits[*state]; + const u16 rest = STREAM_read_bits(src, bits, offset); + *state = dtable->new_state_base[*state] + rest; +} + +/// Decodes a single FSE symbol and updates the offset +static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable, + u16 *const state, const u8 *const src, + i64 *const offset) { + const u8 symb = FSE_peek_symbol(dtable, *state); + FSE_update_state(dtable, state, src, offset); + return symb; +} + +static inline void FSE_init_state(const FSE_dtable *const dtable, + u16 *const state, const u8 *const src, + i64 *const offset) { + // Read in a full `accuracy_log` bits to initialize the state + const u8 bits = dtable->accuracy_log; + *state = STREAM_read_bits(src, bits, offset); +} + +static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable, + ostream_t *const out, + istream_t *const in) { + const size_t len = IO_istream_len(in); + if (len == 0) { + INP_SIZE(); + } + const u8 *const src = IO_get_read_ptr(in, len); + + // "Each bitstream must be read backward, that is starting from the end down + // to the beginning. Therefore it's necessary to know the size of each + // bitstream. + // + // It's also necessary to know exactly which bit is the latest. This is + // detected by a final bit flag : the highest bit of latest byte is a + // final-bit-flag. Consequently, a last byte of 0 is not possible. And the + // final-bit-flag itself is not part of the useful bitstream. Hence, the + // last byte contains between 0 and 7 useful bits." + const int padding = 8 - highest_set_bit(src[len - 1]); + i64 offset = len * 8 - padding; + + u16 state1, state2; + // "The first state (State1) encodes the even indexed symbols, and the + // second (State2) encodes the odd indexes. State1 is initialized first, and + // then State2, and they take turns decoding a single symbol and updating + // their state." 
+ FSE_init_state(dtable, &state1, src, &offset); + FSE_init_state(dtable, &state2, src, &offset); + + // Decode until we overflow the stream + // Since we decode in reverse order, overflowing the stream is offset going + // negative + size_t symbols_written = 0; + while (1) { + // "The number of symbols to decode is determined by tracking bitStream + // overflow condition: If updating state after decoding a symbol would + // require more bits than remain in the stream, it is assumed the extra + // bits are 0. Then, the symbols for each of the final states are + // decoded and the process is complete." + IO_write_byte(out, FSE_decode_symbol(dtable, &state1, src, &offset)); + symbols_written++; + if (offset < 0) { + // There's still a symbol to decode in state2 + IO_write_byte(out, FSE_peek_symbol(dtable, state2)); + symbols_written++; + break; + } + + IO_write_byte(out, FSE_decode_symbol(dtable, &state2, src, &offset)); + symbols_written++; + if (offset < 0) { + // There's still a symbol to decode in state1 + IO_write_byte(out, FSE_peek_symbol(dtable, state1)); + symbols_written++; + break; + } + } + + return symbols_written; +} + +static void FSE_init_dtable(FSE_dtable *const dtable, + const i16 *const norm_freqs, const int num_symbs, + const int accuracy_log) { + if (accuracy_log > FSE_MAX_ACCURACY_LOG) { + ERROR("FSE accuracy too large"); + } + if (num_symbs > FSE_MAX_SYMBS) { + ERROR("Too many symbols for FSE"); + } + + dtable->accuracy_log = accuracy_log; + + const size_t size = (size_t)1 << accuracy_log; + dtable->symbols = malloc(size * sizeof(u8)); + dtable->num_bits = malloc(size * sizeof(u8)); + dtable->new_state_base = malloc(size * sizeof(u16)); + + if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) { + BAD_ALLOC(); + } + + // Used to determine how many bits need to be read for each state, + // and where the destination range should start + // Needs to be u16 because max value is 2 * max number of symbols, + // which can be larger than a byte can store + u16 state_desc[FSE_MAX_SYMBS]; + + // "Symbols are scanned in their natural order for "less than 1" + // probabilities. Symbols with this probability are being attributed a + // single cell, starting from the end of the table. These symbols define a + // full state reset, reading Accuracy_Log bits." + int high_threshold = size; + for (int s = 0; s < num_symbs; s++) { + // Scan for low probability symbols to put at the top + if (norm_freqs[s] == -1) { + dtable->symbols[--high_threshold] = s; + state_desc[s] = 1; + } + } + + // "All remaining symbols are sorted in their natural order. Starting from + // symbol 0 and table position 0, each symbol gets attributed as many cells + // as its probability. Cell allocation is spreaded, not linear." + // Place the rest in the table + const u16 step = (size >> 1) + (size >> 3) + 3; + const u16 mask = size - 1; + u16 pos = 0; + for (int s = 0; s < num_symbs; s++) { + if (norm_freqs[s] <= 0) { + continue; + } + + state_desc[s] = norm_freqs[s]; + + for (int i = 0; i < norm_freqs[s]; i++) { + // Give `norm_freqs[s]` states to symbol s + dtable->symbols[pos] = s; + // "A position is skipped if already occupied, typically by a "less + // than 1" probability symbol." 
+            do {
+                pos = (pos + step) & mask;
+            } while (pos >= high_threshold);
+            // Note: no other collision checking is necessary as `step` is
+            // coprime to `size`, so the cycle will visit each position exactly
+            // once
+        }
+    }
+    if (pos != 0) {
+        CORRUPTION();
+    }
+
+    // Now we can fill baseline and num bits
+    for (size_t i = 0; i < size; i++) {
+        u8 symbol = dtable->symbols[i];
+        u16 next_state_desc = state_desc[symbol]++;
+        // Fills in the table appropriately, next_state_desc increases by symbol
+        // over time, decreasing number of bits
+        dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc));
+        // Baseline increases until the bit threshold is passed, at which point
+        // it resets to 0
+        dtable->new_state_base[i] =
+            ((u16)next_state_desc << dtable->num_bits[i]) - size;
+    }
+}
+
+/// Decode an FSE header as defined in the Zstandard format specification and
+/// use the decoded frequencies to initialize a decoding table.
+static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
+                              const int max_accuracy_log) {
+    // "An FSE distribution table describes the probabilities of all symbols
+    // from 0 to the last present one (included) on a normalized scale of 1 <<
+    // Accuracy_Log .
+    //
+    // It's a bitstream which is read forward, in little-endian fashion. It's
+    // not necessary to know its exact size, since it will be discovered and
+    // reported by the decoding process."
+    if (max_accuracy_log > FSE_MAX_ACCURACY_LOG) {
+        ERROR("FSE accuracy too large");
+    }
+
+    // "The bitstream starts by reporting on which scale it operates.
+    // Accuracy_Log = low4bits + 5. Note that maximum Accuracy_Log for literal
+    // and match lengths is 9, and for offsets is 8. Higher values are
+    // considered errors."
+    const int accuracy_log = 5 + IO_read_bits(in, 4);
+    if (accuracy_log > max_accuracy_log) {
+        ERROR("FSE accuracy too large");
+    }
+
+    // "Then follows each symbol value, from 0 to last present one. The number
+    // of bits used by each field is variable. It depends on :
+    //
+    // Remaining probabilities + 1 : example : Presuming an Accuracy_Log of 8,
+    // and presuming 100 probabilities points have already been distributed, the
+    // decoder may read any value from 0 to 255 - 100 + 1 == 156 (inclusive).
+    // Therefore, it must read log2sup(156) == 8 bits.
+    //
+    // Value decoded : small values use 1 less bit : example : Presuming values
+    // from 0 to 156 (inclusive) are possible, 255-156 = 99 values are remaining
+    // in an 8-bits field. They are used this way : first 99 values (hence from
+    // 0 to 98) use only 7 bits, values from 99 to 156 use 8 bits. "
+
+    i32 remaining = 1 << accuracy_log;
+    i16 frequencies[FSE_MAX_SYMBS];
+
+    int symb = 0;
+    while (remaining > 0 && symb < FSE_MAX_SYMBS) {
+        // Log of the number of possible values we could read
+        int bits = highest_set_bit(remaining + 1) + 1;
+
+        u16 val = IO_read_bits(in, bits);
+
+        // Try to mask out the lower bits to see if it qualifies for the "small
+        // value" threshold
+        const u16 lower_mask = ((u16)1 << (bits - 1)) - 1;
+        const u16 threshold = ((u16)1 << bits) - 1 - (remaining + 1);
+
+        if ((val & lower_mask) < threshold) {
+            IO_rewind_bits(in, 1);
+            val = val & lower_mask;
+        } else if (val > lower_mask) {
+            val = val - threshold;
+        }
+
+        // "Probability is obtained from Value decoded by following formula :
+        // Proba = value - 1"
+        const i16 proba = (i16)val - 1;
+
+        // "It means value 0 becomes negative probability -1. -1 is a special
+        // probability, which means "less than 1".
Its effect on distribution + // table is described in next paragraph. For the purpose of calculating + // cumulated distribution, it counts as one." + remaining -= proba < 0 ? -proba : proba; + + frequencies[symb] = proba; + symb++; + + // "When a symbol has a probability of zero, it is followed by a 2-bits + // repeat flag. This repeat flag tells how many probabilities of zeroes + // follow the current one. It provides a number ranging from 0 to 3. If + // it is a 3, another 2-bits repeat flag follows, and so on." + if (proba == 0) { + // Read the next two bits to see how many more 0s + int repeat = IO_read_bits(in, 2); + + while (1) { + for (int i = 0; i < repeat && symb < FSE_MAX_SYMBS; i++) { + frequencies[symb++] = 0; + } + if (repeat == 3) { + repeat = IO_read_bits(in, 2); + } else { + break; + } + } + } + } + IO_align_stream(in); + + // "When last symbol reaches cumulated total of 1 << Accuracy_Log, decoding + // is complete. If the last symbol makes cumulated total go above 1 << + // Accuracy_Log, distribution is considered corrupted." + if (remaining != 0 || symb >= FSE_MAX_SYMBS) { + CORRUPTION(); + } + + // Initialize the decoding table using the determined weights + FSE_init_dtable(dtable, frequencies, symb, accuracy_log); +} + +static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb) { + dtable->symbols = malloc(sizeof(u8)); + dtable->num_bits = malloc(sizeof(u8)); + dtable->new_state_base = malloc(sizeof(u16)); + + if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) { + BAD_ALLOC(); + } + + // This setup will always have a state of 0, always return symbol `symb`, + // and never consume any bits + dtable->symbols[0] = symb; + dtable->num_bits[0] = 0; + dtable->new_state_base[0] = 0; + dtable->accuracy_log = 0; +} + +static void FSE_free_dtable(FSE_dtable *const dtable) { + free(dtable->symbols); + free(dtable->num_bits); + free(dtable->new_state_base); + memset(dtable, 0, sizeof(FSE_dtable)); +} + +static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) { + if (src->accuracy_log == 0) { + memset(dst, 0, sizeof(FSE_dtable)); + return; + } + + size_t size = (size_t)1 << src->accuracy_log; + dst->accuracy_log = src->accuracy_log; + + dst->symbols = malloc(size); + dst->num_bits = malloc(size); + dst->new_state_base = malloc(size * sizeof(u16)); + if (!dst->symbols || !dst->num_bits || !dst->new_state_base) { + BAD_ALLOC(); + } + + memcpy(dst->symbols, src->symbols, size); + memcpy(dst->num_bits, src->num_bits, size); + memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16)); +} +/******* END FSE PRIMITIVES ***************************************************/ diff --git a/src/zstd/doc/educational_decoder/zstd_decompress.h b/src/zstd/doc/educational_decoder/zstd_decompress.h new file mode 100644 index 00000000..a01fde33 --- /dev/null +++ b/src/zstd/doc/educational_decoder/zstd_decompress.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ */ + +/******* EXPOSED TYPES ********************************************************/ +/* +* Contains the parsed contents of a dictionary +* This includes Huffman and FSE tables used for decoding and data on offsets +*/ +typedef struct dictionary_s dictionary_t; +/******* END EXPOSED TYPES ****************************************************/ + +/******* DECOMPRESSION FUNCTIONS **********************************************/ +/// Zstandard decompression functions. +/// `dst` must point to a space at least as large as the reconstructed output. +size_t ZSTD_decompress(void *const dst, const size_t dst_len, + const void *const src, const size_t src_len); + +/// If `dict != NULL` and `dict_len >= 8`, does the same thing as +/// `ZSTD_decompress` but uses the provided dict +size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len, + const void *const src, const size_t src_len, + dictionary_t* parsed_dict); + +/// Get the decompressed size of an input stream so memory can be allocated in +/// advance +/// Returns -1 if the size can't be determined +/// Assumes decompression of a single frame +size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len); +/******* END DECOMPRESSION FUNCTIONS ******************************************/ + +/******* DICTIONARY MANAGEMENT ***********************************************/ +/* + * Return a valid dictionary_t pointer for use with dictionary initialization + * or decompression + */ +dictionary_t* create_dictionary(); + +/* + * Parse a provided dictionary blob for use in decompression + * `src` -- must point to memory space representing the dictionary + * `src_len` -- must provide the dictionary size + * `dict` -- will contain the parsed contents of the dictionary and + * can be used for decompression + */ +void parse_dictionary(dictionary_t *const dict, const void *src, + size_t src_len); + +/* + * Free internal Huffman tables, FSE tables, and dictionary content + */ +void free_dictionary(dictionary_t *const dict); +/******* END DICTIONARY MANAGEMENT *******************************************/ diff --git a/src/zstd/doc/images/Cspeed4.png b/src/zstd/doc/images/Cspeed4.png Binary files differnew file mode 100644 index 00000000..318204c0 --- /dev/null +++ b/src/zstd/doc/images/Cspeed4.png diff --git a/src/zstd/doc/images/DCspeed5.png b/src/zstd/doc/images/DCspeed5.png Binary files differnew file mode 100644 index 00000000..900b0242 --- /dev/null +++ b/src/zstd/doc/images/DCspeed5.png diff --git a/src/zstd/doc/images/Dspeed4.png b/src/zstd/doc/images/Dspeed4.png Binary files differnew file mode 100644 index 00000000..b7baef1f --- /dev/null +++ b/src/zstd/doc/images/Dspeed4.png diff --git a/src/zstd/doc/images/dict-cr.png b/src/zstd/doc/images/dict-cr.png Binary files differnew file mode 100644 index 00000000..f3a9ce2b --- /dev/null +++ b/src/zstd/doc/images/dict-cr.png diff --git a/src/zstd/doc/images/dict-cs.png b/src/zstd/doc/images/dict-cs.png Binary files differnew file mode 100644 index 00000000..55e5ef51 --- /dev/null +++ b/src/zstd/doc/images/dict-cs.png diff --git a/src/zstd/doc/images/dict-ds.png b/src/zstd/doc/images/dict-ds.png Binary files differnew file mode 100644 index 00000000..1153f1b9 --- /dev/null +++ b/src/zstd/doc/images/dict-ds.png diff --git a/src/zstd/doc/images/ldmCspeed.png b/src/zstd/doc/images/ldmCspeed.png Binary files differnew file mode 100644 index 00000000..d3bfce4c --- /dev/null +++ b/src/zstd/doc/images/ldmCspeed.png diff --git a/src/zstd/doc/images/ldmDspeed.png 
b/src/zstd/doc/images/ldmDspeed.png Binary files differnew file mode 100644 index 00000000..d5445f01 --- /dev/null +++ b/src/zstd/doc/images/ldmDspeed.png diff --git a/src/zstd/doc/zstd_compression_format.md b/src/zstd/doc/zstd_compression_format.md new file mode 100644 index 00000000..aa86d142 --- /dev/null +++ b/src/zstd/doc/zstd_compression_format.md @@ -0,0 +1,1535 @@ +Zstandard Compression Format +============================ + +### Notices + +Copyright (c) 2016-present Yann Collet, Facebook, Inc. + +Permission is granted to copy and distribute this document +for any purpose and without charge, +including translations into other languages +and incorporation into compilations, +provided that the copyright notice and this notice are preserved, +and that any substantive changes or deletions from the original +are clearly marked. +Distribution of this document is unlimited. + +### Version + +0.2.6 (19/08/17) + + +Introduction +------------ + +The purpose of this document is to define a lossless compressed data format, +that is independent of CPU type, operating system, +file system and character set, suitable for +file compression, pipe and streaming compression, +using the [Zstandard algorithm](http://www.zstandard.org). + +The data can be produced or consumed, +even for an arbitrarily long sequentially presented input data stream, +using only an a priori bounded amount of intermediate storage, +and hence can be used in data communications. +The format uses the Zstandard compression method, +and optional [xxHash-64 checksum method](http://www.xxhash.org), +for detection of data corruption. + +The data format defined by this specification +does not attempt to allow random access to compressed data. + +This specification is intended for use by implementers of software +to compress data into Zstandard format and/or decompress data from Zstandard format. +The text of the specification assumes a basic background in programming +at the level of bits and other primitive data representations. + +Unless otherwise indicated below, +a compliant compressor must produce data sets +that conform to the specifications presented here. +It doesn’t need to support all options though. + +A compliant decompressor must be able to decompress +at least one working set of parameters +that conforms to the specifications presented here. +It may also ignore informative fields, such as checksum. +Whenever it does not support a parameter defined in the compressed stream, +it must produce a non-ambiguous error code and associated error message +explaining which parameter is unsupported. + +### Overall conventions +In this document: +- square brackets i.e. `[` and `]` are used to indicate optional fields or parameters. +- the naming convention for identifiers is `Mixed_Case_With_Underscores` + +### Definitions +Content compressed by Zstandard is transformed into a Zstandard __frame__. +Multiple frames can be appended into a single file or stream. +A frame is completely independent, has a defined beginning and end, +and a set of parameters which tells the decoder how to decompress it. + +A frame encapsulates one or multiple __blocks__. +Each block can be compressed or not, +and has a guaranteed maximum content size, which depends on frame parameters. +Unlike frames, each block depends on previous blocks for proper decoding. +However, each block can be decompressed without waiting for its successor, +allowing streaming operations. 
+
+Overview
+---------
+- [Frames](#frames)
+  - [Zstandard frames](#zstandard-frames)
+    - [Blocks](#blocks)
+      - [Literals Section](#literals-section)
+      - [Sequences Section](#sequences-section)
+      - [Sequence Execution](#sequence-execution)
+  - [Skippable frames](#skippable-frames)
+- [Entropy Encoding](#entropy-encoding)
+  - [FSE](#fse)
+  - [Huffman Coding](#huffman-coding)
+- [Dictionary Format](#dictionary-format)
+
+Frames
+------
+Zstandard compressed data is made up of one or more __frames__.
+Each frame is independent and can be decompressed independently of other frames.
+The decompressed content of multiple concatenated frames is the concatenation of
+each frame's decompressed content.
+
+There are two frame formats defined by Zstandard:
+  Zstandard frames and Skippable frames.
+Zstandard frames contain compressed data, while
+skippable frames contain no compressed data and can be used for metadata.
+
+## Zstandard frames
+The structure of a single Zstandard frame is as follows:
+
+| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] |
+|:--------------:|:--------------:|:----------:| ------------------ |:--------------------:|
+| 4 bytes        | 2-14 bytes     | n bytes    |                    | 0-4 bytes            |
+
+__`Magic_Number`__
+
+4 Bytes, __little-endian__ format.
+Value : 0xFD2FB528
+
+__`Frame_Header`__
+
+2 to 14 Bytes, detailed in [`Frame_Header`](#frame_header).
+
+__`Data_Block`__
+
+Detailed in [`Blocks`](#blocks).
+That’s where compressed data is stored.
+
+__`Content_Checksum`__
+
+An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
+The content checksum is the result
+of [xxh64() hash function](http://www.xxhash.org)
+digesting the original (decoded) data as input, and a seed of zero.
+The low 4 bytes of the checksum are stored in __little-endian__ format.
+
+### `Frame_Header`
+
+The `Frame_Header` has a variable size, with a minimum of 2 bytes,
+and up to 14 bytes depending on optional parameters.
+The structure of `Frame_Header` is as follows:
+
+| `Frame_Header_Descriptor` | [`Window_Descriptor`] | [`Dictionary_ID`] | [`Frame_Content_Size`] |
+| ------------------------- | --------------------- | ----------------- | ---------------------- |
+| 1 byte                    | 0-1 byte              | 0-4 bytes         | 0-8 bytes              |
+
+#### `Frame_Header_Descriptor`
+
+The first byte of the header is called the `Frame_Header_Descriptor`.
+It describes which other fields are present.
+Decoding this byte is enough to tell the size of `Frame_Header`.
+
+| Bit number | Field name                |
+| ---------- | ------------------------- |
+| 7-6        | `Frame_Content_Size_flag` |
+| 5          | `Single_Segment_flag`     |
+| 4          | `Unused_bit`              |
+| 3          | `Reserved_bit`            |
+| 2          | `Content_Checksum_flag`   |
+| 1-0        | `Dictionary_ID_flag`      |
+
+In this table, bit 7 is the highest bit, while bit 0 is the lowest one.
+
+__`Frame_Content_Size_flag`__
+
+This is a 2-bit flag (`= Frame_Header_Descriptor >> 6`),
+specifying whether `Frame_Content_Size` (the decompressed data size)
+is provided within the header.
+`Flag_Value` provides `FCS_Field_Size`,
+which is the number of bytes used by `Frame_Content_Size`
+according to the following table:
+
+| `Flag_Value`   |    0   |  1  |  2  |  3  |
+| -------------- | ------ | --- | --- | --- |
+|`FCS_Field_Size`| 0 or 1 |  2  |  4  |  8  |
+
+When `Flag_Value` is `0`, `FCS_Field_Size` depends on `Single_Segment_flag` :
+if `Single_Segment_flag` is set, `FCS_Field_Size` is 1.
+Otherwise, `FCS_Field_Size` is 0 : `Frame_Content_Size` is not provided.
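+
+As an illustration, `FCS_Field_Size` can be derived from the descriptor byte
+as in the sketch below (a minimal example; the function name is illustrative
+and not part of the reference implementation):
+
+```
+#include <stdint.h>
+
+/* Sketch: number of bytes used by Frame_Content_Size, per the table above */
+static int fcs_field_size(uint8_t frame_header_descriptor) {
+    const int flag_value = frame_header_descriptor >> 6;            /* bits 7-6 */
+    const int single_segment = (frame_header_descriptor >> 5) & 1;  /* bit 5 */
+    switch (flag_value) {
+    case 0:  return single_segment ? 1 : 0;
+    case 1:  return 2;
+    case 2:  return 4;
+    default: return 8;  /* flag_value == 3 */
+    }
+}
+```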
+
+__`Single_Segment_flag`__
+
+If this flag is set,
+data must be regenerated within a single continuous memory segment.
+
+In this case, the `Window_Descriptor` byte is skipped,
+but `Frame_Content_Size` is necessarily present.
+As a consequence, the decoder must allocate a memory segment
+of size equal to or larger than `Frame_Content_Size`.
+
+In order to protect decoders from unreasonable memory requirements,
+a decoder is allowed to reject a compressed frame
+which requests a memory size beyond the decoder's authorized range.
+
+For broader compatibility, decoders are recommended to support
+memory sizes of at least 8 MB.
+This is only a recommendation,
+each decoder is free to support higher or lower limits,
+depending on local limitations.
+
+__`Unused_bit`__
+
+The value of this bit should be set to zero.
+A decoder compliant with this specification version shall not interpret it.
+It might be used in a future version,
+to signal a property which is not mandatory to properly decode the frame.
+
+__`Reserved_bit`__
+
+This bit is reserved for some future feature.
+Its value _must be zero_.
+A decoder compliant with this specification version must ensure it is not set.
+This bit may be used in a future revision,
+to signal a feature that must be interpreted to decode the frame correctly.
+
+__`Content_Checksum_flag`__
+
+If this flag is set, a 32-bit `Content_Checksum` will be present at the frame's end.
+See the `Content_Checksum` paragraph.
+
+__`Dictionary_ID_flag`__
+
+This is a 2-bit flag (`= Frame_Header_Descriptor & 3`),
+telling if a dictionary ID is provided within the header.
+It also specifies the size of this field as `Field_Size`.
+
+|`Flag_Value`|  0  |  1  |  2  |  3  |
+| ---------- | --- | --- | --- | --- |
+|`Field_Size`|  0  |  1  |  2  |  4  |
+
+#### `Window_Descriptor`
+
+Provides guarantees on the minimum memory buffer size required to decompress a frame.
+This information is important for decoders to allocate enough memory.
+
+The `Window_Descriptor` byte is optional.
+When `Single_Segment_flag` is set, `Window_Descriptor` is not present.
+In this case, `Window_Size` is `Frame_Content_Size`,
+which can be any value from 0 to 2^64-1 bytes (16 ExaBytes).
+
+| Bit numbers |     7-3    |     2-0    |
+| ----------- | ---------- | ---------- |
+| Field name  | `Exponent` | `Mantissa` |
+
+The minimum memory buffer size is called `Window_Size`.
+It is described by the following formulas :
+```
+windowLog = 10 + Exponent;
+windowBase = 1 << windowLog;
+windowAdd = (windowBase / 8) * Mantissa;
+Window_Size = windowBase + windowAdd;
+```
+The minimum `Window_Size` is 1 KB.
+The maximum `Window_Size` is `(1<<41) + 7*(1<<38)` bytes, which is 3.75 TB.
+
+To properly decode compressed data,
+a decoder will need to allocate a buffer of at least `Window_Size` bytes.
+
+In order to protect decoders from unreasonable memory requirements,
+a decoder is allowed to reject a compressed frame
+which requests a memory size beyond the decoder's authorized range.
+
+For improved interoperability,
+decoders are recommended to be compatible with `Window_Size >= 8 MB`,
+and encoders are recommended to not request more than 8 MB.
+It's merely a recommendation though,
+decoders are free to support higher or lower limits,
+depending on local limitations.
+
+#### `Dictionary_ID`
+
+This is a variable size field, which contains
+the ID of the dictionary required to properly decode the frame.
+The `Dictionary_ID` field is optional. When it's not present,
+it's up to the decoder to make sure it uses the correct dictionary.
+
+Field size depends on `Dictionary_ID_flag`.
+1 byte can represent an ID 0-255.
+2 bytes can represent an ID 0-65535.
+4 bytes can represent an ID 0-4294967295.
+Format is __little-endian__.
+
+It's allowed to represent a small ID (for example `13`)
+with a large 4-byte dictionary ID, even if it is less efficient.
+
+_Reserved ranges :_
+If the frame is going to be distributed in a private environment,
+any dictionary ID can be used.
+However, for public distribution of compressed frames using a dictionary,
+the following ranges are reserved and shall not be used :
+- low range  : `<= 32767`
+- high range : `>= (1 << 31)`
+
+#### `Frame_Content_Size`
+
+This is the original (uncompressed) size. This information is optional.
+`Frame_Content_Size` uses a variable number of bytes, provided by `FCS_Field_Size`.
+`FCS_Field_Size` is provided by the value of `Frame_Content_Size_flag`.
+`FCS_Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes.
+
+| `FCS_Field_Size` |    Range   |
+| ---------------- | ---------- |
+|        0         |   unknown  |
+|        1         |   0 - 255  |
+|        2         | 256 - 65791|
+|        4         | 0 - 2^32-1 |
+|        8         | 0 - 2^64-1 |
+
+`Frame_Content_Size` format is __little-endian__.
+When `FCS_Field_Size` is 1, 4 or 8 bytes, the value is read directly.
+When `FCS_Field_Size` is 2, _the offset of 256 is added_.
+It's allowed to represent a small size (for example `18`) using any compatible variant.
+
+
+Blocks
+-------
+
+After `Magic_Number` and `Frame_Header`, a frame contains a sequence of blocks.
+Each frame must have at least one block,
+but there is no upper limit on the number of blocks per frame.
+
+The structure of a block is as follows:
+
+| `Block_Header` | `Block_Content` |
+|:--------------:|:---------------:|
+|    3 bytes     |     n bytes     |
+
+`Block_Header` uses 3 bytes, written using __little-endian__ convention.
+It contains 3 fields :
+
+| `Last_Block` | `Block_Type` | `Block_Size` |
+|:------------:|:------------:|:------------:|
+|    bit 0     |   bits 1-2   |  bits 3-23   |
+
+__`Last_Block`__
+
+The lowest bit signals if this block is the last one.
+The frame will end after this last block.
+It may be followed by an optional `Content_Checksum`
+(see [Zstandard Frames](#zstandard-frames)).
+
+__`Block_Type`__
+
+The next 2 bits represent the `Block_Type`.
+There are 4 block types :
+
+| Value        |      0      |      1      |         2          |     3     |
+| ------------ | ----------- | ----------- | ------------------ | --------- |
+| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`|
+
+- `Raw_Block` - this is an uncompressed block.
+  `Block_Content` contains `Block_Size` bytes.
+
+- `RLE_Block` - this is a single byte, repeated `Block_Size` times.
+  `Block_Content` consists of a single byte.
+  On the decompression side, this byte must be repeated `Block_Size` times.
+
+- `Compressed_Block` - this is a [Zstandard compressed block](#compressed-blocks),
+  explained later on.
+  `Block_Size` is the length of `Block_Content`, the compressed data.
+  The decompressed size is not known,
+  but its maximum possible value is guaranteed (see below).
+
+- `Reserved` - this is not a block.
+  This value cannot be used with the current version of this specification.
+
+__`Block_Size`__
+
+The upper 21 bits of `Block_Header` represent the `Block_Size`.
+
+Block sizes must respect a few rules :
+- For `Compressed_Block`, `Block_Size` is always strictly less than the decompressed size.
+- Block decompressed size is always <= `Window_Size`
+- Block decompressed size is always <= 128 KB.
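+
+Putting the three `Block_Header` fields together, a decoder might unpack them
+as in the sketch below (struct and function names are illustrative, not taken
+from the reference implementation):
+
+```
+#include <stdint.h>
+
+typedef struct {
+    int last_block;       /* bit 0 */
+    int block_type;       /* bits 1-2 : 0=Raw, 1=RLE, 2=Compressed, 3=Reserved */
+    uint32_t block_size;  /* bits 3-23 */
+} block_header_t;
+
+/* Sketch: unpack a 3-byte little-endian Block_Header */
+static block_header_t parse_block_header(const uint8_t *p) {
+    const uint32_t hdr = (uint32_t)p[0]
+                       | ((uint32_t)p[1] << 8)
+                       | ((uint32_t)p[2] << 16);
+    block_header_t bh;
+    bh.last_block = hdr & 1;
+    bh.block_type = (hdr >> 1) & 3;
+    bh.block_size = hdr >> 3;
+    return bh;
+}
+```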
+
+A block can contain any number of bytes (even empty),
+up to `Block_Maximum_Decompressed_Size`, which is the smallest of :
+- `Window_Size`
+- 128 KB
+
+
+Compressed Blocks
+-----------------
+To decompress a compressed block, the compressed size must be provided
+from the `Block_Size` field within `Block_Header`.
+
+A compressed block consists of 2 sections :
+- [Literals Section](#literals-section)
+- [Sequences Section](#sequences-section)
+
+The results of the two sections are then combined to produce the decompressed
+data in [Sequence Execution](#sequence-execution).
+
+#### Prerequisites
+To decode a compressed block, the following elements are necessary :
+- Previous decoded data, up to a distance of `Window_Size`,
+  or all previously decoded data when `Single_Segment_flag` is set.
+- List of "recent offsets" from the previous `Compressed_Block`.
+- Decoding tables of the previous `Compressed_Block` for each symbol type
+  (literals, literals lengths, match lengths, offsets).
+
+Literals Section
+----------------
+All literals are regrouped in the first part of the block.
+They can be decoded first, and then copied during [Sequence Execution],
+or they can be decoded on the fly during [Sequence Execution].
+
+Literals can be stored uncompressed or compressed using Huffman prefix codes.
+When compressed, an optional tree description can be present,
+followed by 1 or 4 streams.
+
+| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
+| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- |
+
+
+#### `Literals_Section_Header`
+
+The header describes how literals are packed.
+It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
+using __little-endian__ convention.
+
+| `Literals_Block_Type` | `Size_Format` | `Regenerated_Size` | [`Compressed_Size`] |
+| --------------------- | ------------- | ------------------ | ------------------- |
+|       2 bits          |  1 - 2 bits   |    5 - 20 bits     |     0 - 18 bits     |
+
+In this representation, bits on the left are the lowest bits.
+
+__`Literals_Block_Type`__
+
+This field uses the 2 lowest bits of the first byte, describing 4 different block types :
+
+| `Literals_Block_Type`       | Value |
+| --------------------------- | ----- |
+| `Raw_Literals_Block`        |   0   |
+| `RLE_Literals_Block`        |   1   |
+| `Compressed_Literals_Block` |   2   |
+| `Treeless_Literals_Block`   |   3   |
+
+- `Raw_Literals_Block` - Literals are stored uncompressed.
+- `RLE_Literals_Block` - Literals consist of a single byte value
+  repeated `Regenerated_Size` times.
+- `Compressed_Literals_Block` - This is a standard Huffman-compressed block,
+  starting with a Huffman tree description.
+  See details below.
+- `Treeless_Literals_Block` - This is a Huffman-compressed block,
+  using the Huffman tree _from the previous Huffman-compressed literals block_.
+  `Huffman_Tree_Description` will be skipped.
+  Note: If this mode is triggered without any previous Huffman-table in the frame
+  (or [dictionary](#dictionary-format)), this should be treated as data corruption.
+
+__`Size_Format`__
+
+`Size_Format` is divided into 2 families :
+
+- For `Raw_Literals_Block` and `RLE_Literals_Block`,
+  it's only necessary to decode `Regenerated_Size`.
+  There is no `Compressed_Size` field.
+- For `Compressed_Literals_Block` and `Treeless_Literals_Block`,
+  it's required to decode both `Compressed_Size`
+  and `Regenerated_Size` (the decompressed size).
+  It's also necessary to decode the number of streams (1 or 4).
+
+For values spanning several bytes, the convention is __little-endian__.
+
+__`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ :
+
+- Value `00` or `10` : `Size_Format` uses 1 bit.
+  `Regenerated_Size` uses 5 bits (0-31).
+  `Literals_Section_Header` has 1 byte.
+  `Regenerated_Size = Header[0]>>3`
+- Value 01 : `Size_Format` uses 2 bits.
+  `Regenerated_Size` uses 12 bits (0-4095).
+  `Literals_Section_Header` has 2 bytes.
+  `Regenerated_Size = (Header[0]>>4) + (Header[1]<<4)`
+- Value 11 : `Size_Format` uses 2 bits.
+  `Regenerated_Size` uses 20 bits (0-1048575).
+  `Literals_Section_Header` has 3 bytes.
+  `Regenerated_Size = (Header[0]>>4) + (Header[1]<<4) + (Header[2]<<12)`
+
+Only Stream1 is present for these cases.
+Note : it's allowed to represent a short value (for example `13`)
+using a long format, even if it's less efficient.
+
+__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :
+
+- Value 00 : _A single stream_.
+  Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
+  `Literals_Section_Header` has 3 bytes.
+- Value 01 : 4 streams.
+  Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
+  `Literals_Section_Header` has 3 bytes.
+- Value 10 : 4 streams.
+  Both `Regenerated_Size` and `Compressed_Size` use 14 bits (0-16383).
+  `Literals_Section_Header` has 4 bytes.
+- Value 11 : 4 streams.
+  Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
+  `Literals_Section_Header` has 5 bytes.
+
+Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
+Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
+_when_ it is present.
+
+### Raw Literals Block
+The data in Stream1 is `Regenerated_Size` bytes long.
+It contains the raw literals data to be used during [Sequence Execution].
+
+### RLE Literals Block
+Stream1 consists of a single byte which should be repeated `Regenerated_Size` times
+to generate the decoded literals.
+
+### Compressed Literals Block and Treeless Literals Block
+Both of these modes contain Huffman-encoded data.
+`Treeless_Literals_Block` does not have a `Huffman_Tree_Description`.
+
+#### `Huffman_Tree_Description`
+This section is only present when `Literals_Block_Type` is `Compressed_Literals_Block` (`2`).
+The format of the Huffman tree description can be found at [Huffman Tree description](#huffman-tree-description).
+The size of `Huffman_Tree_Description` is determined during the decoding process;
+it must be used to determine where streams begin.
+`Total_Streams_Size = Compressed_Size - Huffman_Tree_Description_Size`.
+
+For `Treeless_Literals_Block`,
+the Huffman table comes from the previously compressed literals block.
+
+Huffman compressed data consists of either 1 or 4 Huffman-coded streams.
+
+If only one stream is present, it is a single bitstream occupying the entire
+remaining portion of the literals block, encoded as described within
+[Huffman-Coded Streams](#huffman-coded-streams).
+
+If there are four streams, the literals section header only provides enough
+information to know the decompressed and compressed sizes of all four streams _combined_.
+The decompressed size of each stream is equal to `(Regenerated_Size+3)/4`,
+except for the last stream, which may be up to 3 bytes smaller,
+to reach a total decompressed size as specified in `Regenerated_Size`.
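+
+For example, the per-stream decompressed sizes can be computed as in the
+sketch below (illustrative names; with `Regenerated_Size = 1001`, streams 1-3
+each regenerate 251 bytes and stream 4 regenerates 248):
+
+```
+#include <stddef.h>
+
+/* Sketch: split Regenerated_Size across the 4 literal streams */
+static int literal_stream_sizes(size_t regenerated_size, size_t sizes[4]) {
+    sizes[0] = sizes[1] = sizes[2] = (regenerated_size + 3) / 4;
+    if (regenerated_size < 3 * sizes[0]) {
+        return -1;  /* corrupt: stream 4 would have negative size */
+    }
+    sizes[3] = regenerated_size - 3 * sizes[0];
+    return 0;
+}
+```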
+
+The compressed size of each stream is provided explicitly:
+the first 6 bytes of the compressed data consist of three 2-byte __little-endian__ fields,
+describing the compressed sizes of the first three streams.
+`Stream4_Size` is computed as `Total_Streams_Size` minus the sizes of the other streams.
+
+`Stream4_Size = Total_Streams_Size - 6 - Stream1_Size - Stream2_Size - Stream3_Size`.
+
+Note: remember that `Total_Streams_Size` can be smaller than `Compressed_Size` in the header,
+because `Compressed_Size` also contains `Huffman_Tree_Description_Size` when it is present.
+
+Each of these 4 bitstreams is then decoded independently as a Huffman-Coded stream,
+as described at [Huffman-Coded Streams](#huffman-coded-streams).
+
+
+Sequences Section
+-----------------
+A compressed block is a succession of _sequences_.
+A sequence is a literal copy command, followed by a match copy command.
+A literal copy command specifies a length.
+It is the number of bytes to be copied (or extracted) from the Literals Section.
+A match copy command specifies an offset and a length.
+
+When all _sequences_ are decoded,
+if there are literals left in the _literals section_,
+these bytes are added at the end of the block.
+
+This is described in more detail in [Sequence Execution](#sequence-execution).
+
+The `Sequences_Section` regroups all symbols required to decode commands.
+There are 3 symbol types : literals lengths, offsets and match lengths.
+They are encoded together, interleaved, in a single _bitstream_.
+
+The `Sequences_Section` starts with a header,
+followed by optional probability tables for each symbol type,
+followed by the bitstream.
+
+| `Sequences_Section_Header` | [`Literals_Length_Table`] | [`Offset_Table`] | [`Match_Length_Table`] | bitStream |
+| -------------------------- | ------------------------- | ---------------- | ---------------------- | --------- |
+
+To decode the `Sequences_Section`, it's required to know its size.
+This size is deduced from `Block_Size - Literals_Section_Size`.
+
+
+#### `Sequences_Section_Header`
+
+Consists of 2 items:
+- `Number_of_Sequences`
+- Symbol compression modes
+
+__`Number_of_Sequences`__
+
+This is a variable-size field using between 1 and 3 bytes.
+Let's call its first byte `byte0`.
+- `if (byte0 == 0)` : there are no sequences.
+  The sequence section stops there.
+  Decompressed content is defined entirely as Literals Section content.
+- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
+- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
+- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
+
+__Symbol compression modes__
+
+This is a single byte, defining the compression mode of each symbol type.
+
+|Bit number| 7-6                     | 5-4            | 3-2                  | 1-0        |
+| -------- | ----------------------- | -------------- | -------------------- | ---------- |
+|Field name| `Literals_Lengths_Mode` | `Offsets_Mode` | `Match_Lengths_Mode` | `Reserved` |
+
+The last field, `Reserved`, must be all-zeroes.
+
+`Literals_Lengths_Mode`, `Offsets_Mode` and `Match_Lengths_Mode` define the `Compression_Mode` of
+literals lengths, offsets, and match lengths symbols respectively.
+
+They follow the same enumeration :
+
+| Value              | 0                 | 1          | 2                     | 3             |
+| ------------------ | ----------------- | ---------- | --------------------- | ------------- |
+| `Compression_Mode` | `Predefined_Mode` | `RLE_Mode` | `FSE_Compressed_Mode` | `Repeat_Mode` |
+
+- `Predefined_Mode` : A predefined FSE distribution table is used, defined in
+  [default distributions](#default-distributions).
+  No distribution table will be present.
+- `RLE_Mode` : The table description consists of a single byte, which contains the symbol's value.
+  This symbol will be used for all sequences.
+- `Repeat_Mode` : The table used in the previous compressed block will be used again.
+  No distribution table will be present.
+  Note: this includes `RLE_Mode`, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated.
+  If this mode is used without any previous sequence table in the frame
+  (or [dictionary](#dictionary-format)) to repeat, this should be treated as corruption.
+- `FSE_Compressed_Mode` : standard FSE compression.
+  A distribution table will be present.
+  The format of this distribution table is described in [FSE Table Description](#fse-table-description).
+  Note that the maximum allowed accuracy log for literals length and match length tables is 9,
+  and the maximum accuracy log for the offsets table is 8.
+
+#### The codes for literals lengths, match lengths, and offsets
+
+Each symbol is a _code_ in its own context,
+which specifies a `Baseline` and a `Number_of_Bits` to add.
+_Codes_ are FSE compressed,
+and interleaved with raw additional bits in the same bitstream.
+
+##### Literals length codes
+
+Literals length codes are values ranging from `0` to `35` inclusive.
+They define lengths from 0 to 131071 bytes.
+The literals length is equal to the decoded `Baseline` plus
+the result of reading `Number_of_Bits` bits from the bitstream,
+as a __little-endian__ value.
+
+| `Literals_Length_Code` |         0-15           |
+| ---------------------- | ---------------------- |
+| length                 | `Literals_Length_Code` |
+| `Number_of_Bits`       |          0             |
+
+| `Literals_Length_Code` |  16  |  17  |  18  |  19  |  20  |  21  |  22  |  23  |
+| ---------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| `Baseline`             |  16  |  18  |  20  |  22  |  24  |  28  |  32  |  40  |
+| `Number_of_Bits`       |   1  |   1  |   1  |   1  |   2  |   2  |   3  |   3  |
+
+| `Literals_Length_Code` |  24  |  25  |  26  |  27  |  28  |  29  |  30  |  31  |
+| ---------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| `Baseline`             |  48  |  64  | 128  | 256  | 512  | 1024 | 2048 | 4096 |
+| `Number_of_Bits`       |   4  |   6  |   7  |   8  |   9  |  10  |  11  |  12  |
+
+| `Literals_Length_Code` |  32  |  33  |  34  |  35  |
+| ---------------------- | ---- | ---- | ---- | ---- |
+| `Baseline`             | 8192 |16384 |32768 |65536 |
+| `Number_of_Bits`       |  13  |  14  |  15  |  16  |
+
+
+##### Match length codes
+
+Match length codes are values ranging from `0` to `52` inclusive.
+They define lengths from 3 to 131074 bytes.
+The match length is equal to the decoded `Baseline` plus
+the result of reading `Number_of_Bits` bits from the bitstream,
+as a __little-endian__ value.
+
+| `Match_Length_Code` |         0-31            |
+| ------------------- | ----------------------- |
+| value               | `Match_Length_Code` + 3 |
+| `Number_of_Bits`    |          0              |
+
+| `Match_Length_Code` |  32  |  33  |  34  |  35  |  36  |  37  |  38  |  39  |
+| ------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| `Baseline`          |  35  |  37  |  39  |  41  |  43  |  47  |  51  |  59  |
+| `Number_of_Bits`    |   1  |   1  |   1  |   1  |   2  |   2  |   3  |   3  |
+
+| `Match_Length_Code` |  40  |  41  |  42  |  43  |  44  |  45  |  46  |  47  |
+| ------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| `Baseline`          |  67  |  83  |  99  | 131  | 259  | 515  | 1027 | 2051 |
+| `Number_of_Bits`    |   4  |   4  |   5  |   7  |   8  |   9  |  10  |  11  |
+
+| `Match_Length_Code` |  48  |  49  |  50  |  51  |  52  |
+| ------------------- | ---- | ---- | ---- | ---- | ---- |
+| `Baseline`          | 4099 | 8195 |16387 |32771 |65539 |
+| `Number_of_Bits`    |  12  |  13  |  14  |  15  |  16  |
+
+##### Offset codes
+
+Offset codes are values ranging from `0` to `N`.
+
+A decoder is free to limit the maximum `N` it supports.
+The recommendation is to support at least up to `22`.
+For information, at the time of this writing,
+the reference decoder supports a maximum `N` value of `28` in 64-bit mode.
+
+An offset code is also the number of additional bits to read in __little-endian__ fashion,
+and can be translated into an `Offset_Value` using the following formulas :
+
+```
+Offset_Value = (1 << offsetCode) + readNBits(offsetCode);
+if (Offset_Value > 3) offset = Offset_Value - 3;
+```
+This means that the maximum `Offset_Value` is `(2^(N+1))-1`,
+supporting back-reference distances up to `(2^(N+1))-4`,
+though offsets remain limited by the [maximum back-reference distance](#window_descriptor).
+
+`Offset_Value` values from 1 to 3 are special : they define "repeat codes".
+This is described in more detail in [Repeat Offsets](#repeat-offsets).
+
+#### Decoding Sequences
+FSE bitstreams are read in the reverse direction from that in which they were written. In zstd,
+the compressor writes bits forward into a block and the decompressor
+must read the bitstream _backwards_.
+
+To find the start of the bitstream, it is therefore necessary to
+know the offset of the last byte of the block, which can be found
+by counting `Block_Size` bytes after the block header.
+
+After writing the last bit containing information, the compressor
+writes a single `1`-bit and then fills the byte with 0-7 `0` bits of
+padding. The last byte of the compressed bitstream cannot be `0` for
+that reason.
+
+When decompressing, the last byte containing the padding is the first
+byte to read. The decompressor needs to skip 0-7 initial `0`-bits and
+the first `1`-bit it encounters. Afterwards, the useful part of the bitstream
+begins.
+
+FSE decoding requires a 'state' to be carried from symbol to symbol.
+For more explanation on FSE decoding, see the [FSE section](#fse).
+
+For sequence decoding, a separate state keeps track of each of the
+literals lengths, offsets, and match lengths symbols.
+Some FSE primitives are also used.
+For more details on the operation of these primitives, see the [FSE section](#fse).
+
+##### Starting states
+The bitstream starts with initial FSE state values,
+each read using the number of bits given by its respective `Accuracy_Log`,
+as decoded previously from its normalized distribution.
+
+It starts with `Literals_Length_State`,
+followed by `Offset_State`,
+and finally `Match_Length_State`.
+
+Reminder : always keep in mind that all values are read _backward_,
+so the 'start' of the bitstream is at the highest position in memory,
+immediately before the last `1`-bit for padding.
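+
+As an illustration of the padding convention just described, here is a minimal
+sketch of how a decoder could locate the first useful bit of a backward-read
+bitstream (illustrative only, not the reference implementation) :
+
+```
+/* `lastByte` is the final byte of the bitstream.
+   Returns the number of useful bits it contains (0-7),
+   or -1 on corruption (a last byte of 0 is invalid). */
+static int useful_bits_in_last_byte(unsigned char lastByte)
+{
+    int bits = 8;
+    if (lastByte == 0) return -1;
+    while (!(lastByte & 0x80)) {   /* skip the 0-7 leading `0` padding bits */
+        lastByte = (unsigned char)(lastByte << 1);
+        bits--;
+    }
+    return bits - 1;               /* discard the `1`-bit flag itself */
+}
+```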
+
+After decoding the starting states, a single sequence is decoded
+`Number_of_Sequences` times.
+These sequences are decoded in order from first to last.
+Since the compressor writes the bitstream in the forward direction,
+this means the compressor must encode the sequences starting with the last
+one and ending with the first.
+
+##### Decoding a sequence
+For each of the symbol types, the FSE state can be used to determine the appropriate code.
+The code then defines the baseline and number of bits to read for each type.
+See the [description of the codes] for how to determine these values.
+
+[description of the codes]: #the-codes-for-literals-lengths-match-lengths-and-offsets
+
+Decoding starts by reading the `Number_of_Bits` required to decode `Offset`.
+It then does the same for `Match_Length`, and then for `Literals_Length`.
+This sequence is then used for [sequence execution](#sequence-execution).
+
+If it is not the last sequence in the block,
+the next operation is to update states.
+Using the rules pre-calculated in the decoding tables,
+`Literals_Length_State` is updated,
+followed by `Match_Length_State`,
+and then `Offset_State`.
+See the [FSE section](#fse) for details on how to update states from the bitstream.
+
+This operation will be repeated `Number_of_Sequences` times.
+At the end, the bitstream shall be entirely consumed,
+otherwise the bitstream is considered corrupted.
+
+#### Default Distributions
+If `Predefined_Mode` is selected for a symbol type,
+its FSE decoding table is generated from a predefined distribution table defined here.
+For details on how to convert this distribution into a decoding table, see the [FSE section].
+
+[FSE section]: #from-normalized-distribution-to-decoding-tables
+
+##### Literals Length
+The decoding table uses an accuracy log of 6 bits (64 states).
+```
+short literalsLength_defaultDistribution[36] =
+        { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+         -1,-1,-1,-1 };
+```
+
+##### Match Length
+The decoding table uses an accuracy log of 6 bits (64 states).
+```
+short matchLengths_defaultDistribution[53] =
+        { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+         -1,-1,-1,-1,-1 };
+```
+
+##### Offset Codes
+The decoding table uses an accuracy log of 5 bits (32 states),
+and supports a maximum `N` value of 28, allowing offset values up to 536,870,908.
+
+If any sequence in the compressed block requires a larger offset than this,
+it's not possible to use the default distribution to represent it.
+```
+short offsetCodes_defaultDistribution[29] =
+        { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+```
+
+
+Sequence Execution
+------------------
+Once literals and sequences have been decoded,
+they are combined to produce the decoded content of a block.
+
+Each sequence consists of a tuple of (`literals_length`, `offset_value`, `match_length`),
+decoded as described in the [Sequences Section](#sequences-section).
+To execute a sequence, first copy `literals_length` bytes from the literals section
+to the output.
+
+Then `match_length` bytes are copied from previously decoded data.
+The offset to copy from is determined by `offset_value`:
+if `offset_value > 3`, then the offset is `offset_value - 3`.
+If `offset_value` is from 1-3, the offset is a special repeat offset value.
+See the [repeat offset](#repeat-offsets) section for how the offset is determined
+in this case.
+
+The offset is counted from the current position, so an offset of 6
+and a match length of 3 means that 3 bytes should be copied from 6 bytes back.
+Note that all offsets leading to previously decoded data
+must be smaller than `Window_Size` defined in `Frame_Header_Descriptor`.
+
+#### Repeat offsets
+As seen in [Sequence Execution](#sequence-execution),
+the first 3 values define repeated offsets; we will call them
+`Repeated_Offset1`, `Repeated_Offset2`, and `Repeated_Offset3`.
+They are sorted in recency order, with `Repeated_Offset1` meaning "the most recent one".
+
+If `offset_value == 1`, then the offset used is `Repeated_Offset1`, etc.
+
+There is an exception though, when the current sequence's `literals_length` is `0`.
+In this case, repeated offsets are shifted by one,
+so an `offset_value` of 1 means `Repeated_Offset2`,
+an `offset_value` of 2 means `Repeated_Offset3`,
+and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`.
+
+For the first block, the starting offset history is populated with the following values : 1, 4 and 8 (in order).
+
+Then each block gets its starting offset history from the ending values of the most recent `Compressed_Block`.
+Note that blocks which are not `Compressed_Block` are skipped; they do not contribute to offset history.
+
+[Offset Codes]: #offset-codes
+
+###### Offset update rules
+
+The newest offset takes the lead in offset history,
+shifting others back (up to its previous place if it was already present).
+
+This means that when `Repeated_Offset1` (most recent) is used, history is unmodified.
+When `Repeated_Offset2` is used, it's swapped with `Repeated_Offset1`.
+If any other offset is used, it becomes `Repeated_Offset1` and the rest are shifted back by one.
+
+
+Skippable Frames
+----------------
+
+| `Magic_Number` | `Frame_Size` | `User_Data` |
+|:--------------:|:------------:|:-----------:|
+|    4 bytes     |   4 bytes    |   n bytes   |
+
+Skippable frames allow the insertion of user-defined data
+into a flow of concatenated frames.
+Their design is straightforward,
+with the sole objective of allowing the decoder to quickly skip
+over user-defined data and continue decoding.
+
+Skippable frames defined in this specification are compatible with [LZ4] ones.
+
+[LZ4]:http://www.lz4.org
+
+__`Magic_Number`__
+
+4 Bytes, __little-endian__ format.
+Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F.
+All 16 values are valid to identify a skippable frame.
+
+__`Frame_Size`__
+
+This is the size, in bytes, of the following `User_Data`
+(not including the magic number nor the size field itself).
+This field is represented using 4 Bytes, __little-endian__ format, unsigned 32-bits.
+This means `User_Data` can't be bigger than (2^32-1) bytes.
+
+__`User_Data`__
+
+The `User_Data` can be anything. Data will just be skipped by the decoder.
+
+
+Entropy Encoding
+----------------
+Two types of entropy encoding are used by the Zstandard format:
+FSE and Huffman coding.
+
+FSE
+---
+FSE, short for Finite State Entropy, is an entropy codec based on [ANS].
+FSE encoding/decoding involves a state that is carried over between symbols,
+so decoding must be done in the opposite direction from encoding.
+Therefore, all FSE bitstreams are read from end to beginning.
+
+For additional details on FSE, see [Finite State Entropy].
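+
+Concretely, one FSE decode step can be sketched as follows
+(a sketch only; the table layout is detailed in the next paragraphs,
+and `readBitsBackward()` stands for an assumed helper consuming
+`n` bits from the backward bitstream as a little-endian value) :
+
+```
+/* Assumed helper : consumes `n` bits from the backward bitstream. */
+extern unsigned readBitsBackward(int n);
+
+typedef struct {
+    unsigned char  symbol;    /* symbol decoded while in this state */
+    unsigned char  numBits;   /* nb of bits to read for the transition */
+    unsigned short baseline;  /* added to the bits read => next state */
+} FSE_decode_t;
+
+/* One decode step : emit the symbol for the current state,
+   then compute the next state. */
+static unsigned char fse_decode_step(const FSE_decode_t* table, unsigned* state)
+{
+    unsigned char const symbol = table[*state].symbol;
+    *state = table[*state].baseline + readBitsBackward(table[*state].numBits);
+    return symbol;
+}
+```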
+
+[Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/
+
+FSE decoding involves a decoding table, which has a power-of-2 size and contains three elements:
+`Symbol`, `Num_Bits`, and `Baseline`.
+The `log2` of the table size is its `Accuracy_Log`.
+The FSE state represents an index in this table.
+
+To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a __little-endian__ value.
+The next symbol in the stream is the `Symbol` indicated in the table for that state.
+To obtain the next state value,
+the decoder should consume `Num_Bits` bits from the stream as a __little-endian__ value and add it to `Baseline`.
+
+[ANS]: https://en.wikipedia.org/wiki/Asymmetric_Numeral_Systems
+
+### FSE Table Description
+To decode FSE streams, it is necessary to construct the decoding table.
+The Zstandard format encodes FSE table descriptions as follows:
+
+An FSE distribution table describes the probabilities of all symbols
+from `0` to the last present one (included)
+on a normalized scale of `1 << Accuracy_Log`.
+
+It's a bitstream which is read forward, in __little-endian__ fashion.
+It's not necessary to know its exact size,
+since it will be discovered and reported by the decoding process.
+
+The bitstream starts by reporting the scale on which it operates :
+`Accuracy_Log = low4bits + 5`.
+
+Then follows each symbol value, from `0` to the last present one.
+The number of bits used by each field is variable.
+It depends on :
+
+- Remaining probabilities + 1 :
+  __example__ :
+  Presuming an `Accuracy_Log` of 8,
+  and presuming 100 probability points have already been distributed,
+  the decoder may read any value from `0` to `255 - 100 + 1 == 156` (inclusive).
+  Therefore, it must read `log2sup(156) == 8` bits.
+
+- Value decoded : small values use 1 less bit :
+  __example__ :
+  Presuming values from 0 to 156 (inclusive) are possible,
+  255-156 = 99 values are remaining in an 8-bit field.
+  They are used this way :
+  the first 99 values (hence from 0 to 98) use only 7 bits,
+  while values from 99 to 156 use 8 bits.
+  This is achieved through this scheme :
+
+  | Value read | Value decoded | Number of bits used |
+  | ---------- | ------------- | ------------------- |
+  |   0 -  98  |   0 -  98     |  7                  |
+  |  99 - 127  |  99 - 127     |  8                  |
+  | 128 - 226  |   0 -  98     |  7                  |
+  | 227 - 255  | 128 - 156     |  8                  |
+
+Symbol probabilities are read one by one, in order.
+
+The probability is obtained from the decoded value using the following formula :
+`Proba = value - 1`
+
+This means value `0` becomes the negative probability `-1`.
+`-1` is a special probability, which means "less than 1".
+Its effect on the distribution table is described in the [next section].
+For the purpose of calculating the total of allocated probability points, it counts as one.
+
+[next section]:#from-normalized-distribution-to-decoding-tables
+
+When a symbol has a __probability__ of `zero`,
+it is followed by a 2-bit repeat flag.
+This repeat flag tells how many probabilities of zeroes follow the current one.
+It provides a number ranging from 0 to 3.
+If it is a 3, another 2-bit repeat flag follows, and so on.
+
+When the last symbol reaches the cumulated total of `1 << Accuracy_Log`,
+decoding is complete.
+If the last symbol makes the cumulated total go above `1 << Accuracy_Log`,
+the distribution is considered corrupted.
+
+Then the decoder can tell how many bytes were used in this process,
+and how many symbols are present.
+The bitstream consumes a whole number of bytes;
+any remaining bits within the last byte are simply unused.
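+
+The variable-size field reading described above can be sketched as follows
+(illustrative only; `peekBits()` and `consumeBits()` stand for assumed helpers
+over the forward, little-endian bitstream, and `maxValue` is the largest value
+currently readable, e.g. `156` in the example above) :
+
+```
+extern unsigned peekBits(int n);      /* assumed : peek `n` bits, little-endian */
+extern void     consumeBits(int n);   /* assumed : discard `n` bits */
+
+static unsigned read_variable_field(unsigned maxValue)
+{
+    int nbBits = 0;
+    unsigned lowMask, cutoff, value;
+    while ((1u << nbBits) <= maxValue) nbBits++;        /* log2sup(156) == 8 */
+    lowMask = (1u << (nbBits - 1)) - 1;                 /* 127 */
+    cutoff  = (1u << nbBits) - 1 - maxValue;            /* nb of short codes : 99 */
+    value   = peekBits(nbBits);
+    if ((value & lowMask) < cutoff) {                   /* short code : 1 less bit */
+        consumeBits(nbBits - 1);
+        return value & lowMask;
+    }
+    consumeBits(nbBits);
+    return (value > lowMask) ? value - cutoff : value;  /* high values wrap down */
+}
+```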
+
+##### From normalized distribution to decoding tables
+
+The distribution of normalized probabilities is enough
+to create a unique decoding table.
+
+It is built according to the following rules :
+
+The table has a size of `Table_Size = 1 << Accuracy_Log`.
+Each cell describes the symbol decoded,
+and instructions to get the next state.
+
+Symbols are scanned in their natural order for "less than 1" probabilities.
+Symbols with this probability are attributed a single cell,
+starting from the end of the table.
+These symbols define a full state reset, reading `Accuracy_Log` bits.
+
+All remaining symbols are sorted in their natural order.
+Starting from symbol `0` and table position `0`,
+each symbol gets attributed as many cells as its probability.
+Cell allocation is spread out, not linear :
+each successor position follows this rule :
+
+```
+position += (tableSize>>1) + (tableSize>>3) + 3;
+position &= tableSize-1;
+```
+
+A position is skipped if already occupied by a "less than 1" probability symbol.
+`position` does not reset between symbols; it simply iterates through
+each position in the table, switching to the next symbol when enough
+states have been allocated to the current one.
+
+The result is a list of state values.
+Each state will decode the current symbol.
+
+To get the `Number_of_Bits` and `Baseline` required for the next state,
+it's first necessary to sort all states in their natural order.
+The lower states will need 1 more bit than the higher ones.
+
+__Example__ :
+Presume a symbol has a probability of 5.
+It receives 5 state values, sorted in natural order.
+
+The next power of 2 is 8,
+so the space of probabilities is divided into 8 equal parts.
+Presuming the `Accuracy_Log` is 7, it defines 128 states.
+Divided by 8, each share covers 16 states.
+
+In order to reach 8, the 8-5=3 lowest states will count "double",
+taking shares twice as large,
+and requiring one more bit in the process.
+
+Numbering starts from the higher states, which use fewer bits.
+
+| state order      |   0   |   1   |    2   |   3  |   4   |
+| ---------------- | ----- | ----- | ------ | ---- | ----- |
+| width            |  32   |  32   |   32   |  16  |  16   |
+| `Number_of_Bits` |   5   |   5   |    5   |   4  |   4   |
+| range number     |   2   |   4   |    6   |   0  |   1   |
+| `Baseline`       |  32   |  64   |   96   |   0  |  16   |
+| range            | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
+
+The next state is determined from the current state
+by reading the required `Number_of_Bits`, and adding the specified `Baseline`.
+
+See [Appendix A] for the results of this process applied to the default distributions.
+
+[Appendix A]: #appendix-a---decoding-tables-for-predefined-codes
+
+Huffman Coding
+--------------
+Zstandard Huffman-coded streams are read backwards,
+similar to the FSE bitstreams.
+To find the start of the bitstream, it is therefore necessary to
+know the offset of the last byte of the Huffman-coded stream.
+
+After writing the last bit containing information, the compressor
+writes a single `1`-bit and then fills the byte with 0-7 `0` bits of
+padding. The last byte of the compressed bitstream cannot be `0` for
+that reason.
+
+When decompressing, the last byte containing the padding is the first
+byte to read. The decompressor needs to skip 0-7 initial `0`-bits and
+the first `1`-bit it encounters. Afterwards, the useful part of the bitstream
+begins.
+
+The bitstream contains Huffman-coded symbols in __little-endian__ order,
+with the codes defined by the method below.
+
+### Huffman Tree Description
+Prefix coding represents symbols from an a priori known alphabet
+by bit sequences (codewords), one codeword for each symbol,
+in a manner such that different symbols may be represented
+by bit sequences of different lengths,
+but a parser can always parse an encoded string
+unambiguously symbol-by-symbol.
+
+Given an alphabet with known symbol frequencies,
+the Huffman algorithm allows the construction of an optimal prefix code
+using the fewest bits of any possible prefix codes for that alphabet.
+
+A prefix code must not exceed a maximum code length.
+Using more bits improves accuracy but costs more header size,
+and requires more memory or more complex decoding operations.
+This specification limits the maximum code length to 11 bits.
+
+
+##### Representation
+
+All literal values from zero (included) to the last present one (excluded)
+are represented by a `Weight`, with values from `0` to `Max_Number_of_Bits`.
+Transformation from `Weight` to `Number_of_Bits` follows this formula :
+```
+Number_of_Bits = Weight ? (Max_Number_of_Bits + 1 - Weight) : 0
+```
+The last symbol's `Weight` is deduced from the previously decoded ones,
+by completing to the nearest power of 2.
+This power of 2 gives `Max_Number_of_Bits`, the depth of the current tree.
+
+__Example__ :
+Let's presume the following Huffman tree must be described :
+
+| literal          |  0  |  1  |  2  |  3  |  4  |  5  |
+| ---------------- | --- | --- | --- | --- | --- | --- |
+| `Number_of_Bits` |  1  |  2  |  3  |  0  |  4  |  4  |
+
+The tree depth is 4, since its deepest symbols use 4 bits.
+Value `5` will not be listed, as it can be determined from the values for `0-4`;
+nor will values above `5`, as they are all 0.
+Values from `0` to `4` will be listed using `Weight` instead of `Number_of_Bits`.
+The weight formula is :
+```
+Weight = Number_of_Bits ? (Max_Number_of_Bits + 1 - Number_of_Bits) : 0
+```
+It gives the following series of weights :
+
+| literal  |  0  |  1  |  2  |  3  |  4  |
+| -------- | --- | --- | --- | --- | --- |
+| `Weight` |  4  |  3  |  2  |  0  |  1  |
+
+The decoder will do the inverse operation :
+having collected weights of literals from `0` to `4`,
+it knows the last literal, `5`, is present with a non-zero weight.
+The weight of `5` can be determined by advancing to the next power of 2.
+The sum of `2^(Weight-1)` (excluding 0's) is :
+`8 + 4 + 2 + 0 + 1 = 15`.
+The nearest power of 2 is 16.
+Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 1`.
+
+##### Huffman Tree header
+
+This is a single byte value (0-255),
+which describes how to decode the list of weights.
+
+- if `headerByte` >= 128 : this is a direct representation,
+  where each `Weight` is written directly as a 4-bit field (0-15).
+  They are encoded forward, 2 weights to a byte, with the first weight taking
+  the top four bits and the second taking the bottom four (e.g. the following
+  operations could be used to read the weights:
+  `Weight[0] = (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf)`, etc.).
+  The full representation occupies `((Number_of_Symbols+1)/2)` bytes,
+  meaning it uses a last full byte even if `Number_of_Symbols` is odd.
+  `Number_of_Symbols = headerByte - 127`.
+  Note that the maximum `Number_of_Symbols` is 255-127 = 128.
+  A larger series must necessarily use FSE compression.
+
+- if `headerByte` < 128 :
+  the series of weights is compressed by FSE.
+  The length of the FSE-compressed series is equal to `headerByte` (0-127).
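+
+For the direct representation case (`headerByte` >= 128), a decoder sketch
+could look like this, including the deduction of the last weight described
+earlier (illustrative names only, not the reference implementation; the
+power-of-2 check reflects the completion rule above, and the caller is
+assumed to have verified that `((numSymbols+1)/2)` bytes are available) :
+
+```
+/* Read `headerByte - 127` direct 4-bit weights from `src`, then deduce
+   the last symbol's weight. Returns the total number of weights
+   (including the deduced one), or -1 on corruption. */
+static int read_direct_weights(const unsigned char* src, int headerByte,
+                               unsigned char weights[256], int* maxNbBits)
+{
+    int const numSymbols = headerByte - 127;  /* weights of symbols 0..numSymbols-1 */
+    unsigned long weightSum = 0;
+    int i;
+    for (i = 0; i < numSymbols; i++) {
+        unsigned char const b = src[i/2];
+        weights[i] = (i & 1) ? (b & 0xf) : (b >> 4);   /* top nibble first */
+        if (weights[i]) weightSum += 1ul << (weights[i] - 1);
+    }
+    if (weightSum == 0) return -1;                     /* corrupt : empty tree */
+    {   /* complete to the next power of 2 : gives tree depth + last weight */
+        unsigned long nextPow2 = 1;
+        int depth = 0, lastWeight = 1;
+        while (nextPow2 <= weightSum) { nextPow2 <<= 1; depth++; }
+        {   unsigned long const leftover = nextPow2 - weightSum;
+            if (leftover & (leftover - 1)) return -1;  /* must be a power of 2 */
+            while ((1ul << (lastWeight - 1)) < leftover) lastWeight++;
+        }
+        *maxNbBits = depth;
+        weights[numSymbols] = (unsigned char)lastWeight;
+    }
+    return numSymbols + 1;
+}
+```
+
+Running this on the example weights `4,3,2,0,1` gives `weightSum = 15`,
+`Max_Number_of_Bits = 4`, and a deduced last weight of `1`, as expected.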
+
+##### Finite State Entropy (FSE) compression of Huffman weights
+
+In this case, the series of Huffman weights is compressed using FSE compression.
+It's a single bitstream with 2 interleaved states,
+sharing a single distribution table.
+
+To decode an FSE bitstream, it is necessary to know its compressed size.
+Compressed size is provided by `headerByte`.
+It's also necessary to know its _maximum possible_ decompressed size,
+which is `255`, since literal values span from `0` to `255`,
+and the last symbol's weight is not represented.
+
+An FSE bitstream starts with a header describing the probability distribution,
+from which a decoding table is created.
+For a list of Huffman weights, the maximum accuracy log is 7 bits.
+For more details, see the [FSE header description](#fse-table-description).
+
+The Huffman header compression uses 2 states,
+which share the same FSE distribution table.
+The first state (`State1`) encodes the even-indexed symbols,
+and the second (`State2`) encodes the odd-indexed ones.
+`State1` is initialized first, and then `State2`, and they take turns
+decoding a single symbol and updating their state.
+For more details on these FSE operations, see the [FSE section](#fse).
+
+The number of symbols to decode is determined
+by tracking the bitstream overflow condition:
+if updating a state after decoding a symbol would require more bits than
+remain in the stream, the extra bits are assumed to be 0. Then,
+the symbols for each of the two final states are decoded and the process is complete.
+
+##### Conversion from weights to Huffman prefix codes
+
+All present symbols shall now have a `Weight` value.
+It is possible to transform weights into `Number_of_Bits`, using this formula:
+```
+Number_of_Bits = Weight ? Max_Number_of_Bits + 1 - Weight : 0
+```
+Symbols are sorted by `Weight`. Within the same `Weight`, symbols keep natural order.
+Symbols with a `Weight` of zero are removed.
+Then, starting from the lowest weight, prefix codes are distributed in order.
+
+__Example__ :
+Let's presume the following list of weights has been decoded :
+
+| Literal  |  0  |  1  |  2  |  3  |  4  |  5  |
+| -------- | --- | --- | --- | --- | --- | --- |
+| `Weight` |  4  |  3  |  2  |  0  |  1  |  1  |
+
+Sorted by weight and then natural order,
+it gives the following distribution :
+
+| Literal          |  3  |  4  |  5  |  2  |  1  |  0   |
+| ---------------- | --- | --- | --- | --- | --- | ---- |
+| `Weight`         |  0  |  1  |  1  |  2  |  3  |  4   |
+| `Number_of_Bits` |  0  |  4  |  4  |  3  |  2  |  1   |
+| prefix codes     | N/A | 0000| 0001| 001 | 01  | 1    |
+
+### Huffman-coded Streams
+Given a Huffman decoding table,
+it's possible to decode a Huffman-coded stream.
+
+Each bitstream must be read _backward_,
+that is starting from the end down to the beginning.
+Therefore it's necessary to know the size of each bitstream.
+
+It's also necessary to know exactly which _bit_ is the last one.
+This is detected by a final-bit flag :
+the highest bit of the last byte is a final-bit flag.
+Consequently, a last byte of `0` is not possible.
+The final-bit flag itself is not part of the useful bitstream;
+hence, the last byte contains between 0 and 7 useful bits.
+
+Starting from the end,
+it's possible to read the bitstream in a __little-endian__ fashion,
+keeping track of already used bits. Since the bitstream is encoded in reverse
+order, reading from the end delivers symbols in forward order.
+
+For example, if the literal sequence "0145" was encoded using the above prefix codes,
+it would be encoded (in reverse order) as:
+
+|Symbol  |   5  |   4  |  1 | 0 | Padding |
+|--------|------|------|----|---|---------|
+|Encoding|`0000`|`0001`|`01`|`1`| `00001` |
+
+This results in the following 2-byte bitstream :
+```
+00010000 00001101
+```
+
+Here is an alternative representation with the symbol codes separated by underscores:
+```
+0001_0000 00001_1_01
+```
+
+Reading the highest `Max_Number_of_Bits` bits,
+it's possible to compare the extracted value to the decoding table,
+determining the symbol to decode and the number of bits to discard.
+
+The process continues until the required number of symbols per stream has been read.
+If a bitstream is not entirely and exactly consumed,
+i.e. if decoding does not finish exactly at its beginning position with _all_ bits consumed,
+the decoding process is considered faulty.
+
+
+Dictionary Format
+-----------------
+
+Zstandard is compatible with "raw content" dictionaries,
+free of any format restriction, except that they must be at least 8 bytes.
+These dictionaries function as if they were just the `Content` part
+of a formatted dictionary.
+
+But dictionaries created by `zstd --train` follow a format, described here.
+
+__Pre-requisites__ : a dictionary has a size,
+  defined either by a buffer limit or a file size.
+
+| `Magic_Number` | `Dictionary_ID` | `Entropy_Tables` | `Content` |
+| -------------- | --------------- | ---------------- | --------- |
+
+__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, __little-endian__ format
+
+__`Dictionary_ID`__ : 4 bytes, stored in __little-endian__ format.
+  `Dictionary_ID` can be any value, except 0 (which means no `Dictionary_ID`).
+  It's used by decoders to check that they use the correct dictionary.
+
+_Reserved ranges :_
+  If the frame is going to be distributed in a private environment,
+  any `Dictionary_ID` can be used.
+  However, for public distribution of compressed frames,
+  the following ranges are reserved and shall not be used :
+
+  - low range  : <= 32767
+  - high range : >= (2^31)
+
+__`Entropy_Tables`__ : these follow the same format as the tables in [compressed blocks].
+  See the relevant [FSE](#fse-table-description)
+  and [Huffman](#huffman-tree-description) sections for how to decode these tables.
+  They are stored in the following order :
+  Huffman table for literals, FSE table for offsets,
+  FSE table for match lengths, and FSE table for literals lengths.
+  These tables populate the Repeat Stats literals mode and
+  Repeat distribution mode for sequence decoding.
+  They are finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`),
+  stored in order, 4 bytes __little-endian__ each, for a total of 12 bytes.
+  Each recent offset must have a value < dictionary size.
+
+__`Content`__ : The rest of the dictionary is its content.
+  The content acts as a "past" in front of the data to compress or decompress,
+  so it can be referenced in sequence commands.
+  As long as the amount of data decoded from this frame is less than or
+  equal to `Window_Size`, sequence commands may specify offsets longer
+  than the total length of decoded output so far to reference back to the
+  dictionary. After the total output has surpassed `Window_Size` however,
+  this is no longer allowed and the dictionary is no longer accessible.
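+
+A minimal sketch of telling raw-content dictionaries apart from formatted
+ones, and of extracting `Dictionary_ID`, could look like this (illustrative
+only, not the reference implementation; a real decoder would then go on to
+parse the entropy tables) :
+
+```
+#include <stddef.h>
+#include <stdint.h>
+
+#define DICT_MAGIC 0xEC30A437u   /* Magic_Number, little-endian */
+
+/* Returns 1 for a formatted dictionary (dictID filled in),
+   0 for a raw-content dictionary, -1 if the buffer is too small. */
+static int parse_dictionary_header(const uint8_t* dict, size_t size,
+                                   uint32_t* dictID)
+{
+    uint32_t magic;
+    if (size < 8) return -1;   /* even raw content must be >= 8 bytes */
+    magic = (uint32_t)dict[0] | ((uint32_t)dict[1] << 8)
+          | ((uint32_t)dict[2] << 16) | ((uint32_t)dict[3] << 24);
+    if (magic != DICT_MAGIC) return 0;   /* raw-content dictionary */
+    *dictID = (uint32_t)dict[4] | ((uint32_t)dict[5] << 8)
+            | ((uint32_t)dict[6] << 16) | ((uint32_t)dict[7] << 24);
+    return 1;                            /* Entropy_Tables follow */
+}
+```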
+
+[compressed blocks]: #the-format-of-compressed_block
+
+
+Appendix A - Decoding tables for predefined codes
+-------------------------------------------------
+
+This appendix contains FSE decoding tables
+for the predefined literal length, match length, and offset codes.
+The tables have been constructed using the algorithm given above in the chapter
+"from normalized distribution to decoding tables".
+The tables here can be used as examples
+to cross-check that an implementation builds its decoding tables correctly.
+
+#### Literal Length Code:
+
+| State | Symbol | Number_Of_Bits | Base |
+| ----- | ------ | -------------- | ---- |
+|     0 |      0 |              4 |    0 |
+|     1 |      0 |              4 |   16 |
+|     2 |      1 |              5 |   32 |
+|     3 |      3 |              5 |    0 |
+|     4 |      4 |              5 |    0 |
+|     5 |      6 |              5 |    0 |
+|     6 |      7 |              5 |    0 |
+|     7 |      9 |              5 |    0 |
+|     8 |     10 |              5 |    0 |
+|     9 |     12 |              5 |    0 |
+|    10 |     14 |              6 |    0 |
+|    11 |     16 |              5 |    0 |
+|    12 |     18 |              5 |    0 |
+|    13 |     19 |              5 |    0 |
+|    14 |     21 |              5 |    0 |
+|    15 |     22 |              5 |    0 |
+|    16 |     24 |              5 |    0 |
+|    17 |     25 |              5 |   32 |
+|    18 |     26 |              5 |    0 |
+|    19 |     27 |              6 |    0 |
+|    20 |     29 |              6 |    0 |
+|    21 |     31 |              6 |    0 |
+|    22 |      0 |              4 |   32 |
+|    23 |      1 |              4 |    0 |
+|    24 |      2 |              5 |    0 |
+|    25 |      4 |              5 |   32 |
+|    26 |      5 |              5 |    0 |
+|    27 |      7 |              5 |   32 |
+|    28 |      8 |              5 |    0 |
+|    29 |     10 |              5 |   32 |
+|    30 |     11 |              5 |    0 |
+|    31 |     13 |              6 |    0 |
+|    32 |     16 |              5 |   32 |
+|    33 |     17 |              5 |    0 |
+|    34 |     19 |              5 |   32 |
+|    35 |     20 |              5 |    0 |
+|    36 |     22 |              5 |   32 |
+|    37 |     23 |              5 |    0 |
+|    38 |     25 |              4 |    0 |
+|    39 |     25 |              4 |   16 |
+|    40 |     26 |              5 |   32 |
+|    41 |     28 |              6 |    0 |
+|    42 |     30 |              6 |    0 |
+|    43 |      0 |              4 |   48 |
+|    44 |      1 |              4 |   16 |
+|    45 |      2 |              5 |   32 |
+|    46 |      3 |              5 |   32 |
+|    47 |      5 |              5 |   32 |
+|    48 |      6 |              5 |   32 |
+|    49 |      8 |              5 |   32 |
+|    50 |      9 |              5 |   32 |
+|    51 |     11 |              5 |   32 |
+|    52 |     12 |              5 |   32 |
+|    53 |     15 |              6 |    0 |
+|    54 |     17 |              5 |   32 |
+|    55 |     18 |              5 |   32 |
+|    56 |     20 |              5 |   32 |
+|    57 |     21 |              5 |   32 |
+|    58 |     23 |              5 |   32 |
+|    59 |     24 |              5 |   32 |
+|    60 |     35 |              6 |    0 |
+|    61 |     34 |              6 |    0 |
+|    62 |     33 |              6 |    0 |
+|    63 |     32 |              6 |    0 |
+
+#### Match Length Code:
+
+| State | Symbol | Number_Of_Bits | Base |
+| ----- | ------ | -------------- | ---- |
+|     0 |      0 |              6 |    0 |
+|     1 |      1 |              4 |    0 |
+|     2 |      2 |              5 |   32 |
+|     3 |      3 |              5 |    0 |
+|     4 |      5 |              5 |    0 |
+|     5 |      6 |              5 |    0 |
+|     6 |      8 |              5 |    0 |
+|     7 |     10 |              6 |    0 |
+|     8 |     13 |              6 |    0 |
+|     9 |     16 |              6 |    0 |
+|    10 |     19 |              6 |    0 |
+|    11 |     22 |              6 |    0 |
+|    12 |     25 |              6 |    0 |
+|    13 |     28 |              6 |    0 |
+|    14 |     31 |              6 |    0 |
+|    15 |     33 |              6 |    0 |
+|    16 |     35 |              6 |    0 |
+|    17 |     37 |              6 |    0 |
+|    18 |     39 |              6 |    0 |
+|    19 |     41 |              6 |    0 |
+|    20 |     43 |              6 |    0 |
+|    21 |     45 |              6 |    0 |
+|    22 |      1 |              4 |   16 |
+|    23 |      2 |              4 |    0 |
+|    24 |      3 |              5 |   32 |
+|    25 |      4 |              5 |    0 |
+|    26 |      6 |              5 |   32 |
+|    27 |      7 |              5 |    0 |
+|    28 |      9 |              6 |    0 |
+|    29 |     12 |              6 |    0 |
+|    30 |     15 |              6 |    0 |
+|    31 |     18 |              6 |    0 |
+|    32 |     21 |              6 |    0 |
+|    33 |     24 |              6 |    0 |
+|    34 |     27 |              6 |    0 |
+|    35 |     30 |              6 |    0 |
+|    36 |     32 |              6 |    0 |
+|    37 |     34 |              6 |    0 |
+|    38 |     36 |              6 |    0 |
+|    39 |     38 |              6 |    0 |
+|    40 |     40 |              6 |    0 |
+|    41 |     42 |              6 |    0 |
+|    42 |     44 |              6 |    0 |
+|    43 |      1 |              4 |   32 |
+|    44 |      1 |              4 |   48 |
+|    45 |      2 |              4 |   16 |
+|    46 |      4 |              5 |   32 |
+|    47 |      5 |              5 |   32 |
+|    48 |      7 |              5 |   32 |
+|    49 |      8 |              5 |   32 |
+|    50 |     11 |              6 |    0 |
+|    51 |     14 |              6 |    0 |
+|    52 |     17 |              6 |    0 |
+|    53 |     20 |              6 |    0 |
+|    54 |     23 |              6 |    0 |
+|    55 |     26 |              6 |    0 |
+|    56 |     29 |              6 |    0 |
+|    57 |     52 |              6 |    0 |
+|    58 |     51 |              6 |    0 |
+|    59 |     50 |              6 |    0 |
+|    60 |     49 |              6 |    0 |
+|    61 |     48 |              6 |    0 |
+|    62 |     47 |              6 |    0 |
+|    63 |     46 |              6 |    0 |
+
+#### Offset Code:
+
+| State | Symbol | Number_Of_Bits | Base |
+| ----- | ------ | -------------- | ---- |
+|     0 |      0 |              5 |    0 |
+| 1 | 6 | 4 | 0 | +| 2 | 9 | 5 | 0 | +| 3 | 15 | 5 | 0 | +| 4 | 21 | 5 | 0 | +| 5 | 3 | 5 | 0 | +| 6 | 7 | 4 | 0 | +| 7 | 12 | 5 | 0 | +| 8 | 18 | 5 | 0 | +| 9 | 23 | 5 | 0 | +| 10 | 5 | 5 | 0 | +| 11 | 8 | 4 | 0 | +| 12 | 14 | 5 | 0 | +| 13 | 20 | 5 | 0 | +| 14 | 2 | 5 | 0 | +| 15 | 7 | 4 | 16 | +| 16 | 11 | 5 | 0 | +| 17 | 17 | 5 | 0 | +| 18 | 22 | 5 | 0 | +| 19 | 4 | 5 | 0 | +| 20 | 8 | 4 | 16 | +| 21 | 13 | 5 | 0 | +| 22 | 19 | 5 | 0 | +| 23 | 1 | 5 | 0 | +| 24 | 6 | 4 | 16 | +| 25 | 10 | 5 | 0 | +| 26 | 16 | 5 | 0 | +| 27 | 28 | 5 | 0 | +| 28 | 27 | 5 | 0 | +| 29 | 26 | 5 | 0 | +| 30 | 25 | 5 | 0 | +| 31 | 24 | 5 | 0 | + +Version changes +--------------- +- 0.2.6 : fixed an error in huffman example, by Ulrich Kunitz +- 0.2.5 : minor typos and clarifications +- 0.2.4 : section restructuring, by Sean Purcell +- 0.2.3 : clarified several details, by Sean Purcell +- 0.2.2 : added predefined codes, by Johannes Rudolph +- 0.2.1 : clarify field names, by Przemyslaw Skibinski +- 0.2.0 : numerous format adjustments for zstd v0.8+ +- 0.1.2 : limit Huffman tree depth to 11 bits +- 0.1.1 : reserved dictID ranges +- 0.1.0 : initial release diff --git a/src/zstd/doc/zstd_manual.html b/src/zstd/doc/zstd_manual.html new file mode 100644 index 00000000..b4720ada --- /dev/null +++ b/src/zstd/doc/zstd_manual.html @@ -0,0 +1,1200 @@ +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>zstd 1.3.2 Manual</title> +</head> +<body> +<h1>zstd 1.3.2 Manual</h1> +<hr> +<a name="Contents"></a><h2>Contents</h2> +<ol> +<li><a href="#Chapter1">Introduction</a></li> +<li><a href="#Chapter2">Version</a></li> +<li><a href="#Chapter3">Simple API</a></li> +<li><a href="#Chapter4">Explicit memory management</a></li> +<li><a href="#Chapter5">Simple dictionary API</a></li> +<li><a href="#Chapter6">Bulk processing dictionary API</a></li> +<li><a href="#Chapter7">Streaming</a></li> +<li><a href="#Chapter8">Streaming compression - HowTo</a></li> +<li><a href="#Chapter9">Streaming decompression - HowTo</a></li> +<li><a href="#Chapter10">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li> +<li><a href="#Chapter11">Advanced types</a></li> +<li><a href="#Chapter12">Frame size functions</a></li> +<li><a href="#Chapter13">Context memory usage</a></li> +<li><a href="#Chapter14">Advanced compression functions</a></li> +<li><a href="#Chapter15">Advanced decompression functions</a></li> +<li><a href="#Chapter16">Advanced streaming functions</a></li> +<li><a href="#Chapter17">Buffer-less and synchronous inner streaming functions</a></li> +<li><a href="#Chapter18">Buffer-less streaming compression (synchronous mode)</a></li> +<li><a href="#Chapter19">Buffer-less streaming decompression (synchronous mode)</a></li> +<li><a href="#Chapter20">New advanced API (experimental)</a></li> +<li><a href="#Chapter21">Block level API</a></li> +</ol> +<hr> +<a name="Chapter1"></a><h2>Introduction</h2><pre> + zstd, short for Zstandard, is a fast lossless compression algorithm, + targeting real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression functions. + The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22. + Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. 
+ Compression can be done in:
+ - a single step (described as Simple API)
+ - a single step, reusing a context (described as Explicit memory management)
+ - unbounded multiple steps (described as Streaming compression)
+ The compression ratio achievable on small data can be highly improved using a dictionary in:
+ - a single step (described as Simple dictionary API)
+ - a single step, reusing a dictionary (described as Bulk processing dictionary API)
+
+ Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+ Advanced experimental APIs shall never be used with a dynamic library.
+ They are not "stable"; their definition may change in the future. Only static linking is allowed.
+<BR></pre>
+
+<a name="Chapter2"></a><h2>Version</h2><pre></pre>
+
+<pre><b>unsigned ZSTD_versionNumber(void); </b>/**< useful to check dll version */<b>
+</b></pre><BR>
+<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ int compressionLevel);
+</b><p> Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ @return : compressed size written into `dst` (<= `dstCapacity`),
+ or an error code if it fails (which can be tested using ZSTD_isError()).
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+ const void* src, size_t compressedSize);
+</b><p> `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ `dstCapacity` is an upper bound of originalSize to regenerate.
+ If the user cannot determine a maximum upper bound, it's better to use streaming mode to decompress data.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ or an errorCode if it fails (which can be tested using ZSTD_isError()).
+</p></pre><BR>
+
+<pre><b>#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+</b><p> `src` should point to the start of a ZSTD encoded frame.
+ `srcSize` must be at least as large as the frame header.
+ hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ @return : - decompressed size of the frame in `src`, if known
+ - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ note 1 : a 0 return value means the frame is valid but "empty".
+ note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ In which case, it's necessary to use streaming mode to decompress data.
+ Optionally, application can rely on some implicit limit,
+ as ZSTD_decompress() only needs an upper bound of decompressed size.
+ (For example, data could be necessarily cut into blocks <= 16 KB).
+ note 3 : decompressed size is always present when compression is done with ZSTD_compress()
+ note 4 : decompressed size can be very large (64-bits value),
+ potentially larger than what local system can handle as a single memory segment.
+ In which case, it's necessary to use streaming mode to decompress data.
+ note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ Always ensure return value fits within application's authorized limits.
+ Each application can set its own limits.
+ note 6 : This function replaces ZSTD_getDecompressedSize()
+</p></pre><BR>
+
+<pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+</b><p> NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ Both functions work the same way,
+ but ZSTD_getDecompressedSize() blends
+ "empty", "unknown" and "error" results in the same return value (0),
+ while ZSTD_getFrameContentSize() distinguishes them.
+
+ 'src' is the start of a zstd compressed frame.
+ @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise.
+</p></pre><BR>
+
+<h3>Helper functions</h3><pre></pre><b><pre>#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < 128 KB) ? ((128 KB - (srcSize)) >> 11) </b>/* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */<b>
+size_t ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case scenario */<b>
+unsigned ZSTD_isError(size_t code); </b>/*!< tells if a `size_t` function result is an error code */<b>
+const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable string from an error code */<b>
+int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b>
+</pre></b><BR>
+<a name="Chapter4"></a><h2>Explicit memory management</h2><pre></pre>
+
+<h3>Compression context</h3><pre> When compressing many times,
+ it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
+</pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+</pre></b><BR>
+<pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ int compressionLevel);
+</b><p> Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
+</p></pre><BR>
+
+<h3>Decompression context</h3><pre> When decompressing many times,
+ it is recommended to allocate a context only once,
+ and re-use it for each successive decompression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution.
+</pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTD_DCtx* ZSTD_createDCtx(void);
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+</pre></b><BR>
+<pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx())
+</p></pre><BR>
+
+<a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ int compressionLevel);
+</b><p> Compression using a predefined Dictionary (see dictBuilder/zdict.h).
+ Note : This function loads the dictionary, resulting in significant startup delay.
+ Note : When `dict == NULL || dictSize < 8` no dictionary is used.
+</p></pre><BR> + +<pre><b>size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); +</b><p> Decompression using a predefined Dictionary (see dictBuilder/zdict.h). + Dictionary must be identical to the one used during compression. + Note : This function loads the dictionary, resulting in significant startup delay. + Note : When `dict == NULL || dictSize < 8` no dictionary is used. +</p></pre><BR> + +<a name="Chapter6"></a><h2>Bulk processing dictionary API</h2><pre></pre> + +<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); +</b><p> When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. + ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. + ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict +</p></pre><BR> + +<pre><b>size_t ZSTD_freeCDict(ZSTD_CDict* CDict); +</b><p> Function frees memory allocated by ZSTD_createCDict(). +</p></pre><BR> + +<pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); +</b><p> Compression using a digested Dictionary. + Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + Note that compression level is decided during dictionary creation. + Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) +</p></pre><BR> + +<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); +</b><p> Create a digested dictionary, ready to start decompression operation without startup delay. + dictBuffer can be released after DDict creation, as its content is copied inside DDict +</p></pre><BR> + +<pre><b>size_t ZSTD_freeDDict(ZSTD_DDict* ddict); +</b><p> Function frees memory allocated with ZSTD_createDDict() +</p></pre><BR> + +<pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); +</b><p> Decompression using a digested Dictionary. + Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. +</p></pre><BR> + +<a name="Chapter7"></a><h2>Streaming</h2><pre></pre> + +<pre><b>typedef struct ZSTD_inBuffer_s { + const void* src; </b>/**< start of input buffer */<b> + size_t size; </b>/**< size of input buffer */<b> + size_t pos; </b>/**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */<b> +} ZSTD_inBuffer; +</b></pre><BR> +<pre><b>typedef struct ZSTD_outBuffer_s { + void* dst; </b>/**< start of output buffer */<b> + size_t size; </b>/**< size of output buffer */<b> + size_t pos; </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b> +} ZSTD_outBuffer; +</b></pre><BR> +<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre> + A ZSTD_CStream object is required to track streaming operation. + Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. + ZSTD_CStream objects can be reused multiple times on consecutive compression operations. 
+ It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively, + since it will play nicer with system's memory, by re-using already allocated memory. + Use one separate ZSTD_CStream per thread for parallel execution. + + Start a new compression by initializing ZSTD_CStream. + Use ZSTD_initCStream() to start a new compression operation. + Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section) + + Use ZSTD_compressStream() repetitively to consume input stream. + The function will automatically update both `pos` fields. + Note that it may not consume the entire input, in which case `pos < size`, + and it's up to the caller to present again remaining data. + @return : a size hint, preferred nb of bytes to use as input for next function call + or an error code, which can be tested using ZSTD_isError(). + Note 1 : it's just a hint, to help latency a little, any other value will work fine. + Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() + + At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). + `output->pos` will be updated. + Note that some content might still be left within internal buffer if `output->size` is too small. + @return : nb of bytes still present within internal buffer (0 if it's empty) + or an error code, which can be tested using ZSTD_isError(). + + ZSTD_endStream() instructs to finish a frame. + It will perform a flush and write frame epilogue. + The epilogue is required for decoders to consider a frame completed. + ZSTD_endStream() may not be able to flush full data if `output->size` is too small. + In which case, call again ZSTD_endStream() to complete the flush. + @return : 0 if frame fully completed and fully flushed, + or >0 if some data is still present within internal buffer + (value is minimum size estimation for remaining data to flush, but it could be more) + or an error code, which can be tested using ZSTD_isError(). + + +<BR></pre> + +<pre><b>typedef ZSTD_CCtx ZSTD_CStream; </b>/**< CCtx and CStream are now effectively same object (>= v1.3.0) */<b> +</b></pre><BR> +<h3>ZSTD_CStream management functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream(void); +size_t ZSTD_freeCStream(ZSTD_CStream* zcs); +</pre></b><BR> +<h3>Streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +</pre></b><BR> +<pre><b>size_t ZSTD_CStreamInSize(void); </b>/**< recommended size for input buffer */<b> +</b></pre><BR> +<pre><b>size_t ZSTD_CStreamOutSize(void); </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b> +</b></pre><BR> +<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre> + A ZSTD_DStream object is required to track streaming operations. + Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. + ZSTD_DStream objects can be re-used multiple times. + + Use ZSTD_initDStream() to start a new decompression operation, + or ZSTD_initDStream_usingDict() if decompression requires a dictionary. 
+ @return : recommended first input size + + Use ZSTD_decompressStream() repetitively to consume your input. + The function will update both `pos` fields. + If `input.pos < input.size`, some input has not been consumed. + It's up to the caller to present again remaining data. + If `output.pos < output.size`, decoder has flushed everything it could. + @return : 0 when a frame is completely decoded and fully flushed, + an error code, which can be tested using ZSTD_isError(), + any other value > 0, which means there is still some decoding to do to complete current frame. + The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. + +<BR></pre> + +<pre><b>typedef ZSTD_DCtx ZSTD_DStream; </b>/**< DCtx and DStream are now effectively same object (>= v1.3.0) */<b> +</b></pre><BR> +<h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void); +size_t ZSTD_freeDStream(ZSTD_DStream* zds); +</pre></b><BR> +<h3>Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream(ZSTD_DStream* zds); +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +</pre></b><BR> +<pre><b>size_t ZSTD_DStreamInSize(void); </b>/*!< recommended size for input buffer */<b> +</b></pre><BR> +<pre><b>size_t ZSTD_DStreamOutSize(void); </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b> +</b></pre><BR> +<a name="Chapter10"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental. + They should never be used with a dynamic library, as prototypes may change in the future. + They are provided for advanced scenarios. + Use them only in association with static linking. 
+
+<BR></pre>
+
+<a name="Chapter11"></a><h2>Advanced types</h2><pre></pre>
+
+<pre><b>typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
+ ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; </b>/* from faster to stronger */<b>
+</b></pre><BR>
+<pre><b>typedef struct {
+ unsigned windowLog; </b>/**< largest match distance : larger == more compression, more memory needed during decompression */<b>
+ unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
+ unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b>
+ unsigned searchLog; </b>/**< nb of searches : larger == more compression, slower */<b>
+ unsigned searchLength; </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b>
+ unsigned targetLength; </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b>
+ ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+</b></pre><BR>
+<pre><b>typedef struct {
+ unsigned contentSizeFlag; </b>/**< 1: content size will be in frame header (when known) */<b>
+ unsigned checksumFlag; </b>/**< 1: generate a 32-bits checksum at end of frame, for error detection */<b>
+ unsigned noDictIDFlag; </b>/**< 1: no dictID will be saved into frame header (if dictionary compression) */<b>
+} ZSTD_frameParameters;
+</b></pre><BR>
+<pre><b>typedef struct {
+ ZSTD_compressionParameters cParams;
+ ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+</b></pre><BR>
+<h3>Custom memory allocation functions</h3><pre></pre><b><pre>typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+</b>/* use this constant to defer to stdlib's functions */<b>
+static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL };
+</pre></b><BR>
+<a name="Chapter12"></a><h2>Frame size functions</h2><pre></pre>
+
+<pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+</b><p> `src` should point to the start of a ZSTD encoded frame or skippable frame
+ `srcSize` must be at least as large as the frame
+ @return : the compressed size of the first frame starting at `src`,
+ suitable to pass to `ZSTD_decompress` or similar,
+ or an error code if input is invalid
+</p></pre><BR>
+
+<pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ `srcSize` must be the _exact_ size of this series
+ (i.e. there should be a frame boundary exactly at `srcSize` bytes after `src`)
+ @return : - decompressed size of all data in all successive frames
+ - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+
+ note 1 : decompressed size is an optional field, which may not be present, especially in streaming mode.
+ When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ In which case, it's necessary to use streaming mode to decompress data.
+ note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ note 3 : decompressed size can be very large (64-bits value),
+ potentially larger than what local system can handle as a single memory segment.
+ In which case, it's necessary to use streaming mode to decompress data. + note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + Always ensure result fits within application's authorized limits. + Each application can set its own limits. + note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + read each contained frame header. This is fast as most of the data is skipped, + however it does mean that all frame data must be present and valid. +</p></pre><BR> + +<pre><b>size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +</b><p> `src` should point to the start of a ZSTD frame + `srcSize` must be >= ZSTD_frameHeaderSize_prefix. + @return : size of the Frame Header +</p></pre><BR> + +<a name="Chapter13"></a><h2>Context memory usage</h2><pre></pre> + +<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); +</b><p> These functions give the current memory usage of selected object. + Object memory usage can evolve when re-used multiple times. +</p></pre><BR> + +<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel); +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +size_t ZSTD_estimateDCtxSize(void); +</b><p> These functions make it possible to estimate memory usage + of a future {D,C}Ctx, before its creation. + ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. + It will also consider src size to be arbitrarily "large", which is worst case. + If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1. + Note : CCtx estimation is only correct for single-threaded compression +</p></pre><BR> + +<pre><b>size_t ZSTD_estimateCStreamSize(int compressionLevel); +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +size_t ZSTD_estimateDStreamSize(size_t windowSize); +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); +</b><p> ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + It will also consider src size to be arbitrarily "large", which is worst case. + If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1. + Note : CStream estimation is only correct for single-threaded compression. 
+ ZSTD_DStream memory budget depends on window size.
+ This information can be passed manually, using ZSTD_estimateDStreamSize,
+ or deduced from a valid frame header, using ZSTD_estimateDStreamSize_fromFrame().
+ Note : if streaming is initialized with ZSTD_init?Stream_usingDict(),
+        an internal ?Dict will be created, whose additional size is not estimated here.
+        In this case, get total size by adding ZSTD_estimate?DictSize
+</p></pre><BR>
+
+<pre><b>typedef enum {
+    ZSTD_dlm_byCopy = 0, </b>/**< Copy dictionary content internally */<b>
+    ZSTD_dlm_byRef,      </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
+} ZSTD_dictLoadMethod_e;
+</b></pre><BR>
+<pre><b>size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+</b><p> ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ Note : dictionaries created by reference (ZSTD_dlm_byRef) are smaller
+
+</p></pre><BR>
+
+<a name="Chapter14"></a><h2>Advanced compression functions</h2><pre></pre>
+
+<pre><b>ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+</b><p> Create a ZSTD compression context using external alloc and free functions
+</p></pre><BR>
+
+<pre><b>ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+</b><p> workspace: The memory area to emplace the context into.
+            Provided pointer must be 8-bytes aligned.
+            It must outlive context usage.
+ workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize()
+                to determine how large workspace must be to support scenario.
+ @return : pointer to ZSTD_CCtx, or NULL if error (size too small)
+ Note : zstd will never resize nor malloc() when using a static cctx.
+        If it needs more memory than available, it will simply error out.
+ Note 2 : there is no corresponding "free" function.
+          Since workspace was allocated externally, it must be freed externally too.
+ Limitation 1 : currently not compatible with internal CDict creation, such as
+                ZSTD_CCtx_loadDictionary() or ZSTD_initCStream_usingDict().
+ Limitation 2 : currently not compatible with multi-threading
+
+</p></pre><BR>
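+
+<p> Example (illustrative sketch, not part of zstd.h) : one-shot compression with a static CCtx,
+ placed in a caller-provided workspace sized with ZSTD_estimateCCtxSize(). Error reporting is
+ simplified to sketch level.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+/* workspace must be 8-bytes aligned, and at least ZSTD_estimateCCtxSize(level) bytes */
+size_t compressOnce_static(void* workspace, size_t workspaceSize,
+                           void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize, int level)
+{
+    ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(workspace, workspaceSize);
+    if (cctx == NULL) return 0;   /* workspace too small : nothing compressed */
+    /* no ZSTD_freeCCtx() : the workspace is owned (and later freed) by the caller */
+    return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, level);
+}
+</pre><BR>
+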
+<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+</b><p> Create a digested dictionary for compression
+ Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ It is important that dictBuffer outlives the CDict : it must remain read-accessible throughout the lifetime of the CDict
+</p></pre><BR>
+
+<pre><b>typedef enum { ZSTD_dm_auto=0,        </b>/* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */<b>
+               ZSTD_dm_rawContent,    </b>/* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */<b>
+               ZSTD_dm_fullDict       </b>/* refuses to load a dictionary if it does not respect Zstandard's specification */<b>
+} ZSTD_dictMode_e;
+</b></pre><BR>
+<pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictMode_e dictMode,
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_customMem customMem);
+</b><p> Create a ZSTD_CDict using external alloc and free, and customized compression parameters
+</p></pre><BR>
+
+<pre><b>ZSTD_CDict* ZSTD_initStaticCDict(
+                void* workspace, size_t workspaceSize,
+                const void* dict, size_t dictSize,
+                ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode,
+                ZSTD_compressionParameters cParams);
+</b><p> Generate a digested dictionary in provided memory area.
+ workspace: The memory area to emplace the dictionary into.
+            Provided pointer must be 8-bytes aligned.
+            It must outlive dictionary usage.
+ workspaceSize: Use ZSTD_estimateCDictSize()
+                to determine how large workspace must be.
+ cParams : use ZSTD_getCParams() to transform a compression level
+           into its relevant cParams.
+ @return : pointer to ZSTD_CDict, or NULL if error (size too small)
+ Note : there is no corresponding "free" function.
+        Since workspace was allocated externally, it must be freed externally.
+
+</p></pre><BR>
+
+<pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+</b><p> @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ `estimatedSrcSize` value is optional, select 0 if not known
+</p></pre><BR>
+
+<pre><b>ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+</b><p> same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ All fields of `ZSTD_frameParameters` are set to default (0)
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+</b><p> Ensure param values remain within authorized range
+</p></pre><BR>
+
+<pre><b>ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+</b><p> optimize params for a given `srcSize` and `dictSize`.
+ both values are optional, select `0` if unknown.
+</p></pre><BR>
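+
+<p> Example (illustrative sketch, not part of zstd.h) : deriving a full parameter set from a
+ compression level, then letting ZSTD_adjustCParams() scale it when srcSize and dictSize are known.
+ Level 5 is an arbitrary choice for this sketch.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+ZSTD_parameters makeParams(unsigned long long srcSize, size_t dictSize)
+{
+    ZSTD_parameters params = ZSTD_getParams(5, srcSize, dictSize);   /* fParams comes back zeroed (defaults) */
+    params.cParams = ZSTD_adjustCParams(params.cParams, srcSize, dictSize);
+    /* ZSTD_checkCParams(params.cParams) can then verify values stay within authorized range */
+    return params;
+}
+</pre><BR>
+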
+<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const void* dict, size_t dictSize,
+                               ZSTD_parameters params);
+</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                         void* dst, size_t dstCapacity,
+                                         const void* src, size_t srcSize,
+                                         const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
+</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
+</p></pre><BR>
+
+<a name="Chapter15"></a><h2>Advanced decompression functions</h2><pre></pre>
+
+<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
+</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier.
+ Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ Note 3 : Skippable Frame Identifiers are considered valid.
+</p></pre><BR>
+
+<pre><b>ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+</b><p> Create a ZSTD decompression context using external alloc and free functions
+</p></pre><BR>
+
+<pre><b>ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+</b><p> workspace: The memory area to emplace the context into.
+            Provided pointer must be 8-bytes aligned.
+            It must outlive context usage.
+ workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize()
+                to determine how large workspace must be to support scenario.
+ @return : pointer to ZSTD_DCtx, or NULL if error (size too small)
+ Note : zstd will never resize nor malloc() when using a static dctx.
+        If it needs more memory than available, it will simply error out.
+ Note 2 : static dctx is incompatible with legacy support
+ Note 3 : there is no corresponding "free" function.
+          Since workspace was allocated externally, it must be freed externally.
+ Limitation : currently not compatible with internal DDict creation,
+              such as ZSTD_initDStream_usingDict().
+
+</p></pre><BR>
+
+<pre><b>ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+</b><p> Create a digested dictionary, ready to start decompression operation without startup delay.
+ Dictionary content is referenced, and therefore stays in dictBuffer.
+ It is important that dictBuffer outlives the DDict :
+ it must remain read-accessible throughout the lifetime of the DDict
+</p></pre><BR>
+
+<pre><b>ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_customMem customMem);
+</b><p> Create a ZSTD_DDict using external alloc and free, optionally by reference
+</p></pre><BR>
+
+<pre><b>ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
+                                 const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod);
+</b><p> Generate a digested dictionary in provided memory area.
+ workspace: The memory area to emplace the dictionary into.
+            Provided pointer must be 8-bytes aligned.
+            It must outlive dictionary usage.
+ workspaceSize: Use ZSTD_estimateDDictSize()
+                to determine how large workspace must be.
+ @return : pointer to ZSTD_DDict, or NULL if error (size too small)
+ Note : there is no corresponding "free" function.
+        Since workspace was allocated externally, it must be freed externally.
+
+</p></pre><BR>
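+
+<p> Example (illustrative sketch, not part of zstd.h) : building a digested dictionary inside a
+ caller-provided workspace, then decompressing with it. Uses ZSTD_decompress_usingDDict() from
+ the advanced decompression functions above ; error checks are elided.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+/* workspace must be 8-bytes aligned, and at least
+ * ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy) bytes.
+ * The caller keeps ownership of workspace, freeing it after the last use of the DDict. */
+size_t decompressWithStaticDDict(ZSTD_DCtx* dctx,
+                                 void* workspace, size_t workspaceSize,
+                                 const void* dict, size_t dictSize,
+                                 void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize)
+{
+    ZSTD_DDict* const ddict = ZSTD_initStaticDDict(workspace, workspaceSize,
+                                                   dict, dictSize, ZSTD_dlm_byCopy);
+    if (ddict == NULL) return 0;   /* workspace too small */
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
+}
+</pre><BR>
+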
+<pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+</b><p> Provides the dictID stored within dictionary.
+ if @return == 0, the dictionary is not conformant with Zstandard specification.
+ It can still be loaded, but as a content-only dictionary.
+</p></pre><BR>
+
+<pre><b>unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+</b><p> Provides the dictID of the dictionary loaded into `ddict`.
+ If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ Non-conformant dictionaries can still be loaded, but as content-only dictionaries.
+</p></pre><BR>
+
+<pre><b>unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+</b><p> Provides the dictID required to decompress the frame stored within `src`.
+ If @return == 0, the dictID could not be decoded.
+ This could be for one of the following reasons :
+ - The frame does not require a dictionary to be decoded (most common case).
+ - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
+   Note : this use case also happens when using a non-conformant dictionary.
+ - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ - This is not a Zstandard frame.
+ To identify the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
+</p></pre><BR>
+
+<a name="Chapter16"></a><h2>Advanced streaming functions</h2><pre></pre>
+
+<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    </b>/**< same as ZSTD_initStaticCCtx() */<b>
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   </b>/**< pledgedSrcSize must be correct ; a size of 0 means unknown. For a frame size of 0, use initCStream_advanced */<b>
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< creates an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/<b>
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
+                                 ZSTD_parameters params, unsigned long long pledgedSrcSize);  </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */<b>
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  </b>/**< note : cdict will just be referenced, and must outlive compression session */<b>
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);  </b>/**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */<b>
+</pre></b><BR>
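+
+<p> Example (illustrative sketch, not part of zstd.h ; error checks elided) : a single streaming
+ compression session with a known source size. Assumes dstCapacity >= ZSTD_compressBound(srcSize),
+ otherwise the flush loop below would need to provide a fresh output buffer instead of spinning.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t streamCompressKnownSize(ZSTD_CStream* zcs,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize, int level)
+{
+    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
+    ZSTD_inBuffer  in  = { src, srcSize, 0 };
+    size_t r;
+    /* note : a pledgedSrcSize of 0 would mean "unknown" here ; see initCStream_advanced for truly empty frames */
+    ZSTD_initCStream_srcSize(zcs, level, (unsigned long long)srcSize);
+    while (in.pos < in.size)
+        ZSTD_compressStream(zcs, &out, &in);
+    do { r = ZSTD_endStream(zcs, &out); }        /* write epilogue ; 0 means frame completed */
+    while (r != 0 && !ZSTD_isError(r));
+    return ZSTD_isError(r) ? r : out.pos;        /* compressed size */
+}
+</pre><BR>
+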
+<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+</b><p> start a new compression job, using same parameters from previous job.
+ This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ Note that zcs must be initialized at least once before using ZSTD_resetCStream().
+ pledgedSrcSize==0 means "srcSize unknown".
+ If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ @return : 0, or an error code (which can be tested using ZSTD_isError())
+</p></pre><BR>
+
+<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    </b>/**< same as ZSTD_initStaticDCtx() */<b>
+typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
+size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);   </b>/* obsolete : this API will be removed in a future version */<b>
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: no dictionary will be used if dict == NULL or dictSize < 8 */<b>
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
+size_t ZSTD_resetDStream(ZSTD_DStream* zds);  </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
+</pre></b><BR>
+<a name="Chapter17"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
+  This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
+  But it's also a complex one, with several restrictions, documented below.
+  Prefer normal streaming API for an easier experience.
+ 
+<BR></pre>
+
+<a name="Chapter18"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx().
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when the `src` buffer overlaps it.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  A `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+<BR></pre>
+
+<h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */<b>
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
+size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */<b>
+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: pledgedSrcSize can be 0, indicating unknown size. If it is non-zero, it must be accurate. For 0 size frames, use compressBegin_advanced */<b>
+</pre></b><BR>
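+
+<p> Example (illustrative sketch, not part of zstd.h ; error checks elided) : the buffer-less
+ compression loop described above, consuming `src` chunk by chunk. Assumes dstCapacity covers
+ the worst case (ZSTD_compressBound() per chunk), and that `src` stays valid and unmodified
+ throughout the session.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t bufferlessCompress(ZSTD_CCtx* cctx,
+                          void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize,
+                          size_t chunkSize, int level)
+{
+    const char* ip = (const char*)src;
+    char* op = (char*)dst;
+    ZSTD_compressBegin(cctx, level);
+    while (srcSize > chunkSize) {
+        op += ZSTD_compressContinue(cctx, op, dstCapacity - (size_t)(op - (char*)dst), ip, chunkSize);
+        ip += chunkSize; srcSize -= chunkSize;   /* prior chunks must remain accessible (see WindowLog) */
+    }
+    /* last chunk : ZSTD_compressEnd() writes the final block(s) and optional checksum */
+    op += ZSTD_compressEnd(cctx, op, dstCapacity - (size_t)(op - (char*)dst), ip, srcSize);
+    return (size_t)(op - (char*)dst);
+}
+</pre><BR>
+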
+<a name="Chapter19"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+ `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           or an errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if required value is too large for current system (in 32-bits mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  whose maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At which point, decoding can resume from the beginning of the buffer.
+  Note that already decoded data stored in the buffer should be flushed before being overwritten.
+
+  Alternatives are possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are set up, start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by the decompressor.
+  The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
+  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
+<BR></pre>
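+
+<p> Example (illustrative sketch, not part of zstd.h ; error checks elided) : the decoding loop
+ described above, simplified to a single flat output buffer large enough for the whole regenerated
+ content. A real application would apply the round buffer rules instead.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t bufferlessDecompress(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize)
+{
+    const char* ip = (const char*)src;
+    char* op = (char*)dst;
+    (void)srcSize;   /* assumed to contain exactly one complete frame */
+    ZSTD_decompressBegin(dctx);
+    while (1) {
+        size_t const toRead = ZSTD_nextSrcSizeToDecompress(dctx);
+        if (toRead == 0) break;   /* frame fully decoded */
+        /* each call needs this _exact_ amount of input ; output may be 0 (metadata item) */
+        op += ZSTD_decompressContinue(dctx, op, dstCapacity - (size_t)(op - (char*)dst), ip, toRead);
+        ip += toRead;
+    }
+    return (size_t)(op - (char*)dst);   /* number of regenerated bytes */
+}
+</pre><BR>
+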
+<h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; </b>/* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */<b>
+    unsigned long long windowSize;       </b>/* can be very large, up to frameContentSize */<b>
+    unsigned blockSizeMax;
+    ZSTD_frameType_e frameType;          </b>/* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */<b>
+    unsigned headerSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
+</pre></b><BR>
+<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+</b></pre><BR>
+<a name="Chapter20"></a><h2>New advanced API (experimental)</h2><pre></pre>
+
+<pre><b>typedef enum {
+    </b>/* Question : should we have a format ZSTD_f_auto ?<b>
+     * For the time being, it would mean exactly the same as ZSTD_f_zstd1.
+     * But, in the future, should several formats be supported,
+     * on the compression side, it would mean "default format".
+     * On the decompression side, it would mean "multi format",
+     * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames".
+     * Since meaning is a little different, another option could be to define different enums for compression and decompression.
+     * This question could be kept for later, when there are actually multiple formats to support,
+     * but there is also the question of pinning enum values, and pinning value `0` is especially important */
+    ZSTD_f_zstd1 = 0,        </b>/* zstd frame format, specified in zstd_compression_format.md (default) */<b>
+    ZSTD_f_zstd1_magicless,  </b>/* Variant of zstd frame format, without initial 4-bytes magic number.<b>
+                              * Useful to save 4 bytes per generated frame.
+                              * Decoder cannot automatically recognise this format ; it needs to be instructed. */
+} ZSTD_format_e;
+</b></pre><BR>
+<pre><b>typedef enum {
+    </b>/* compression format */<b>
+    ZSTD_p_format = 10,      </b>/* See ZSTD_format_e enum definition.<b>
+                              * Cast selected format as unsigned for ZSTD_CCtx_setParameter() compatibility. */
+
+    </b>/* compression parameters */<b>
+    ZSTD_p_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means "do not change cLevel". */
+    ZSTD_p_windowLog,        </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "do not change windowLog".
+                              * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
+                              * requires setting the maximum window size at least as large during decompression. */
+    ZSTD_p_hashLog,          </b>/* Size of the probe table, as a power of 2.<b>
+                              * Resulting table size is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "do not change hashLog". */
+    ZSTD_p_chainLog,         </b>/* Size of the full-search table, as a power of 2.<b>
+                              * Resulting table size is (1 << (chainLog+2)).
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless when using "fast" strategy.
+                              * Special: value 0 means "do not change chainLog". */
+    ZSTD_p_searchLog,        </b>/* Number of search attempts, as a power of 2.<b>
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless when using "fast" and "dFast" strategies.
+                              * Special: value 0 means "do not change searchLog". */
+    ZSTD_p_minMatch,         </b>/* Minimum size of searched matches (note : repCode matches can be smaller).<b>
+                              * Larger values make faster compression and decompression, but decrease ratio.
+                              * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              * Note that currently, for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "do not change minMatchLength". */
+    ZSTD_p_targetLength,     </b>/* Only useful for strategies >= btopt.<b>
+                              * Length of Match considered "good enough" to stop search.
+                              * Larger values make compression stronger and slower.
+                              * Special: value 0 means "do not change targetLength". */
+    ZSTD_p_compressionStrategy, </b>/* See ZSTD_strategy enum definition.<b>
+                              * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "do not change strategy". */
+
+    </b>/* frame parameters */<b>
+    ZSTD_p_contentSizeFlag=200, </b>/* Content size is written into frame header _whenever known_ (default:1)<b>
+                              * note that content size must be known at the beginning ;
+                              * it is sent using ZSTD_CCtx_setPledgedSrcSize() */
+    ZSTD_p_checksumFlag,     </b>/* A 32-bit checksum of content is written at end of frame (default:0) */<b>
+    ZSTD_p_dictIDFlag,       </b>/* When applicable, dictID of dictionary is provided in frame header (default:1) */<b>
+
+    </b>/* multi-threading parameters */<b>
+    ZSTD_p_nbThreads=400,    </b>/* Select how many threads a compression job can spawn (default:1)<b>
+                              * More threads improve speed, but also increase memory usage.
+                              * Can only receive a value > 1 if ZSTD_MULTITHREAD is enabled.
+                              * Special: value 0 means "do not change nbThreads" */
+    ZSTD_p_jobSize,          </b>/* Size of a compression job. Each compression job is completed in parallel.<b>
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlapSize, or 1 KB, whichever is larger.
+                              * The minimum size is automatically and transparently enforced */
+    ZSTD_p_overlapSizeLog,   </b>/* Size of previous input reloaded at the beginning of each job.<b>
+                              * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
+
+    </b>/* advanced parameters - may not remain available after API update */<b>
+    ZSTD_p_forceMaxWindow=1100, </b>/* Force back-reference distances to remain < windowSize,<b>
+                              * even when referencing into Dictionary content (default:0) */
+    ZSTD_p_enableLongDistanceMatching=1200,  </b>/* Enable long distance matching.<b>
+                                         * This parameter is designed to improve the compression
+                                         * ratio for large inputs with long distance matches.
+                                         * This increases the memory usage as well as window size.
+                                         * Note: setting this parameter sets all the LDM parameters
+                                         * as well as ZSTD_p_windowLog. It should be set after
+                                         * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and
+                                         * other LDM parameters. Setting the compression level
+                                         * after this parameter overrides the window log, though LDM
+                                         * will remain enabled until explicitly disabled.
*/ + ZSTD_p_ldmHashLog, </b>/* Size of the table for long distance matching, as a power of 2.<b> + * Larger values increase memory usage and compression ratio, but decrease + * compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * (default: windowlog - 7). */ + ZSTD_p_ldmMinMatch, </b>/* Minimum size of searched matches for long distance matcher.<b> + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN + * and ZSTD_LDM_MINMATCH_MAX (default: 64). */ + ZSTD_p_ldmBucketSizeLog, </b>/* Log size of each bucket in the LDM hash table for collision resolution.<b> + * Larger values usually improve collision resolution but may decrease + * compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). */ + ZSTD_p_ldmHashEveryLog, </b>/* Frequency of inserting/looking up entries in the LDM hash table.<b> + * The default is MAX(0, (windowLog - ldmHashLog)) to + * optimize hash table usage. + * Larger values improve compression speed. Deviating far from the + * default value will likely result in a decrease in compression ratio. + * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */ + +} ZSTD_cParameter; +</b></pre><BR> +<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value); +</b><p> Set one compression parameter, selected by enum ZSTD_cParameter. + Note : when `value` is an enum, cast it to unsigned for proper type checking. + @result : 0, or an error code (which can be tested with ZSTD_isError()). +</p></pre><BR> + +<pre><b>size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); +</b><p> Total input data size to be compressed as a single frame. + This value will be controlled at the end, and result in error if not respected. + @result : 0, or an error code (which can be tested with ZSTD_isError()). + Note 1 : 0 means zero, empty. + In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + Note that ZSTD_CONTENTSIZE_UNKNOWN is default value for new compression jobs. + Note 2 : If all data is provided and consumed in a single round, + this value is overriden by srcSize instead. +</p></pre><BR> + +<pre><b>size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); +</b><p> Create an internal CDict from dict buffer. + Decompression will have to use same buffer. + @result : 0, or an error code (which can be tested with ZSTD_isError()). + Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + meaning "return to no-dictionary mode". + Note 1 : `dict` content will be copied internally. Use + ZSTD_CCtx_loadDictionary_byReference() to reference dictionary + content instead. The dictionary buffer must then outlive its + users. + Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters. + For this reason, compression parameters cannot be changed anymore after loading a dictionary. + It's also a CPU-heavy operation, with non-negligible impact on latency. + Note 3 : Dictionary will be used for all future compression jobs. 
+          To return to "no-dictionary" situation, load a NULL dictionary
+ Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary
+          content will be interpreted.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+</b><p> Reference a prepared dictionary, to be used for all next compression jobs.
+ Note that compression parameters are enforced from within CDict,
+ and supersede any compression parameter previously set within CCtx.
+ The dictionary will remain valid for future compression jobs using same CCtx.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ Special : adding a NULL CDict means "return to no-dictionary mode".
+ Note 1 : Currently, only one dictionary can be managed.
+          Adding a new dictionary effectively "discards" any previous one.
+ Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
+size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode);
+</b><p> Reference a prefix (single-usage dictionary) for next compression job.
+ Decompression needs the same prefix to properly regenerate data.
+ Prefix is **only used once**. Tables are discarded at end of compression job.
+ Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
+ If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ Note 1 : Prefix buffer is referenced. It must outlive compression job.
+ Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+          It's a CPU-heavy operation, with non-negligible impact on latency.
+ Note 3 : By default, the prefix is treated as raw content
+          (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter
+          dictMode.
+</p></pre><BR>
+
+<pre><b>typedef enum {
+    ZSTD_e_continue=0, </b>/* collect more data, encoder transparently decides when to output result, for optimal conditions */<b>
+    ZSTD_e_flush,      </b>/* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */<b>
+    ZSTD_e_end         </b>/* flush any remaining data and close current frame. Any additional data starts a new frame. */<b>
+} ZSTD_EndDirective;
+</b></pre><BR>
+<pre><b>size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+                              ZSTD_outBuffer* output,
+                              ZSTD_inBuffer* input,
+                              ZSTD_EndDirective endOp);
+</b><p> Behaves about the same as ZSTD_compressStream. To note :
+ - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
+ - Compression parameters cannot be changed once compression is started.
+ - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ - @return provides the minimum amount of data still to flush from internal buffers
+   or an error code, which can be tested using ZSTD_isError().
+   if @return != 0, flush is not fully completed, there is some data left within internal buffers.
+ - after a ZSTD_e_end directive, if internal buffer is not fully flushed,
+   only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+   It is necessary to fully flush internal buffers
+   before starting a new compression job, or changing compression parameters.
+
+</p></pre><BR>
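+
+<p> Example (illustrative sketch, not part of zstd.h) : a one-shot frame with this API.
+ Parameters are pushed into the CCtx, then ZSTD_compress_generic() is driven with ZSTD_e_end
+ until it returns 0. Assumes dstCapacity >= ZSTD_compressBound(srcSize), so flushing always fits.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t advancedCompress(ZSTD_CCtx* cctx,
+                        void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize)
+{
+    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
+    ZSTD_inBuffer  in  = { src, srcSize, 0 };
+    size_t r;
+    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 19);   /* arbitrary strong level */
+    ZSTD_CCtx_setParameter(cctx, ZSTD_p_checksumFlag, 1);
+    ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
+    do { r = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); }   /* 0 == frame fully flushed */
+    while (r != 0 && !ZSTD_isError(r));
+    return ZSTD_isError(r) ? r : out.pos;
+}
+</pre><BR>
+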
+<pre><b>void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);   </b>/* Not ready yet ! */<b>
+</b><p> Return a CCtx to clean state.
+ Useful after an error, or to interrupt an ongoing compression job and start a new one.
+ Any internal data not yet flushed is cancelled.
+ Dictionary (if any) is dropped.
+ All parameters are back to default values.
+ It's possible to modify compression parameters after a reset.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_generic_simpleArgs (
+                ZSTD_CCtx* cctx,
+                void* dst, size_t dstCapacity, size_t* dstPos,
+                const void* src, size_t srcSize, size_t* srcPos,
+                ZSTD_EndDirective endOp);
+</b><p> Same as ZSTD_compress_generic(),
+ but using only integral types as arguments.
+ Argument list is larger than ZSTD_{in,out}Buffer,
+ but can be helpful for binders from dynamic languages
+ which have trouble handling structures containing memory pointers.
+
+</p></pre><BR>
+
+<pre><b>ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+</b><p> Quick howto :
+ - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ - ZSTD_CCtxParam_setParameter() : Push parameters one by one into
+                                   an existing ZSTD_CCtx_params structure.
+                                   This is similar to
+                                   ZSTD_CCtx_setParameter().
+ - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+                                              an existing CCtx.
+                                              These parameters will be applied to
+                                              all subsequent compression jobs.
+ - ZSTD_compress_generic() : Do compression using the CCtx.
+ - ZSTD_freeCCtxParams() : Free the memory.
+
+ This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
+ for static allocation for single-threaded compression.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params);
+</b><p> Reset params to default, with the default compression level.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+</b><p> Initializes the compression parameters of cctxParams according to
+ compression level. All other parameters are reset to their default values.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+</b><p> Initializes the compression and frame parameters of cctxParams according to
+ params. All other parameters are reset to their default values.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
+</b><p> Similar to ZSTD_CCtx_setParameter.
+ Set one compression parameter, selected by enum ZSTD_cParameter.
+ Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
+ Note : when `value` is an enum, cast it to unsigned for proper type checking.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
+</b><p> Apply a set of ZSTD_CCtx_params to the compression context.
+ This must be done before the dictionary is loaded.
+ The pledgedSrcSize is treated as unknown.
+ Multithreading parameters are applied only if nbThreads > 1.
+
+</p></pre><BR>
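+
+<p> Example (illustrative sketch, not part of zstd.h) : the "quick howto" above in code form,
+ building a parameter set once and applying it to a CCtx before compression starts.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t applyMyParams(ZSTD_CCtx* cctx)
+{
+    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
+    size_t r;
+    if (params == NULL) return (size_t)-1;   /* allocation failure (sketch-level handling) */
+    ZSTD_CCtxParam_setParameter(params, ZSTD_p_compressionLevel, 9);
+    ZSTD_CCtxParam_setParameter(params, ZSTD_p_checksumFlag, 1);
+    r = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);   /* before loading any dictionary */
+    ZSTD_freeCCtxParams(params);
+    return r;   /* subsequent ZSTD_compress_generic() calls use these parameters */
+}
+</pre><BR>
+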
+<h3>Advanced parameters for decompression API</h3><pre></pre><b><pre></pre></b><BR>
+<pre><b>size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);   </b>/* not implemented */<b>
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);   </b>/* not implemented */<b>
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode);   </b>/* not implemented */<b>
+</b><p> Create an internal DDict from dict buffer,
+ to be used to decompress next frames.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+           meaning "return to no-dictionary mode".
+ Note 1 : `dict` content will be copied internally.
+          Use ZSTD_DCtx_loadDictionary_byReference()
+          to reference dictionary content instead.
+          In which case, the dictionary buffer must outlive its users.
+ Note 2 : Loading a dictionary involves building tables,
+          which has a non-negligible impact on CPU usage and latency.
+ Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select
+          how dictionary content will be interpreted and loaded.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);   </b>/* not implemented */<b>
+</b><p> Reference a prepared dictionary, to be used to decompress next frames.
+ The dictionary remains active for decompression of future frames using same DCtx.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ Note 1 : Currently, only one dictionary can be managed.
+          Referencing a new dictionary effectively "discards" any previous one.
+ Special : adding a NULL DDict means "return to no-dictionary mode".
+ Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);   </b>/* not implemented */<b>
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode);   </b>/* not implemented */<b>
+</b><p> Reference a prefix (single-usage dictionary) for next decompression job.
+ Prefix is **only used once**. It must be explicitly referenced before each frame.
+ If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ Note 2 : Prefix buffer is referenced. It must outlive decompression job.
+ Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+          Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode.
+ Note 4 : Referencing a raw content prefix has almost no cpu or memory cost.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+</b><p> Refuses to allocate internal buffers for frames requiring a window size larger than the provided limit.
+ This is useful to prevent a decoder context from reserving too much memory for itself (potential attack scenario).
+ This parameter is only useful in streaming mode, since no internal buffer is allocated in direct mode.
+ By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_MAX)
+ @return : 0, or an error code (which can be tested using ZSTD_isError()).
+
+</p></pre><BR>
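+
+<p> Example (illustrative sketch, not part of zstd.h) : hardening a decompression context against
+ untrusted input, by capping the accepted window size. The 128 MB limit is an arbitrary
+ application-level choice for this sketch.</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t hardenDCtx(ZSTD_DCtx* dctx)
+{
+    size_t const maxWindow = (size_t)1 << 27;   /* 128 MB : frames declaring more will error out */
+    return ZSTD_DCtx_setMaxWindowSize(dctx, maxWindow);
+}
+</pre><BR>
+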
+<pre><b>size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+</b><p> Instruct the decoder context about what kind of data to decode next.
+ This instruction is mandatory to decode data without a fully-formed header,
+ such as ZSTD_f_zstd1_magicless for example.
+ @return : 0, or an error code (which can be tested using ZSTD_isError()).
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx,
+                               ZSTD_outBuffer* output,
+                               ZSTD_inBuffer* input);
+</b><p> Behaves the same as ZSTD_decompressStream.
+ Decompression parameters cannot be changed once decompression is started.
+ @return : an error code, which can be tested using ZSTD_isError()
+           if > 0, a hint : nb of expected input bytes for next invocation.
+           `0` means : a frame has just been fully decoded and flushed.
+
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_generic_simpleArgs (
+                ZSTD_DCtx* dctx,
+                void* dst, size_t dstCapacity, size_t* dstPos,
+                const void* src, size_t srcSize, size_t* srcPos);
+</b><p> Same as ZSTD_decompress_generic(),
+ but using only integral types as arguments.
+ Argument list is larger than ZSTD_{in,out}Buffer,
+ but can be helpful for binders from dynamic languages
+ which have trouble handling structures containing memory pointers.
+
+</p></pre><BR>
+
+<pre><b>void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);
+</b><p> Return a DCtx to clean state.
+ If a decompression was ongoing, any internal data not yet flushed is cancelled.
+ All parameters are back to default values, including sticky ones.
+ Dictionary (if any) is dropped.
+ Parameters can be modified again after a reset.
+
+</p></pre><BR>
+
+<a name="Chapter21"></a><h2>Block level API</h2><pre></pre>
+
+<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+ The user must keep track of the information required to regenerate data, such as compressed and content sizes.
+
+ A few rules to respect :
+ - Compressing and decompressing require a context structure
+   + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+ - It is necessary to initialize the context before starting
+   + compression : any ZSTD_compressBegin*() variant, including with dictionary
+   + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+   + copyCCtx() and copyDCtx() can be used too
+ - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+   + If input is larger than a block size, it's necessary to split input data into multiple blocks
+   + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
+     Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
+ - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
+   In which case, nothing is produced into `dst`.
+   + User must test for such outcome and deal directly with uncompressed data
+   + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+   + In case of multiple successive blocks, should some of them be uncompressed,
+     decoder must be informed of their existence in order to follow proper history.
+     Use ZSTD_insertBlock() for such a case, as shown in the sketch below.
+</p></pre><BR>
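+
+<p> Example (illustrative sketch, not part of zstd.h ; error checks mostly elided) : round-tripping
+ a single block, handling the "not compressible" case where ZSTD_compressBlock() returns 0.
+ Assumes cctx and dctx were already initialized with ZSTD_compressBegin() and ZSTD_decompressBegin().</p>
+<pre>
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+#include "string.h"   /* memcpy */
+
+size_t roundtripBlock(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
+                      void* tmp, size_t tmpCapacity,
+                      void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)   /* srcSize <= ZSTD_getBlockSize(cctx) */
+{
+    size_t const cSize = ZSTD_compressBlock(cctx, tmp, tmpCapacity, src, srcSize);
+    if (ZSTD_isError(cSize)) return cSize;
+    if (cSize == 0) {   /* not compressible : store raw, and inform the decoder's history */
+        memcpy(dst, src, srcSize);
+        ZSTD_insertBlock(dctx, dst, srcSize);
+        return srcSize;
+    }
+    return ZSTD_decompressBlock(dctx, dst, dstCapacity, tmp, cSize);
+}
+</pre><BR>
+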
+<h3>Raw zstd block functions</h3><pre></pre><b><pre>size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  </b>/**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression */<b>
+</pre></b><BR>
+</body>
+</html>