summaryrefslogtreecommitdiffstats
path: root/modules/brotli/common/shared_dictionary.c
diff options
context:
space:
mode:
Diffstat (limited to 'modules/brotli/common/shared_dictionary.c')
-rw-r--r--modules/brotli/common/shared_dictionary.c521
1 files changed, 521 insertions, 0 deletions
diff --git a/modules/brotli/common/shared_dictionary.c b/modules/brotli/common/shared_dictionary.c
new file mode 100644
index 0000000000..49f1c9b075
--- /dev/null
+++ b/modules/brotli/common/shared_dictionary.c
@@ -0,0 +1,521 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Shared Dictionary definition and utilities. */
+
+#include <brotli/shared_dictionary.h>
+
+#include <memory.h>
+#include <stdlib.h> /* malloc, free */
+#include <stdio.h>
+
+#include "dictionary.h"
+#include "platform.h"
+#include "shared_dictionary_internal.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if defined(BROTLI_EXPERIMENTAL)
+
+#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
+ - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
+
+/* Max allowed by spec */
+#define BROTLI_MAX_SIZE_BITS 15u
+
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
+ BROTLI_BOOL* result) {
+ uint8_t value;
+ size_t position = *pos;
+ if (position >= size) return BROTLI_FALSE; /* past file end */
+ value = encoded[position++];
+ if (value > 1) return BROTLI_FALSE; /* invalid bool */
+ *result = TO_BROTLI_BOOL(value);
+ *pos = position;
+ return BROTLI_TRUE; /* success */
+}
+
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
+ uint8_t* result) {
+ size_t position = *pos;
+ if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
+ *result = encoded[position++];
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
+ uint16_t* result) {
+ size_t position = *pos;
+ if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
+ *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
+ position += 2;
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+/* Reads a varint into a uint32_t, and returns error if it's too large */
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
+ size_t* pos, uint32_t* result) {
+ int num = 0;
+ uint8_t byte;
+ *result = 0;
+ for (;;) {
+ if (*pos >= size) return BROTLI_FALSE;
+ byte = encoded[(*pos)++];
+ if (num == 4 && byte > 15) return BROTLI_FALSE;
+ *result |= (uint32_t)(byte & 127) << (num * 7);
+ if (byte < 128) return BROTLI_TRUE;
+ num++;
+ }
+}
+
+/* Returns the total length of word list. */
+static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
+ uint32_t* offsets_by_length) {
+ uint32_t pos = 0;
+ uint32_t i;
+ for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
+ offsets_by_length[i] = pos;
+ if (size_bits_by_length[i] != 0) {
+ pos += i << size_bits_by_length[i];
+ }
+ }
+ return pos;
+}
+
+static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
+ size_t* pos, BrotliDictionary* out) {
+ size_t offset;
+ size_t i;
+ size_t position = *pos;
+ if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
+ return BROTLI_FALSE;
+ }
+
+ memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
+ memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
+ &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
+ for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+ i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
+ if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
+ return BROTLI_FALSE;
+ }
+ }
+ position += BROTLI_NUM_ENCODED_LENGTHS;
+ offset = BrotliSizeBitsToOffsets(
+ out->size_bits_by_length, out->offsets_by_length);
+
+ out->data = &encoded[position];
+ out->data_size = offset;
+ position += offset;
+ if (position > size) return BROTLI_FALSE;
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+/* Computes the cutOffTransforms of a BrotliTransforms which already has the
+ transforms data correctly filled in. */
+static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
+ uint32_t i;
+ for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
+ transforms->cutOffTransforms[i] = -1;
+ }
+ for (i = 0; i < transforms->num_transforms; i++) {
+ const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
+ uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
+ const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
+ if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
+ transforms->cutOffTransforms[type] == -1) {
+ transforms->cutOffTransforms[type] = (int16_t)i;
+ }
+ }
+}
+
+static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
+ size_t* pos, BrotliTransforms* out, uint16_t* out_table,
+ size_t* out_table_size) {
+ size_t position = *pos;
+ size_t offset = 0;
+ size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */
+ size_t data_length = 0;
+
+ /* PREFIX_SUFFIX_LENGTH */
+ if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
+ return BROTLI_FALSE;
+ }
+ data_length = out->prefix_suffix_size;
+
+ /* Must at least have space for null terminator. */
+ if (data_length < 1) return BROTLI_FALSE;
+ out->prefix_suffix = &encoded[position];
+ if (position + data_length >= size) return BROTLI_FALSE;
+ while (BROTLI_TRUE) {
+ /* STRING_LENGTH */
+ size_t stringlet_len = encoded[position + offset];
+ out_table[stringlet_count] = (uint16_t)offset;
+ stringlet_count++;
+ offset++;
+ if (stringlet_len == 0) {
+ if (offset == data_length) {
+ break;
+ } else {
+ return BROTLI_FALSE;
+ }
+ }
+ if (stringlet_count > 255) return BROTLI_FALSE;
+ offset += stringlet_len;
+ if (offset >= data_length) return BROTLI_FALSE;
+ }
+
+ position += data_length;
+ *pos = position;
+ *out_table_size = (uint16_t)stringlet_count;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
+ size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
+ size_t* prefix_suffix_count) {
+ uint32_t i;
+ BROTLI_BOOL has_params = BROTLI_FALSE;
+ BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
+ size_t position = *pos;
+ size_t stringlet_cnt = 0;
+ if (position >= size) return BROTLI_FALSE;
+
+ prefix_suffix_ok = ParsePrefixSuffixTable(
+ size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
+ if (!prefix_suffix_ok) return BROTLI_FALSE;
+ out->prefix_suffix_map = prefix_suffix_table;
+ *prefix_suffix_count = stringlet_cnt;
+
+ out->num_transforms = encoded[position++];
+ out->transforms = &encoded[position];
+ position += (size_t)out->num_transforms * 3;
+ if (position > size) return BROTLI_FALSE;
+ /* Check for errors and read extra parameters. */
+ for (i = 0; i < out->num_transforms; i++) {
+ uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
+ uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
+ uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
+ if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
+ if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
+ if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
+ if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
+ type == BROTLI_TRANSFORM_SHIFT_ALL) {
+ has_params = BROTLI_TRUE;
+ }
+ }
+ if (has_params) {
+ out->params = &encoded[position];
+ position += (size_t)out->num_transforms * 2;
+ if (position > size) return BROTLI_FALSE;
+ for (i = 0; i < out->num_transforms; i++) {
+ uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
+ if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
+ type != BROTLI_TRANSFORM_SHIFT_ALL) {
+ if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
+ return BROTLI_FALSE;
+ }
+ }
+ }
+ } else {
+ out->params = NULL;
+ }
+ ComputeCutoffTransforms(out);
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
+ size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
+ size_t pos = 0;
+ uint32_t chunk_size = 0;
+ uint8_t num_word_lists;
+ uint8_t num_transform_lists;
+ *is_custom_static_dict = BROTLI_FALSE;
+ *num_prefix = 0;
+
+ /* Skip magic header bytes. */
+ pos += 2;
+
+ /* LZ77_DICTIONARY_LENGTH */
+ if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
+ if (chunk_size != 0) {
+ /* This limitation is not specified but the 32-bit Brotli decoder for now */
+ if (chunk_size > 1073741823) return BROTLI_FALSE;
+ *num_prefix = 1;
+ if (pos + chunk_size > size) return BROTLI_FALSE;
+ pos += chunk_size;
+ }
+
+ if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
+ return BROTLI_FALSE;
+ }
+ if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
+ return BROTLI_FALSE;
+ }
+
+ if (num_word_lists > 0 || num_transform_lists > 0) {
+ *is_custom_static_dict = BROTLI_TRUE;
+ }
+
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
+ BrotliSharedDictionary* dict) {
+ uint32_t i;
+ size_t pos = 0;
+ uint32_t chunk_size = 0;
+ size_t total_prefix_suffix_count = 0;
+ size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+ uint16_t temporary_prefix_suffix_table[256];
+
+ /* Skip magic header bytes. */
+ pos += 2;
+
+ /* LZ77_DICTIONARY_LENGTH */
+ if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
+ if (chunk_size != 0) {
+ if (pos + chunk_size > size) return BROTLI_FALSE;
+ dict->prefix_size[dict->num_prefix] = chunk_size;
+ dict->prefix[dict->num_prefix] = &encoded[pos];
+ dict->num_prefix++;
+ /* LZ77_DICTIONARY_LENGTH bytes. */
+ pos += chunk_size;
+ }
+
+ /* NUM_WORD_LISTS */
+ if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
+ return BROTLI_FALSE;
+ }
+ if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+ return BROTLI_FALSE;
+ }
+
+ if (dict->num_word_lists != 0) {
+ dict->words_instances = (BrotliDictionary*)dict->alloc_func(
+ dict->memory_manager_opaque,
+ dict->num_word_lists * sizeof(*dict->words_instances));
+ if (!dict->words_instances) return BROTLI_FALSE; /* OOM */
+ }
+ for (i = 0; i < dict->num_word_lists; i++) {
+ if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
+ return BROTLI_FALSE;
+ }
+ }
+
+ /* NUM_TRANSFORM_LISTS */
+ if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
+ return BROTLI_FALSE;
+ }
+ if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+ return BROTLI_FALSE;
+ }
+
+ if (dict->num_transform_lists != 0) {
+ dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
+ dict->memory_manager_opaque,
+ dict->num_transform_lists * sizeof(*dict->transforms_instances));
+ if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */
+ }
+ for (i = 0; i < dict->num_transform_lists; i++) {
+ BROTLI_BOOL ok = BROTLI_FALSE;
+ size_t prefix_suffix_count = 0;
+ trasform_list_start[i] = pos;
+ dict->transforms_instances[i].prefix_suffix_map =
+ temporary_prefix_suffix_table;
+ ok = ParseTransformsList(
+ size, encoded, &pos, &dict->transforms_instances[i],
+ temporary_prefix_suffix_table, &prefix_suffix_count);
+ if (!ok) return BROTLI_FALSE;
+ total_prefix_suffix_count += prefix_suffix_count;
+ }
+ if (total_prefix_suffix_count != 0) {
+ dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
+ dict->memory_manager_opaque,
+ total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
+ if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */
+ }
+ total_prefix_suffix_count = 0;
+ for (i = 0; i < dict->num_transform_lists; i++) {
+ size_t prefix_suffix_count = 0;
+ size_t position = trasform_list_start[i];
+ uint16_t* prefix_suffix_map =
+ &dict->prefix_suffix_maps[total_prefix_suffix_count];
+ BROTLI_BOOL ok = ParsePrefixSuffixTable(
+ size, encoded, &position, &dict->transforms_instances[i],
+ prefix_suffix_map, &prefix_suffix_count);
+ if (!ok) return BROTLI_FALSE;
+ dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
+ total_prefix_suffix_count += prefix_suffix_count;
+ }
+
+ if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
+ if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
+ return BROTLI_FALSE;
+ }
+ if (dict->num_dictionaries == 0 ||
+ dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+ return BROTLI_FALSE;
+ }
+ for (i = 0; i < dict->num_dictionaries; i++) {
+ uint8_t words_index;
+ uint8_t transforms_index;
+ if (!ReadUint8(encoded, size, &pos, &words_index)) {
+ return BROTLI_FALSE;
+ }
+ if (words_index > dict->num_word_lists) return BROTLI_FALSE;
+ if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
+ return BROTLI_FALSE;
+ }
+ if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
+ dict->words[i] = words_index == dict->num_word_lists ?
+ BrotliGetDictionary() : &dict->words_instances[words_index];
+ dict->transforms[i] = transforms_index == dict->num_transform_lists ?
+ BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
+ }
+ /* CONTEXT_ENABLED */
+ if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
+ return BROTLI_FALSE;
+ }
+
+ /* CONTEXT_MAP */
+ if (dict->context_based) {
+ for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
+ if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
+ return BROTLI_FALSE;
+ }
+ if (dict->context_map[i] >= dict->num_dictionaries) {
+ return BROTLI_FALSE;
+ }
+ }
+ }
+ } else {
+ dict->context_based = BROTLI_FALSE;
+ dict->num_dictionaries = 1;
+ dict->words[0] = BrotliGetDictionary();
+ dict->transforms[0] = BrotliGetTransforms();
+ }
+
+ return BROTLI_TRUE;
+}
+
+/* Decodes shared dictionary and verifies correctness.
+ Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
+ The BrotliSharedDictionary must already have been initialized. If the
+ BrotliSharedDictionary already contains data, compound dictionaries
+ will be appended, but an error will be returned if it already has
+ custom words or transforms.
+ TODO(lode): link to RFC for shared brotli once published. */
+static BROTLI_BOOL DecodeSharedDictionary(
+ const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
+ uint32_t num_prefix = 0;
+ BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
+ BROTLI_BOOL has_custom_static_dict =
+ dict->num_word_lists > 0 || dict->num_transform_lists > 0;
+
+ /* Check magic header bytes. */
+ if (size < 2) return BROTLI_FALSE;
+ if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
+
+ if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
+ return BROTLI_FALSE;
+ }
+
+ if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
+ return BROTLI_FALSE;
+ }
+
+ /* Cannot combine different static dictionaries, only prefix dictionaries */
+ if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
+
+ return ParseDictionary(encoded, size, dict);
+}
+
+#endif /* BROTLI_EXPERIMENTAL */
+
+void BrotliSharedDictionaryDestroyInstance(
+ BrotliSharedDictionary* dict) {
+ if (!dict) {
+ return;
+ } else {
+ brotli_free_func free_func = dict->free_func;
+ void* opaque = dict->memory_manager_opaque;
+ /* Cleanup. */
+ free_func(opaque, dict->words_instances);
+ free_func(opaque, dict->transforms_instances);
+ free_func(opaque, dict->prefix_suffix_maps);
+ /* Self-destruction. */
+ free_func(opaque, dict);
+ }
+}
+
+BROTLI_BOOL BrotliSharedDictionaryAttach(
+ BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
+ size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
+ if (!dict) {
+ return BROTLI_FALSE;
+ }
+#if defined(BROTLI_EXPERIMENTAL)
+ if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
+ return DecodeSharedDictionary(data, data_size, dict);
+ }
+#endif /* BROTLI_EXPERIMENTAL */
+ if (type == BROTLI_SHARED_DICTIONARY_RAW) {
+ if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
+ return BROTLI_FALSE;
+ }
+ dict->prefix_size[dict->num_prefix] = data_size;
+ dict->prefix[dict->num_prefix] = data;
+ dict->num_prefix++;
+ return BROTLI_TRUE;
+ }
+ return BROTLI_FALSE;
+}
+
+BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+ BrotliSharedDictionary* dict = 0;
+ if (!alloc_func && !free_func) {
+ dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
+ } else if (alloc_func && free_func) {
+ dict = (BrotliSharedDictionary*)alloc_func(
+ opaque, sizeof(BrotliSharedDictionary));
+ }
+ if (dict == 0) {
+ return 0;
+ }
+
+ /* TODO(eustas): explicitly initialize all the fields? */
+ memset(dict, 0, sizeof(BrotliSharedDictionary));
+
+ dict->context_based = BROTLI_FALSE;
+ dict->num_dictionaries = 1;
+ dict->num_word_lists = 0;
+ dict->num_transform_lists = 0;
+
+ dict->words[0] = BrotliGetDictionary();
+ dict->transforms[0] = BrotliGetTransforms();
+
+ dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
+ dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
+ dict->memory_manager_opaque = opaque;
+
+ return dict;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif