From 58daab21cd043e1dc37024a7f99b396788372918 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 9 Mar 2024 14:19:48 +0100 Subject: Merging upstream version 1.44.3. Signed-off-by: Daniel Baumann --- web/server/h2o/libh2o/deps/brotli/enc/context.h | 178 ++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 web/server/h2o/libh2o/deps/brotli/enc/context.h (limited to 'web/server/h2o/libh2o/deps/brotli/enc/context.h') diff --git a/web/server/h2o/libh2o/deps/brotli/enc/context.h b/web/server/h2o/libh2o/deps/brotli/enc/context.h new file mode 100644 index 000000000..00c065bcc --- /dev/null +++ b/web/server/h2o/libh2o/deps/brotli/enc/context.h @@ -0,0 +1,178 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +// Functions to map previous bytes into a context id. + +#ifndef BROTLI_ENC_CONTEXT_H_ +#define BROTLI_ENC_CONTEXT_H_ + +#include "./types.h" + +namespace brotli { + +// Second-order context lookup table for UTF8 byte streams. +// +// If p1 and p2 are the previous two bytes, we calculate the context as +// +// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. +// +// If the previous two bytes are ASCII characters (i.e. < 128), this will be +// equivalent to +// +// context = 4 * context1(p1) + context2(p2), +// +// where context1 is based on the previous byte in the following way: +// +// 0 : non-ASCII control +// 1 : \t, \n, \r +// 2 : space +// 3 : other punctuation +// 4 : " ' +// 5 : % +// 6 : ( < [ { +// 7 : ) > ] } +// 8 : , ; : +// 9 : . +// 10 : = +// 11 : number +// 12 : upper-case vowel +// 13 : upper-case consonant +// 14 : lower-case vowel +// 15 : lower-case consonant +// +// and context2 is based on the second last byte: +// +// 0 : control, space +// 1 : punctuation +// 2 : upper-case letter, number +// 3 : lower-case letter +// +// If the last byte is ASCII, and the second last byte is not (in a valid UTF8 +// stream it will be a continuation byte, value between 128 and 191), the +// context is the same as if the second last byte was an ASCII control or space. +// +// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will +// be a continuation byte and the context id is 2 or 3 depending on the LSB of +// the last byte and to a lesser extent on the second last byte if it is ASCII. +// +// If the last byte is a UTF8 continuation byte, the second last byte can be: +// - continuation byte: the next byte is probably ASCII or lead byte (assuming +// 4-byte UTF8 characters are rare) and the context id is 0 or 1. +// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 +// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 +// +// The possible value combinations of the previous two bytes, the range of +// context ids and the type of the next byte is summarized in the table below: +// +// |--------\-----------------------------------------------------------------| +// | \ Last byte | +// | Second \---------------------------------------------------------------| +// | last byte \ ASCII | cont. byte | lead byte | +// | \ (0-127) | (128-191) | (192-) | +// |=============|===================|=====================|==================| +// | ASCII | next: ASCII/lead | not valid | next: cont. | +// | (0-127) | context: 4 - 63 | | context: 2 - 3 | +// |-------------|-------------------|---------------------|------------------| +// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | +// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | +// |-------------|-------------------|---------------------|------------------| +// | lead byte | not valid | next: ASCII/lead | not valid | +// | (192-207) | | context: 0 - 1 | | +// |-------------|-------------------|---------------------|------------------| +// | lead byte | not valid | next: cont. | not valid | +// | (208-) | | context: 2 - 3 | | +// |-------------|-------------------|---------------------|------------------| +static const uint8_t kUTF8ContextLookup[512] = { + // Last byte. + // + // ASCII range. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, + 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, + 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, + 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, + 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, + // UTF8 continuation byte range. + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + // UTF8 lead byte range. + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + // Second last byte. + // + // ASCII range. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, + // UTF8 continuation byte range. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // UTF8 lead byte range. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +// Context lookup table for small signed integers. +static const uint8_t kSigned3BitContextLookup[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, +}; + +enum ContextType { + CONTEXT_LSB6 = 0, + CONTEXT_MSB6 = 1, + CONTEXT_UTF8 = 2, + CONTEXT_SIGNED = 3 +}; + +static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { + switch (mode) { + case CONTEXT_LSB6: + return p1 & 0x3f; + case CONTEXT_MSB6: + return static_cast(p1 >> 2); + case CONTEXT_UTF8: + return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; + case CONTEXT_SIGNED: + return static_cast((kSigned3BitContextLookup[p1] << 3) + + kSigned3BitContextLookup[p2]); + default: + return 0; + } +} + +} // namespace brotli + +#endif // BROTLI_ENC_CONTEXT_H_ -- cgit v1.2.3