summaryrefslogtreecommitdiffstats
path: root/web/server/h2o/libh2o/deps/brotli/enc/context.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--web/server/h2o/libh2o/deps/brotli/enc/context.h178
1 files changed, 178 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/brotli/enc/context.h b/web/server/h2o/libh2o/deps/brotli/enc/context.h
new file mode 100644
index 00000000..00c065bc
--- /dev/null
+++ b/web/server/h2o/libh2o/deps/brotli/enc/context.h
@@ -0,0 +1,178 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+// Functions to map previous bytes into a context id.
+
+#ifndef BROTLI_ENC_CONTEXT_H_
+#define BROTLI_ENC_CONTEXT_H_
+
+#include "./types.h"
+
+namespace brotli {
+
+// Second-order context lookup table for UTF8 byte streams.
+//
+// If p1 and p2 are the previous two bytes, we calculate the context as
+//
+// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
+//
+// If the previous two bytes are ASCII characters (i.e. < 128), this will be
+// equivalent to
+//
+// context = 4 * context1(p1) + context2(p2),
+//
+// where context1 is based on the previous byte in the following way:
+//
+// 0 : non-ASCII control
+// 1 : \t, \n, \r
+// 2 : space
+// 3 : other punctuation
+// 4 : " '
+// 5 : %
+// 6 : ( < [ {
+// 7 : ) > ] }
+// 8 : , ; :
+// 9 : .
+// 10 : =
+// 11 : number
+// 12 : upper-case vowel
+// 13 : upper-case consonant
+// 14 : lower-case vowel
+// 15 : lower-case consonant
+//
+// and context2 is based on the second last byte:
+//
+// 0 : control, space
+// 1 : punctuation
+// 2 : upper-case letter, number
+// 3 : lower-case letter
+//
+// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+// stream it will be a continuation byte, value between 128 and 191), the
+// context is the same as if the second last byte was an ASCII control or space.
+//
+// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+// be a continuation byte and the context id is 2 or 3 depending on the LSB of
+// the last byte and to a lesser extent on the second last byte if it is ASCII.
+//
+// If the last byte is a UTF8 continuation byte, the second last byte can be:
+// - continuation byte: the next byte is probably ASCII or lead byte (assuming
+// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
+// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+//
+// The possible value combinations of the previous two bytes, the range of
+// context ids and the type of the next byte is summarized in the table below:
+//
+// |--------\-----------------------------------------------------------------|
+// | \ Last byte |
+// | Second \---------------------------------------------------------------|
+// | last byte \ ASCII | cont. byte | lead byte |
+// | \ (0-127) | (128-191) | (192-) |
+// |=============|===================|=====================|==================|
+// | ASCII | next: ASCII/lead | not valid | next: cont. |
+// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
+// |-------------|-------------------|---------------------|------------------|
+// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
+// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
+// |-------------|-------------------|---------------------|------------------|
+// | lead byte | not valid | next: ASCII/lead | not valid |
+// | (192-207) | | context: 0 - 1 | |
+// |-------------|-------------------|---------------------|------------------|
+// | lead byte | not valid | next: cont. | not valid |
+// | (208-) | | context: 2 - 3 | |
+// |-------------|-------------------|---------------------|------------------|
+static const uint8_t kUTF8ContextLookup[512] = {
+ // Last byte.
+ //
+ // ASCII range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+ 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+ 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+ 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+ 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
+ // UTF8 continuation byte range.
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ // UTF8 lead byte range.
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ // Second last byte.
+ //
+ // ASCII range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+ // UTF8 continuation byte range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // UTF8 lead byte range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+// Context lookup table for small signed integers.
+static const uint8_t kSigned3BitContextLookup[] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+};
+
+enum ContextType {
+ CONTEXT_LSB6 = 0,
+ CONTEXT_MSB6 = 1,
+ CONTEXT_UTF8 = 2,
+ CONTEXT_SIGNED = 3
+};
+
+static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
+ switch (mode) {
+ case CONTEXT_LSB6:
+ return p1 & 0x3f;
+ case CONTEXT_MSB6:
+ return static_cast<uint8_t>(p1 >> 2);
+ case CONTEXT_UTF8:
+ return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
+ case CONTEXT_SIGNED:
+ return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
+ kSigned3BitContextLookup[p2]);
+ default:
+ return 0;
+ }
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_CONTEXT_H_