diff options
Diffstat (limited to '')
-rw-r--r-- | gfx/harfbuzz/src/hb-set-digest.hh | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/gfx/harfbuzz/src/hb-set-digest.hh b/gfx/harfbuzz/src/hb-set-digest.hh new file mode 100644 index 0000000000..dab713729b --- /dev/null +++ b/gfx/harfbuzz/src/hb-set-digest.hh @@ -0,0 +1,213 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SET_DIGEST_HH +#define HB_SET_DIGEST_HH + +#include "hb.hh" +#include "hb-machinery.hh" + +/* + * The set-digests here implement various "filters" that support + * "approximate member query". Conceptually these are like Bloom + * Filter and Quotient Filter, however, much smaller, faster, and + * designed to fit the requirements of our uses for glyph coverage + * queries. + * + * Our filters are highly accurate if the lookup covers fairly local + * set of glyphs, but fully flooded and ineffective if coverage is + * all over the place. + * + * The way these are used is that the filter is first populated by + * a lookup's or subtable's Coverage table(s), and then when we + * want to apply the lookup or subtable to a glyph, before trying + * to apply, we ask the filter if the glyph may be covered. If it's + * not, we return early. We can also match a digest against another + * digest. + * + * We use these filters at three levels: + * - If the digest for all the glyphs in the buffer as a whole + * does not match the digest for the lookup, skip the lookup. + * - For each glyph, if it doesn't match the lookup digest, + * skip it. + * - For each glyph, if it doesn't match the subtable digest, + * skip it. + * + * The main filter we use is a combination of three bits-pattern + * filters. A bits-pattern filter checks a number of bits (5 or 6) + * of the input number (glyph-id in this case) and checks whether + * its pattern is amongst the patterns of any of the accepted values. + * The accepted patterns are represented as a "long" integer. The + * check is done using four bitwise operations only. + */ + +template <typename mask_t, unsigned int shift> +struct hb_set_digest_bits_pattern_t +{ + static constexpr unsigned mask_bytes = sizeof (mask_t); + static constexpr unsigned mask_bits = sizeof (mask_t) * 8; + static constexpr unsigned num_bits = 0 + + (mask_bytes >= 1 ? 3 : 0) + + (mask_bytes >= 2 ? 1 : 0) + + (mask_bytes >= 4 ? 1 : 0) + + (mask_bytes >= 8 ? 1 : 0) + + (mask_bytes >= 16? 1 : 0) + + 0; + + static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); + static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); + + void init () { mask = 0; } + + void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; } + + void add (hb_codepoint_t g) { mask |= mask_for (g); } + + bool add_range (hb_codepoint_t a, hb_codepoint_t b) + { + if ((b >> shift) - (a >> shift) >= mask_bits - 1) + mask = (mask_t) -1; + else { + mask_t ma = mask_for (a); + mask_t mb = mask_for (b); + mask |= mb + (mb - ma) - (mb < ma); + } + return true; + } + + template <typename T> + void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + for (unsigned int i = 0; i < count; i++) + { + add (*array); + array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); + } + } + template <typename T> + void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } + template <typename T> + bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + add_array (array, count, stride); + return true; + } + template <typename T> + bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } + + bool may_have (const hb_set_digest_bits_pattern_t &o) const + { return mask & o.mask; } + + bool may_have (hb_codepoint_t g) const + { return mask & mask_for (g); } + + private: + + static mask_t mask_for (hb_codepoint_t g) + { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } + mask_t mask; +}; + +template <typename head_t, typename tail_t> +struct hb_set_digest_combiner_t +{ + void init () + { + head.init (); + tail.init (); + } + + void add (const hb_set_digest_combiner_t &o) + { + head.add (o.head); + tail.add (o.tail); + } + + void add (hb_codepoint_t g) + { + head.add (g); + tail.add (g); + } + + bool add_range (hb_codepoint_t a, hb_codepoint_t b) + { + return head.add_range (a, b) && + tail.add_range (a, b); + } + template <typename T> + void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + head.add_array (array, count, stride); + tail.add_array (array, count, stride); + } + template <typename T> + void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } + template <typename T> + bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + return head.add_sorted_array (array, count, stride) && + tail.add_sorted_array (array, count, stride); + } + template <typename T> + bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } + + bool may_have (const hb_set_digest_combiner_t &o) const + { + return head.may_have (o.head) && tail.may_have (o.tail); + } + + bool may_have (hb_codepoint_t g) const + { + return head.may_have (g) && tail.may_have (g); + } + + private: + head_t head; + tail_t tail; +}; + + +/* + * hb_set_digest_t + * + * This is a combination of digests that performs "best". + * There is not much science to this: it's a result of intuition + * and testing. + */ +using hb_set_digest_t = + hb_set_digest_combiner_t + < + hb_set_digest_bits_pattern_t<unsigned long, 4>, + hb_set_digest_combiner_t + < + hb_set_digest_bits_pattern_t<unsigned long, 0>, + hb_set_digest_bits_pattern_t<unsigned long, 9> + > + > +; + + +#endif /* HB_SET_DIGEST_HH */ |