diff options
Diffstat (limited to '')
-rw-r--r-- | gfx/harfbuzz/src/hb-ot-shape-complex-use-machine.rl | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/gfx/harfbuzz/src/hb-ot-shape-complex-use-machine.rl b/gfx/harfbuzz/src/hb-ot-shape-complex-use-machine.rl new file mode 100644 index 0000000000..364ba0d1bb --- /dev/null +++ b/gfx/harfbuzz/src/hb-ot-shape-complex-use-machine.rl @@ -0,0 +1,208 @@ +/* + * Copyright © 2015 Mozilla Foundation. + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH + +#include "hb.hh" +#include "hb-ot-shape-complex-machine-index.hh" + +%%{ + machine use_syllable_machine; + alphtype unsigned char; + write data; +}%% + +%%{ + +# Same order as enum use_category_t. Not sure how to avoid duplication. + +O = 0; # OTHER + +B = 1; # BASE +N = 4; # BASE_NUM +GB = 5; # BASE_OTHER +SUB = 11; # CONS_SUB +H = 12; # HALANT + +HN = 13; # HALANT_NUM +ZWNJ = 14; # Zero width non-joiner +R = 18; # REPHA +S = 19; # SYM +CS = 43; # CONS_WITH_STACKER +HVM = 44; # HALANT_OR_VOWEL_MODIFIER +Sk = 48; # SAKOT +G = 49; # HIEROGLYPH +J = 50; # HIEROGLYPH_JOINER +SB = 51; # HIEROGLYPH_SEGMENT_BEGIN +SE = 52; # HIEROGLYPH_SEGMENT_END + +FAbv = 24; # CONS_FINAL_ABOVE +FBlw = 25; # CONS_FINAL_BELOW +FPst = 26; # CONS_FINAL_POST +MAbv = 27; # CONS_MED_ABOVE +MBlw = 28; # CONS_MED_BELOW +MPst = 29; # CONS_MED_POST +MPre = 30; # CONS_MED_PRE +CMAbv = 31; # CONS_MOD_ABOVE +CMBlw = 32; # CONS_MOD_BELOW +VAbv = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST +VBlw = 34; # VOWEL_BELOW / VOWEL_BELOW_POST +VPst = 35; # VOWEL_POST UIPC = Right +VPre = 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST +VMAbv = 37; # VOWEL_MOD_ABOVE +VMBlw = 38; # VOWEL_MOD_BELOW +VMPst = 39; # VOWEL_MOD_POST +VMPre = 23; # VOWEL_MOD_PRE +SMAbv = 41; # SYM_MOD_ABOVE +SMBlw = 42; # SYM_MOD_BELOW +FMAbv = 45; # CONS_FINAL_MOD UIPC = Top +FMBlw = 46; # CONS_FINAL_MOD UIPC = Bottom +FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable + +h = H | HVM | Sk; + +consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*; +medial_consonants = MPre? MAbv? MBlw? MPst?; +dependent_vowels = VPre* VAbv* VBlw* VPst*; +vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*; +final_consonants = FAbv* FBlw* FPst*; +final_modifiers = FMAbv* FMBlw* | FMPst?; + +complex_syllable_start = (R | CS)? (B | GB); +complex_syllable_middle = + consonant_modifiers + medial_consonants + dependent_vowels + vowel_modifiers + (Sk B)* +; +complex_syllable_tail = + complex_syllable_middle + final_consonants + final_modifiers +; +number_joiner_terminated_cluster_tail = (HN N)* HN; +numeral_cluster_tail = (HN N)+; +symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; + +virama_terminated_cluster = + complex_syllable_start + consonant_modifiers + h +; +sakot_terminated_cluster = + complex_syllable_start + complex_syllable_middle + Sk +; +standard_cluster = + complex_syllable_start + complex_syllable_tail +; +broken_cluster = + R? + (complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail) +; + +number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail; +numeral_cluster = N numeral_cluster_tail?; +symbol_cluster = (S | GB) symbol_cluster_tail?; +hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*; +independent_cluster = O; +other = any; + +main := |* + independent_cluster => { found_syllable (independent_cluster); }; + virama_terminated_cluster => { found_syllable (virama_terminated_cluster); }; + sakot_terminated_cluster => { found_syllable (sakot_terminated_cluster); }; + standard_cluster => { found_syllable (standard_cluster); }; + number_joiner_terminated_cluster => { found_syllable (number_joiner_terminated_cluster); }; + numeral_cluster => { found_syllable (numeral_cluster); }; + symbol_cluster => { found_syllable (symbol_cluster); }; + hieroglyph_cluster => { found_syllable (hieroglyph_cluster); }; + broken_cluster => { found_syllable (broken_cluster); }; + other => { found_syllable (non_cluster); }; +*|; + + +}%% + +#define found_syllable(syllable_type) \ + HB_STMT_START { \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \ + for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \ + info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \ + syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ + } HB_STMT_END + +static bool +not_standard_default_ignorable (const hb_glyph_info_t &i) +{ return !(i.use_category() == USE_O && _hb_glyph_info_is_default_ignorable (&i)); } + +static void +find_syllables_use (hb_buffer_t *buffer) +{ + hb_glyph_info_t *info = buffer->info; + auto p = + + hb_iter (info, buffer->len) + | hb_enumerate + | hb_filter ([] (const hb_glyph_info_t &i) { return not_standard_default_ignorable (i); }, + hb_second) + | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p) + { + if (p.second.use_category() == USE_ZWNJ) + for (unsigned i = p.first + 1; i < buffer->len; ++i) + if (not_standard_default_ignorable (info[i])) + return !_hb_glyph_info_is_unicode_mark (&info[i]); + return true; + }) + | hb_enumerate + | machine_index + ; + auto pe = p + p.len (); + auto eof = +pe; + auto ts = +p; + auto te = +p; + unsigned int act HB_UNUSED; + int cs; + %%{ + write init; + getkey (*p).second.second.use_category(); + }%% + + unsigned int syllable_serial = 1; + %%{ + write exec; + }%% +} + +#undef found_syllable + +#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */ |