diff options
Diffstat (limited to 'vendor/pulldown-cmark/src/puncttable.rs')
-rw-r--r-- | vendor/pulldown-cmark/src/puncttable.rs | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/vendor/pulldown-cmark/src/puncttable.rs b/vendor/pulldown-cmark/src/puncttable.rs new file mode 100644 index 000000000..5acdfbea7 --- /dev/null +++ b/vendor/pulldown-cmark/src/puncttable.rs @@ -0,0 +1,351 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//! CommonMark punctuation set based on spec and Unicode properties. 
// Autogenerated by mk_puncttable.py

/// Punctuation bitmasks for the ASCII range, one `u16` per row of 16 code
/// points. Bit `i` of entry `r` is set when code point `r * 16 + i` is
/// punctuation.
const PUNCT_MASKS_ASCII: [u16; 8] = [
    0x0000, // U+0000...U+000F
    0x0000, // U+0010...U+001F
    0xfffe, // U+0020...U+002F
    0xfc00, // U+0030...U+003F
    0x0001, // U+0040...U+004F
    0xf800, // U+0050...U+005F
    0x0001, // U+0060...U+006F
    0x7800, // U+0070...U+007F
];

/// Row keys (`code point / 16`) of the non-ASCII rows that have an entry in
/// `PUNCT_MASKS`. Kept in ascending order because `is_punctuation` looks rows
/// up with a binary search.
const PUNCT_TAB: [u16; 132] = [
    10,   // U+00A0...U+00AF
    11,   // U+00B0...U+00BF
    55,   // U+0370...U+037F
    56,   // U+0380...U+038F
    85,   // U+0550...U+055F
    88,   // U+0580...U+058F
    91,   // U+05B0...U+05BF
    92,   // U+05C0...U+05CF
    95,   // U+05F0...U+05FF
    96,   // U+0600...U+060F
    97,   // U+0610...U+061F
    102,  // U+0660...U+066F
    109,  // U+06D0...U+06DF
    112,  // U+0700...U+070F
    127,  // U+07F0...U+07FF
    131,  // U+0830...U+083F
    133,  // U+0850...U+085F
    150,  // U+0960...U+096F
    151,  // U+0970...U+097F
    175,  // U+0AF0...U+0AFF
    223,  // U+0DF0...U+0DFF
    228,  // U+0E40...U+0E4F
    229,  // U+0E50...U+0E5F
    240,  // U+0F00...U+0F0F
    241,  // U+0F10...U+0F1F
    243,  // U+0F30...U+0F3F
    248,  // U+0F80...U+0F8F
    253,  // U+0FD0...U+0FDF
    260,  // U+1040...U+104F
    271,  // U+10F0...U+10FF
    310,  // U+1360...U+136F
    320,  // U+1400...U+140F
    358,  // U+1660...U+166F
    361,  // U+1690...U+169F
    366,  // U+16E0...U+16EF
    371,  // U+1730...U+173F
    381,  // U+17D0...U+17DF
    384,  // U+1800...U+180F
    404,  // U+1940...U+194F
    417,  // U+1A10...U+1A1F
    426,  // U+1AA0...U+1AAF
    437,  // U+1B50...U+1B5F
    438,  // U+1B60...U+1B6F
    447,  // U+1BF0...U+1BFF
    451,  // U+1C30...U+1C3F
    455,  // U+1C70...U+1C7F
    460,  // U+1CC0...U+1CCF
    461,  // U+1CD0...U+1CDF
    513,  // U+2010...U+201F
    514,  // U+2020...U+202F
    515,  // U+2030...U+203F
    516,  // U+2040...U+204F
    517,  // U+2050...U+205F
    519,  // U+2070...U+207F
    520,  // U+2080...U+208F
    560,  // U+2300...U+230F
    562,  // U+2320...U+232F
    630,  // U+2760...U+276F
    631,  // U+2770...U+277F
    636,  // U+27C0...U+27CF
    638,  // U+27E0...U+27EF
    664,  // U+2980...U+298F
    665,  // U+2990...U+299F
    669,  // U+29D0...U+29DF
    671,  // U+29F0...U+29FF
    719,  // U+2CF0...U+2CFF
    727,  // U+2D70...U+2D7F
    736,  // U+2E00...U+2E0F
    737,  // U+2E10...U+2E1F
    738,  // U+2E20...U+2E2F
    739,  // U+2E30...U+2E3F
    740,  // U+2E40...U+2E4F
    768,  // U+3000...U+300F
    769,  // U+3010...U+301F
    771,  // U+3030...U+303F
    778,  // U+30A0...U+30AF
    783,  // U+30F0...U+30FF
    2639, // U+A4F0...U+A4FF
    2656, // U+A600...U+A60F
    2663, // U+A670...U+A67F
    2671, // U+A6F0...U+A6FF
    2695, // U+A870...U+A87F
    2700, // U+A8C0...U+A8CF
    2703, // U+A8F0...U+A8FF
    2706, // U+A920...U+A92F
    2709, // U+A950...U+A95F
    2716, // U+A9C0...U+A9CF
    2717, // U+A9D0...U+A9DF
    2725, // U+AA50...U+AA5F
    2733, // U+AAD0...U+AADF
    2735, // U+AAF0...U+AAFF
    2750, // U+ABE0...U+ABEF
    4051, // U+FD30...U+FD3F
    4065, // U+FE10...U+FE1F
    4067, // U+FE30...U+FE3F
    4068, // U+FE40...U+FE4F
    4069, // U+FE50...U+FE5F
    4070, // U+FE60...U+FE6F
    4080, // U+FF00...U+FF0F
    4081, // U+FF10...U+FF1F
    4082, // U+FF20...U+FF2F
    4083, // U+FF30...U+FF3F
    4085, // U+FF50...U+FF5F
    4086, // U+FF60...U+FF6F
    4112, // U+10100...U+1010F
    4153, // U+10390...U+1039F
    4157, // U+103D0...U+103DF
    4182, // U+10560...U+1056F
    4229, // U+10850...U+1085F
    4241, // U+10910...U+1091F
    4243, // U+10930...U+1093F
    4261, // U+10A50...U+10A5F
    4263, // U+10A70...U+10A7F
    4271, // U+10AF0...U+10AFF
    4275, // U+10B30...U+10B3F
    4281, // U+10B90...U+10B9F
    4356, // U+11040...U+1104F
    4363, // U+110B0...U+110BF
    4364, // U+110C0...U+110CF
    4372, // U+11140...U+1114F
    4375, // U+11170...U+1117F
    4380, // U+111C0...U+111CF
    4387, // U+11230...U+1123F
    4428, // U+114C0...U+114CF
    4444, // U+115C0...U+115CF
    4452, // U+11640...U+1164F
    4679, // U+12470...U+1247F
    5798, // U+16A60...U+16A6F
    5807, // U+16AF0...U+16AFF
    5811, // U+16B30...U+16B3F
    5812, // U+16B40...U+16B4F
    7113, // U+1BC90...U+1BC9F
];

/// Punctuation bitmasks for the rows listed in `PUNCT_TAB`; entry `i` here
/// belongs to row key `PUNCT_TAB[i]`. Bit `b` of a mask is set when code point
/// `row_key * 16 + b` is punctuation.
const PUNCT_MASKS: [u16; 132] = [
    0x0882, // U+00A0...U+00AF
    0x88c0, // U+00B0...U+00BF
    0x4000, // U+0370...U+037F
    0x0080, // U+0380...U+038F
    0xfc00, // U+0550...U+055F
    0x0600, // U+0580...U+058F
    0x4000, // U+05B0...U+05BF
    0x0049, // U+05C0...U+05CF
    0x0018, // U+05F0...U+05FF
    0x3600, // U+0600...U+060F
    0xc800, // U+0610...U+061F
    0x3c00, // U+0660...U+066F
    0x0010, // U+06D0...U+06DF
    0x3fff, // U+0700...U+070F
    0x0380, // U+07F0...U+07FF
    0x7fff, // U+0830...U+083F
    0x4000, // U+0850...U+085F
    0x0030, // U+0960...U+096F
    0x0001, // U+0970...U+097F
    0x0001, // U+0AF0...U+0AFF
    0x0010, // U+0DF0...U+0DFF
    0x8000, // U+0E40...U+0E4F
    0x0c00, // U+0E50...U+0E5F
    0xfff0, // U+0F00...U+0F0F
    0x0017, // U+0F10...U+0F1F
    0x3c00, // U+0F30...U+0F3F
    0x0020, // U+0F80...U+0F8F
    0x061f, // U+0FD0...U+0FDF
    0xfc00, // U+1040...U+104F
    0x0800, // U+10F0...U+10FF
    0x01ff, // U+1360...U+136F
    0x0001, // U+1400...U+140F
    0x6000, // U+1660...U+166F
    0x1800, // U+1690...U+169F
    0x3800, // U+16E0...U+16EF
    0x0060, // U+1730...U+173F
    0x0770, // U+17D0...U+17DF
    0x07ff, // U+1800...U+180F
    0x0030, // U+1940...U+194F
    0xc000, // U+1A10...U+1A1F
    0x3f7f, // U+1AA0...U+1AAF
    0xfc00, // U+1B50...U+1B5F
    0x0001, // U+1B60...U+1B6F
    0xf000, // U+1BF0...U+1BFF
    0xf800, // U+1C30...U+1C3F
    0xc000, // U+1C70...U+1C7F
    0x00ff, // U+1CC0...U+1CCF
    0x0008, // U+1CD0...U+1CDF
    0xffff, // U+2010...U+201F
    0x00ff, // U+2020...U+202F
    0xffff, // U+2030...U+203F
    0xffef, // U+2040...U+204F
    0x7ffb, // U+2050...U+205F
    0x6000, // U+2070...U+207F
    0x6000, // U+2080...U+208F
    0x0f00, // U+2300...U+230F
    0x0600, // U+2320...U+232F
    0xff00, // U+2760...U+276F
    0x003f, // U+2770...U+277F
    0x0060, // U+27C0...U+27CF
    0xffc0, // U+27E0...U+27EF
    0xfff8, // U+2980...U+298F
    0x01ff, // U+2990...U+299F
    0x0f00, // U+29D0...U+29DF
    0x3000, // U+29F0...U+29FF
    0xde00, // U+2CF0...U+2CFF
    0x0001, // U+2D70...U+2D7F
    0xffff, // U+2E00...U+2E0F
    0xffff, // U+2E10...U+2E1F
    0x7fff, // U+2E20...U+2E2F
    0xffff, // U+2E30...U+2E3F
    0x0007, // U+2E40...U+2E4F
    0xff0e, // U+3000...U+300F
    0xfff3, // U+3010...U+301F
    0x2001, // U+3030...U+303F
    0x0001, // U+30A0...U+30AF
    0x0800, // U+30F0...U+30FF
    0xc000, // U+A4F0...U+A4FF
    0xe000, // U+A600...U+A60F
    0x4008, // U+A670...U+A67F
    0x00fc, // U+A6F0...U+A6FF
    0x00f0, // U+A870...U+A87F
    0xc000, // U+A8C0...U+A8CF
    0x0700, // U+A8F0...U+A8FF
    0xc000, // U+A920...U+A92F
    0x8000, // U+A950...U+A95F
    0x3ffe, // U+A9C0...U+A9CF
    0xc000, // U+A9D0...U+A9DF
    0xf000, // U+AA50...U+AA5F
    0xc000, // U+AAD0...U+AADF
    0x0003, // U+AAF0...U+AAFF
    0x0800, // U+ABE0...U+ABEF
    0xc000, // U+FD30...U+FD3F
    0x03ff, // U+FE10...U+FE1F
    0xffff, // U+FE30...U+FE3F
    0xffff, // U+FE40...U+FE4F
    0xfff7, // U+FE50...U+FE5F
    0x0d0b, // U+FE60...U+FE6F
    0xf7ee, // U+FF00...U+FF0F
    0x8c00, // U+FF10...U+FF1F
    0x0001, // U+FF20...U+FF2F
    0xb800, // U+FF30...U+FF3F
    0xa800, // U+FF50...U+FF5F
    0x003f, // U+FF60...U+FF6F
    0x0007, // U+10100...U+1010F
    0x8000, // U+10390...U+1039F
    0x0001, // U+103D0...U+103DF
    0x8000, // U+10560...U+1056F
    0x0080, // U+10850...U+1085F
    0x8000, // U+10910...U+1091F
    0x8000, // U+10930...U+1093F
    0x01ff, // U+10A50...U+10A5F
    0x8000, // U+10A70...U+10A7F
    0x007f, // U+10AF0...U+10AFF
    0xfe00, // U+10B30...U+10B3F
    0x1e00, // U+10B90...U+10B9F
    0x3f80, // U+11040...U+1104F
    0xd800, // U+110B0...U+110BF
    0x0003, // U+110C0...U+110CF
    0x000f, // U+11140...U+1114F
    0x0030, // U+11170...U+1117F
    0x21e0, // U+111C0...U+111CF
    0x3f00, // U+11230...U+1123F
    0x0040, // U+114C0...U+114CF
    0x03fe, // U+115C0...U+115CF
    0x000e, // U+11640...U+1164F
    0x001f, // U+12470...U+1247F
    0xc000, // U+16A60...U+16A6F
    0x0020, // U+16AF0...U+16AFF
    0x0f80, // U+16B30...U+16B3F
    0x0010, // U+16B40...U+16B4F
    0x8000, // U+1BC90...U+1BC9F
];

/// Returns `true` if the byte `c` is flagged as punctuation in
/// `PUNCT_MASKS_ASCII`.
///
/// Bytes outside the ASCII range (`c >= 128`) always yield `false`.
pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
    if !c.is_ascii() {
        return false;
    }
    // High nibble selects the 16-code-point row, low nibble the bit within it.
    let row_mask = PUNCT_MASKS_ASCII[usize::from(c >> 4)];
    let bit = 1u16 << (c & 0x0f);
    (row_mask & bit) != 0
}

/// Returns `true` if `c` is flagged as punctuation by the tables in this
/// module.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. Anything above
/// U+1BC9F (the last code point the tables cover) yields `false`.
pub(crate) fn is_punctuation(c: char) -> bool {
    // Upper end of the last row listed in `PUNCT_TAB`.
    const LAST_COVERED: u32 = 0x1BC9F;

    let code = u32::from(c);
    if code < 128 {
        return is_ascii_punctuation(code as u8);
    }
    if code > LAST_COVERED {
        return false;
    }
    // The bound check above guarantees `code >> 4` fits in a `u16`.
    let row_key = (code >> 4) as u16;
    PUNCT_TAB
        .binary_search(&row_key)
        .map_or(false, |row| (PUNCT_MASKS[row] & (1u16 << (code & 0x0f))) != 0)
}

#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    #[test]
    fn test_ascii() {
        for &byte in &[b'!', b'@', b'~'] {
            assert!(is_ascii_punctuation(byte));
        }
        for &byte in &[b' ', b'0', b'A', 0xA1] {
            assert!(!is_ascii_punctuation(byte));
        }
    }

    #[test]
    fn test_unicode() {
        let expected = [
            ('~', true),
            (' ', false),
            ('\u{00A1}', true),
            ('\u{060C}', true),
            ('\u{FF65}', true),
            ('\u{1BC9F}', true),
            ('\u{1BCA0}', false),
        ];
        for &(ch, is_punct) in &expected {
            assert_eq!(is_punctuation(ch), is_punct);
        }
    }
}