summary | refs | log | tree | commit | diff | stats
path: root/vendor/unicode-segmentation/src
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
commit dc0db358abe19481e475e10c32149b53370f1a1c (patch)
tree ab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/unicode-segmentation/src
parent Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff)
download rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz
rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/unicode-segmentation/src')
-rw-r--r-- vendor/unicode-segmentation/src/tables.rs 346
1 files changed, 330 insertions, 16 deletions
diff --git a/vendor/unicode-segmentation/src/tables.rs b/vendor/unicode-segmentation/src/tables.rs
index 5a811c922..ca83b503a 100644
--- a/vendor/unicode-segmentation/src/tables.rs
+++ b/vendor/unicode-segmentation/src/tables.rs
@@ -365,7 +365,7 @@ pub mod grapheme {
GC_ZWJ,
}
- fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> (u32, u32, GraphemeCat) {
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)], default_lower: u32, default_upper: u32) -> (u32, u32, GraphemeCat) {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
@@ -378,8 +378,8 @@ pub mod grapheme {
}
Err(idx) => {
(
- if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 },
- r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
+ if idx > 0 { r[idx-1].1 as u32 + 1 } else { default_lower },
+ r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(default_upper),
GC_Any,
)
}
@@ -387,9 +387,93 @@ pub mod grapheme {
}
pub fn grapheme_category(c: char) -> (u32, u32, GraphemeCat) {
- bsearch_range_value_table(c, grapheme_cat_table)
+ // Perform a quick O(1) lookup in a precomputed table to determine
+ // the slice of the range table to search in.
+ let lookup_interval = 0x80;
+ let idx = (c as u32 / lookup_interval) as usize;
+ let range = grapheme_cat_lookup.get(idx..(idx + 2)).map_or(
+ // If the `idx` is outside of the precomputed table - use the slice
+ // starting from the last covered index in the precomputed table and
+ // ending with the length of the range table.
+ 1443..1449,
+ |r| (r[0] as usize)..((r[1] + 1) as usize)
+ );
+
+ // Compute pessimistic default lower and upper bounds on the category.
+ // If the character doesn't map to any range and there is no adjacent range
+ // in the table slice, these bounds have to apply.
+ let lower = idx as u32 * lookup_interval;
+ let upper = lower + lookup_interval - 1;
+ bsearch_range_value_table(c, &grapheme_cat_table[range], lower, upper)
}
+ const grapheme_cat_lookup: &'static [u16] = &[
+ 0, 5, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 16, 21, 26, 29, 32, 37, 41, 53, 65, 75, 86, 97,
+ 106, 116, 131, 143, 153, 157, 161, 168, 173, 183, 188, 189, 191, 191, 191, 192, 192, 192,
+ 192, 192, 192, 192, 192, 198, 206, 209, 211, 219, 219, 232, 233, 242, 258, 262, 270, 270,
+ 271, 271, 271, 271, 271, 279, 280, 282, 284, 284, 284, 286, 290, 290, 291, 291, 295, 297,
+ 298, 313, 317, 317, 317, 318, 318, 318, 318, 322, 322, 322, 323, 324, 325, 325, 325, 325,
+ 325, 328, 329, 329, 329, 329, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331,
+ 331, 331, 331, 333, 335, 335, 335, 342, 347, 351, 360, 369, 379, 379, 386, 395, 405, 413,
+ 423, 431, 441, 450, 459, 469, 477, 487, 495, 505, 514, 523, 533, 541, 551, 559, 569, 578,
+ 587, 597, 605, 615, 623, 633, 642, 651, 661, 669, 679, 687, 697, 706, 715, 725, 733, 743,
+ 751, 761, 770, 779, 789, 797, 807, 815, 825, 834, 843, 853, 861, 871, 879, 889, 898, 907,
+ 917, 925, 935, 943, 953, 962, 971, 981, 989, 999, 1007, 1017, 1026, 1035, 1045, 1053, 1063,
+ 1071, 1081, 1090, 1099, 1109, 1117, 1127, 1135, 1145, 1154, 1163, 1173, 1181, 1186, 1186,
+ 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186,
+ 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186,
+ 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186,
+ 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186,
+ 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1187, 1187, 1187, 1187, 1187, 1187,
+ 1189, 1190, 1190, 1192, 1192, 1192, 1192, 1193, 1193, 1194, 1195, 1195, 1195, 1195, 1195,
+ 1195, 1195, 1195, 1195, 1195, 1195, 1195, 1195, 1195, 1200, 1201, 1201, 1201, 1201, 1201,
+ 1202, 1202, 1202, 1204, 1205, 1206, 1212, 1221, 1227, 1236, 1244, 1247, 1260, 1260, 1267,
+ 1278, 1278, 1286, 1292, 1299, 1303, 1303, 1307, 1307, 1318, 1324, 1333, 1337, 1337, 1337,
+ 1342, 1349, 1355, 1361, 1361, 1363, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372,
+ 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372,
+ 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372, 1372,
+ 1372, 1372, 1372, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375,
+ 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1376, 1377, 1377, 1377, 1377, 1377, 1377, 1377,
+ 1377, 1378, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382, 1382,
+ 1382, 1382, 1382, 1382, 1382, 1382, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384,
+ 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384,
+ 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1384, 1386, 1386,
+ 1386, 1386, 1392, 1395, 1396, 1396, 1396, 1396, 1396, 1396, 1396, 1396, 1396, 1396, 1396,
+ 1396, 1396, 1396, 1396, 1396, 1399, 1402, 1402, 1402, 1402, 1402, 1402, 1402, 1402, 1402,
+ 1402, 1402, 1407, 1408, 1409, 1409, 1409, 1411, 1411, 1411, 1411, 1412, 1412, 1412, 1412,
+ 1412, 1412, 1412, 1412, 1413, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414, 1414,
+ 1414, 1414, 1414, 1414, 1414, 1415, 1419, 1423, 1428, 1428, 1428, 1430, 1430, 1430, 1431,
+ 1431, 1432, 1433, 1434, 1435, 1438, 1440, 1442, 1442, 1442, 1443, 1443, 1443, 1443, 1443,
+ 1443, 1443, 1443, 1443, 1443
+ ];
+
const grapheme_cat_table: &'static [(char, char, GraphemeCat)] = &[
('\u{0}', '\u{9}', GC_Control), ('\u{a}', '\u{a}', GC_LF), ('\u{b}', '\u{c}', GC_Control),
('\u{d}', '\u{d}', GC_CR), ('\u{e}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}',
@@ -1028,7 +1112,7 @@ pub mod word {
WC_ZWJ,
}
- fn bsearch_range_value_table(c: char, r: &'static [(char, char, WordCat)]) -> (u32, u32, WordCat) {
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, WordCat)], default_lower: u32, default_upper: u32) -> (u32, u32, WordCat) {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
@@ -1041,8 +1125,8 @@ pub mod word {
}
Err(idx) => {
(
- if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 },
- r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
+ if idx > 0 { r[idx-1].1 as u32 + 1 } else { default_lower },
+ r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(default_upper),
WC_Any,
)
}
@@ -1050,9 +1134,87 @@ pub mod word {
}
pub fn word_category(c: char) -> (u32, u32, WordCat) {
- bsearch_range_value_table(c, word_cat_table)
+ // Perform a quick O(1) lookup in a precomputed table to determine
+ // the slice of the range table to search in.
+ let lookup_interval = 0x80;
+ let idx = (c as u32 / lookup_interval) as usize;
+ let range = word_cat_lookup.get(idx..(idx + 2)).map_or(
+ // If the `idx` is outside of the precomputed table - use the slice
+ // starting from the last covered index in the precomputed table and
+ // ending with the length of the range table.
+ 1050..1053,
+ |r| (r[0] as usize)..((r[1] + 1) as usize)
+ );
+
+ // Compute pessimistic default lower and upper bounds on the category.
+ // If the character doesn't map to any range and there is no adjacent range
+ // in the table slice, these bounds have to apply.
+ let lower = idx as u32 * lookup_interval;
+ let upper = lower + lookup_interval - 1;
+ bsearch_range_value_table(c, &word_cat_table[range], lower, upper)
}
+ const word_cat_lookup: &'static [u16] = &[
+ 0, 14, 22, 22, 22, 22, 24, 30, 36, 36, 38, 43, 55, 66, 78, 83, 93, 104, 111, 121, 143, 162,
+ 180, 198, 215, 231, 250, 266, 278, 282, 286, 295, 301, 308, 316, 316, 316, 321, 329, 333,
+ 336, 336, 336, 336, 336, 338, 342, 351, 354, 359, 365, 369, 370, 375, 378, 384, 391, 397,
+ 409, 409, 411, 411, 411, 420, 430, 449, 451, 464, 465, 465, 465, 465, 465, 465, 466, 466,
+ 466, 466, 466, 466, 466, 466, 466, 466, 466, 466, 466, 466, 466, 466, 470, 476, 486, 487,
+ 487, 487, 487, 492, 496, 497, 500, 500, 501, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502,
+ 502, 502, 504, 504, 504, 511, 515, 515, 519, 529, 538, 544, 551, 559, 568, 574, 578, 578,
+ 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578,
+ 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578,
+ 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578,
+ 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578,
+ 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 578, 581, 581, 581, 581,
+ 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581,
+ 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581,
+ 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581,
+ 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 592, 593, 593, 593, 594,
+ 597, 609, 611, 620, 628, 634, 635, 636, 637, 637, 640, 644, 648, 648, 652, 655, 662, 662,
+ 662, 665, 668, 675, 678, 680, 682, 692, 696, 699, 700, 701, 703, 706, 706, 706, 710, 714,
+ 718, 726, 734, 744, 753, 759, 767, 785, 785, 791, 796, 796, 801, 805, 809, 811, 811, 813,
+ 815, 828, 835, 844, 848, 848, 848, 854, 857, 869, 875, 875, 877, 885, 886, 886, 886, 886,
+ 886, 886, 886, 886, 887, 888, 888, 889, 889, 889, 889, 889, 889, 889, 889, 889, 889, 889,
+ 889, 889, 889, 889, 889, 889, 889, 889, 889, 889, 890, 890, 890, 890, 890, 890, 890, 890,
+ 890, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895,
+ 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895, 895,
+ 895, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896,
+ 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896,
+ 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896,
+ 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896, 896,
+ 896, 899, 903, 908, 909, 909, 909, 909, 909, 910, 910, 913, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920,
+ 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 920, 923, 924, 924, 927,
+ 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927, 927,
+ 927, 927, 927, 929, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933,
+ 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933, 933,
+ 933, 933, 933, 933, 933, 935, 935, 935, 935, 938, 941, 942, 942, 942, 942, 943, 951, 960,
+ 960, 960, 964, 968, 973, 973, 973, 973, 973, 976, 979, 979, 979, 979, 979, 979, 979, 979,
+ 979, 981, 981, 987, 988, 993, 993, 993, 998, 998, 998, 998, 1001, 1001, 1001, 1001, 1001,
+ 1001, 1005, 1005, 1007, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1039,
+ 1044, 1044, 1044, 1044, 1044, 1046, 1048, 1048, 1048, 1048, 1049, 1049, 1049, 1049, 1049,
+ 1049, 1049, 1049, 1049, 1049, 1049, 1049, 1049, 1049, 1049, 1049, 1050, 1050, 1050, 1050,
+ 1050, 1050, 1050, 1050
+ ];
+
const word_cat_table: &'static [(char, char, WordCat)] = &[
('\u{a}', '\u{a}', WC_LF), ('\u{b}', '\u{c}', WC_Newline), ('\u{d}', '\u{d}', WC_CR),
('\u{20}', '\u{20}', WC_WSegSpace), ('\u{22}', '\u{22}', WC_Double_Quote), ('\u{27}',
@@ -1530,7 +1692,7 @@ pub mod emoji {
EC_Extended_Pictographic,
}
- fn bsearch_range_value_table(c: char, r: &'static [(char, char, EmojiCat)]) -> (u32, u32, EmojiCat) {
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, EmojiCat)], default_lower: u32, default_upper: u32) -> (u32, u32, EmojiCat) {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
@@ -1543,8 +1705,8 @@ pub mod emoji {
}
Err(idx) => {
(
- if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 },
- r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
+ if idx > 0 { r[idx-1].1 as u32 + 1 } else { default_lower },
+ r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(default_upper),
EC_Any,
)
}
@@ -1552,9 +1714,73 @@ pub mod emoji {
}
pub fn emoji_category(c: char) -> (u32, u32, EmojiCat) {
- bsearch_range_value_table(c, emoji_cat_table)
+ // Perform a quick O(1) lookup in a precomputed table to determine
+ // the slice of the range table to search in.
+ let lookup_interval = 0x80;
+ let idx = (c as u32 / lookup_interval) as usize;
+ let range = emoji_cat_lookup.get(idx..(idx + 2)).map_or(
+ // If the `idx` is outside of the precomputed table - use the slice
+ // starting from the last covered index in the precomputed table and
+ // ending with the length of the range table.
+ 77..78,
+ |r| (r[0] as usize)..((r[1] + 1) as usize)
+ );
+
+ // Compute pessimistic default lower and upper bounds on the category.
+ // If the character doesn't map to any range and there is no adjacent range
+ // in the table slice, these bounds have to apply.
+ let lower = idx as u32 * lookup_interval;
+ let upper = lower + lookup_interval - 1;
+ bsearch_range_value_table(c, &emoji_cat_table[range], lower, upper)
}
+ const emoji_cat_lookup: &'static [u8] = &[
+ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 4, 4, 6, 8, 8, 8, 10, 14, 14, 15, 15, 19, 21, 22, 37, 41, 41, 41, 42, 42, 42, 42,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 48, 48, 48, 48, 48, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 55, 58, 63, 63, 63, 64, 64, 64, 65, 65, 66, 67,
+ 68, 69, 72, 74, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77
+ ];
+
const emoji_cat_table: &'static [(char, char, EmojiCat)] = &[
('\u{a9}', '\u{a9}', EC_Extended_Pictographic), ('\u{ae}', '\u{ae}',
EC_Extended_Pictographic), ('\u{203c}', '\u{203c}', EC_Extended_Pictographic), ('\u{2049}',
@@ -1633,7 +1859,7 @@ pub mod sentence {
SC_Upper,
}
- fn bsearch_range_value_table(c: char, r: &'static [(char, char, SentenceCat)]) -> (u32, u32, SentenceCat) {
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, SentenceCat)], default_lower: u32, default_upper: u32) -> (u32, u32, SentenceCat) {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
@@ -1646,8 +1872,8 @@ pub mod sentence {
}
Err(idx) => {
(
- if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 },
- r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
+ if idx > 0 { r[idx-1].1 as u32 + 1 } else { default_lower },
+ r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(default_upper),
SC_Any,
)
}
@@ -1655,9 +1881,97 @@ pub mod sentence {
}
pub fn sentence_category(c: char) -> (u32, u32, SentenceCat) {
- bsearch_range_value_table(c, sentence_cat_table)
+ // Perform a quick O(1) lookup in a precomputed table to determine
+ // the slice of the range table to search in.
+ let lookup_interval = 0x80;
+ let idx = (c as u32 / lookup_interval) as usize;
+ let range = sentence_cat_lookup.get(idx..(idx + 2)).map_or(
+ // If the `idx` is outside of the precomputed table - use the slice
+ // starting from the last covered index in the precomputed table and
+ // ending with the length of the range table.
+ 2410..2421,
+ |r| (r[0] as usize)..((r[1] + 1) as usize)
+ );
+
+ // Compute pessimistic default lower and upper bounds on the category.
+ // If the character doesn't map to any range and there is no adjacent range
+ // in the table slice, these bounds have to apply.
+ let lower = idx as u32 * lookup_interval;
+ let upper = lower + lookup_interval - 1;
+ bsearch_range_value_table(c, &sentence_cat_table[range], lower, upper)
}
+ const sentence_cat_lookup: &'static [u16] = &[
+ 0, 19, 31, 154, 247, 314, 323, 333, 375, 409, 528, 579, 588, 599, 612, 618, 629, 643, 650,
+ 661, 683, 702, 720, 738, 755, 771, 790, 806, 818, 825, 840, 850, 856, 871, 882, 882, 882,
+ 887, 895, 901, 904, 904, 904, 904, 904, 907, 912, 922, 928, 937, 943, 950, 953, 959, 964,
+ 973, 980, 988, 1000, 1000, 1002, 1130, 1249, 1267, 1288, 1308, 1311, 1336, 1340, 1340, 1340,
+ 1342, 1342, 1342, 1344, 1344, 1344, 1344, 1344, 1346, 1348, 1348, 1348, 1348, 1351, 1351,
+ 1351, 1351, 1351, 1369, 1476, 1482, 1492, 1501, 1501, 1501, 1501, 1512, 1517, 1518, 1521,
+ 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521,
+ 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521,
+ 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521,
+ 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1521, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522,
+ 1522, 1522, 1522, 1522, 1525, 1525, 1525, 1580, 1613, 1696, 1769, 1780, 1790, 1797, 1808,
+ 1819, 1836, 1843, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849,
+ 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849,
+ 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849,
+ 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849,
+ 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849,
+ 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849, 1849,
+ 1849, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852,
+ 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852,
+ 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852,
+ 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852,
+ 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1852, 1853, 1854, 1864, 1865, 1865,
+ 1865, 1867, 1870, 1886, 1888, 1905, 1913, 1919, 1920, 1921, 1922, 1922, 1925, 1929, 1933,
+ 1935, 1939, 1942, 1949, 1949, 1949, 1952, 1957, 1964, 1967, 1969, 1971, 1982, 1986, 1989,
+ 1990, 1991, 1993, 1996, 1996, 1996, 2000, 2005, 2010, 2019, 2028, 2039, 2051, 2059, 2068,
+ 2086, 2086, 2093, 2098, 2098, 2105, 2110, 2114, 2119, 2119, 2121, 2124, 2139, 2146, 2156,
+ 2161, 2161, 2161, 2168, 2171, 2183, 2189, 2189, 2192, 2201, 2202, 2202, 2202, 2202, 2202,
+ 2202, 2202, 2202, 2203, 2204, 2204, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205,
+ 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2205, 2206, 2206, 2206,
+ 2206, 2206, 2206, 2206, 2206, 2206, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211,
+ 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211,
+ 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2211, 2212, 2212, 2212,
+ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212,
+ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212,
+ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212,
+ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212,
+ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2216, 2221, 2228, 2229, 2229, 2229,
+ 2229, 2229, 2231, 2232, 2235, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242,
+ 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242,
+ 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242,
+ 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2242, 2243, 2243, 2243, 2243, 2243, 2243, 2243,
+ 2243, 2243, 2243, 2244, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245,
+ 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245,
+ 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245,
+ 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245,
+ 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2245, 2248, 2248,
+ 2248, 2253, 2253, 2253, 2254, 2254, 2254, 2254, 2254, 2254, 2254, 2254, 2254, 2254, 2254,
+ 2254, 2254, 2254, 2254, 2254, 2254, 2254, 2254, 2256, 2261, 2261, 2261, 2261, 2261, 2261,
+ 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261,
+ 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261, 2261,
+ 2261, 2263, 2263, 2263, 2263, 2266, 2269, 2270, 2270, 2270, 2270, 2275, 2288, 2300, 2305,
+ 2310, 2316, 2322, 2330, 2330, 2330, 2330, 2330, 2333, 2337, 2337, 2337, 2337, 2337, 2337,
+ 2337, 2337, 2337, 2341, 2341, 2347, 2348, 2353, 2353, 2353, 2358, 2358, 2358, 2358, 2361,
+ 2361, 2361, 2361, 2361, 2361, 2365, 2365, 2367, 2372, 2372, 2372, 2372, 2372, 2372, 2372,
+ 2372, 2372, 2372, 2400, 2405, 2405, 2405, 2405, 2405, 2407, 2408, 2408, 2408, 2408, 2408,
+ 2408, 2408, 2408, 2408, 2409, 2409, 2409, 2409, 2409, 2409, 2409, 2409, 2409, 2409, 2409,
+ 2410, 2410, 2410, 2410, 2410, 2410, 2410, 2410
+ ];
+
const sentence_cat_table: &'static [(char, char, SentenceCat)] = &[
('\u{9}', '\u{9}', SC_Sp), ('\u{a}', '\u{a}', SC_LF), ('\u{b}', '\u{c}', SC_Sp), ('\u{d}',
'\u{d}', SC_CR), ('\u{20}', '\u{20}', SC_Sp), ('\u{21}', '\u{21}', SC_STerm), ('\u{22}',