summaryrefslogtreecommitdiffstats
path: root/third_party/rust/icu_segmenter/src/symbols.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/icu_segmenter/src/symbols.rs')
-rw-r--r--third_party/rust/icu_segmenter/src/symbols.rs141
1 files changed, 141 insertions, 0 deletions
diff --git a/third_party/rust/icu_segmenter/src/symbols.rs b/third_party/rust/icu_segmenter/src/symbols.rs
new file mode 100644
index 0000000000..b2c9a2450f
--- /dev/null
+++ b/third_party/rust/icu_segmenter/src/symbols.rs
@@ -0,0 +1,141 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// TODO(#1637): The numeric values of these symbols are generated by the old transformation code
+// (aka build.rs). We should move these symbols into RuleBreakDataV1, and remove this file.
+
+// Used by line.rs.
+#[allow(dead_code)]
+pub const UNKNOWN: u8 = 0;
+#[allow(dead_code)]
+pub const AI: u8 = 1;
+#[allow(dead_code)]
+pub const AL: u8 = 2;
+#[allow(dead_code)]
+pub const B2: u8 = 3;
+#[allow(dead_code)]
+pub const BA: u8 = 4;
+#[allow(dead_code)]
+pub const BB: u8 = 5;
+#[allow(dead_code)]
+pub const BK: u8 = 6;
+#[allow(dead_code)]
+pub const CB: u8 = 7;
+#[allow(dead_code)]
+pub const CJ: u8 = 8;
+#[allow(dead_code)]
+pub const CL: u8 = 9;
+#[allow(dead_code)]
+pub const CM: u8 = 10;
+#[allow(dead_code)]
+pub const CP: u8 = 11;
+#[allow(dead_code)]
+pub const CR: u8 = 12;
+#[allow(dead_code)]
+pub const EB: u8 = 13;
+#[allow(dead_code)]
+pub const EM: u8 = 14;
+#[allow(dead_code)]
+pub const EX: u8 = 15;
+#[allow(dead_code)]
+pub const GL: u8 = 16;
+#[allow(dead_code)]
+pub const H2: u8 = 17;
+#[allow(dead_code)]
+pub const H3: u8 = 18;
+#[allow(dead_code)]
+pub const HL: u8 = 19;
+#[allow(dead_code)]
+pub const HY: u8 = 20;
+#[allow(dead_code)]
+pub const ID: u8 = 21;
+#[allow(dead_code)]
+pub const ID_CN: u8 = 22;
+#[allow(dead_code)]
+pub const IN: u8 = 23;
+#[allow(dead_code)]
+pub const IS: u8 = 24;
+#[allow(dead_code)]
+pub const JL: u8 = 25;
+#[allow(dead_code)]
+pub const JT: u8 = 26;
+#[allow(dead_code)]
+pub const JV: u8 = 27;
+#[allow(dead_code)]
+pub const LF: u8 = 28;
+#[allow(dead_code)]
+pub const NL: u8 = 29;
+#[allow(dead_code)]
+pub const NS: u8 = 30;
+#[allow(dead_code)]
+pub const NU: u8 = 31;
+#[allow(dead_code)]
+pub const OP_EA: u8 = 32;
+#[allow(dead_code)]
+pub const OP_OP30: u8 = 33;
+#[allow(dead_code)]
+pub const PO: u8 = 34;
+#[allow(dead_code)]
+pub const PO_EAW: u8 = 35;
+#[allow(dead_code)]
+pub const PR: u8 = 36;
+#[allow(dead_code)]
+pub const PR_EAW: u8 = 37;
+#[allow(dead_code)]
+pub const QU: u8 = 38;
+#[allow(dead_code)]
+pub const RI: u8 = 39;
+#[allow(dead_code)]
+pub const SA: u8 = 40;
+#[allow(dead_code)]
+pub const SG: u8 = 41;
+#[allow(dead_code)]
+pub const SP: u8 = 42;
+#[allow(dead_code)]
+pub const SY: u8 = 43;
+#[allow(dead_code)]
+pub const WJ: u8 = 44;
+#[allow(dead_code)]
+pub const XX: u8 = 45;
+#[allow(dead_code)]
+pub const ZW: u8 = 46;
+#[allow(dead_code)]
+pub const ZWJ: u8 = 47;
+#[allow(dead_code)]
+pub const OP_SP: u8 = 48;
+#[allow(dead_code)]
+pub const QU_SP: u8 = 49;
+#[allow(dead_code)]
+pub const CL_CP_SP: u8 = 50;
+#[allow(dead_code)]
+pub const B2_SP: u8 = 51;
+#[allow(dead_code)]
+pub const HL_HY: u8 = 52;
+#[allow(dead_code)]
+pub const LB25_HY: u8 = 53;
+#[allow(dead_code)]
+pub const LB25_OP: u8 = 54;
+#[allow(dead_code)]
+pub const LB25_NU_IS: u8 = 55;
+#[allow(dead_code)]
+pub const LB25_NU_SY: u8 = 56;
+#[allow(dead_code)]
+pub const LB25_NU_CL: u8 = 57;
+#[allow(dead_code)]
+pub const LB25_NU_CP: u8 = 58;
+#[allow(dead_code)]
+pub const RI_RI: u8 = 59;
+#[allow(dead_code)]
+pub const SOT: u8 = 60;
+#[allow(dead_code)]
+pub const EOT: u8 = 61;
+
+// Used by all segmenters.
+pub const BREAK_RULE: i8 = -128;
+pub const UNKNOWN_RULE: i8 = -127;
+pub const NOT_MATCH_RULE: i8 = -2;
+pub const KEEP_RULE: i8 = -1;
+// This is a mask bit chosen sufficiently large than all other concrete states.
+// If a break state contains this bit, we have to look ahead one more character.
+pub const INTERMEDIATE_MATCH_RULE: i8 = 64;