Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit: 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree: 173a775858bd501c378080a10dca74132f05bc50 /vendor/ucd-parse/src/case_folding.rs
parent: Initial commit. (diff)
download: rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
1 files changed, 161 insertions, 0 deletions
diff --git a/vendor/ucd-parse/src/case_folding.rs b/vendor/ucd-parse/src/case_folding.rs
new file mode 100644
index 000000000..813fc81a1
--- /dev/null
+++ b/vendor/ucd-parse/src/case_folding.rs
@@ -0,0 +1,161 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
+use crate::error::Error;
+
+/// A single row in the `CaseFolding.txt` file.
+///
+/// The contents of `CaseFolding.txt` are a convenience derived from both
+/// `UnicodeData.txt` and `SpecialCasing.txt`.
+///
+/// Note that a single codepoint may be mapped multiple times. In particular,
+/// a single codepoint might have distinct `CaseStatus::Simple` and
+/// `CaseStatus::Full` mappings.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct CaseFold {
+    /// The codepoint that is being mapped.
+    pub codepoint: Codepoint,
+    /// The case status of this mapping.
+    pub status: CaseStatus,
+    /// The actual case mapping, which is more than one codepoint if this is
+    /// a "full" mapping.
+    pub mapping: Vec<Codepoint>,
+}
+
+impl UcdFile for CaseFold {
+    fn relative_file_path() -> &'static Path {
+        Path::new("CaseFolding.txt")
+    }
+}
+
+impl UcdFileByCodepoint for CaseFold {
+    fn codepoints(&self) -> CodepointIter {
+        self.codepoint.into_iter()
+    }
+}
+
+impl FromStr for CaseFold {
+    type Err = Error;
+
+    fn from_str(line: &str) -> Result<CaseFold, Error> {
+        lazy_static! {
+            static ref PARTS: Regex = Regex::new(
+                r"(?x)
+                ^
+                \s*(?P<codepoint>[^\s;]+)\s*;
+                \s*(?P<status>[^\s;]+)\s*;
+                \s*(?P<mapping>[^;]+)\s*;
+                "
+            )
+            .unwrap();
+        };
+
+        let caps = match PARTS.captures(line.trim()) {
+            Some(caps) => caps,
+            None => return err!("invalid CaseFolding line: '{}'", line),
+        };
+        let mut mapping = vec![];
+        for cp in caps["mapping"].split_whitespace() {
+            mapping.push(cp.parse()?);
+        }
+        Ok(CaseFold {
+            codepoint: caps["codepoint"].parse()?,
+            status: caps["status"].parse()?,
+            mapping,
+        })
+    }
+}
+
+/// The status of a particular case mapping.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum CaseStatus {
+    /// Case mappings shared by both "simple" and "full" mappings.
+    Common,
+    /// A case mapping that changes the number of codepoints.
+    Full,
+    /// A case mapping that doesn't change the number of codepoints, when it
+    /// differs from `Full`.
+    Simple,
+    /// Special cases (currently only for Turkic mappings) that are typically
+    /// excluded by default. Special cases don't change the number of
+    /// codepoints, but may change the encoding (e.g., UTF-8) length in bytes.
+    Special,
+}
+
+impl Default for CaseStatus {
+    fn default() -> CaseStatus {
+        CaseStatus::Common
+    }
+}
+
+impl CaseStatus {
+    /// Returns true if and only if this status indicates a case mapping that
+    /// won't change the number of codepoints.
+    pub fn is_fixed(&self) -> bool {
+        *self != CaseStatus::Full
+    }
+}
+
+impl FromStr for CaseStatus {
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<CaseStatus, Error> {
+        match s {
+            "C" => Ok(CaseStatus::Common),
+            "F" => Ok(CaseStatus::Full),
+            "S" => Ok(CaseStatus::Simple),
+            "T" => Ok(CaseStatus::Special),
+            _ => err!(
+                "unrecognized case status: '{}' \
+                 (must be one of C, F, S or T)",
+                s
+            ),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{CaseFold, CaseStatus};
+
+    #[test]
+    fn parse_common() {
+        let line =
+            "0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE\n";
+        let row: CaseFold = line.parse().unwrap();
+        assert_eq!(row.codepoint, 0x0150);
+        assert_eq!(row.status, CaseStatus::Common);
+        assert_eq!(row.mapping, vec![0x0151]);
+    }
+
+    #[test]
+    fn parse_full() {
+        let line = "03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS\n";
+        let row: CaseFold = line.parse().unwrap();
+        assert_eq!(row.codepoint, 0x03B0);
+        assert_eq!(row.status, CaseStatus::Full);
+        assert_eq!(row.mapping, vec![0x03C5, 0x0308, 0x0301]);
+    }
+
+    #[test]
+    fn parse_simple() {
+        let line = "1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI\n";
+        let row: CaseFold = line.parse().unwrap();
+        assert_eq!(row.codepoint, 0x1F8F);
+        assert_eq!(row.status, CaseStatus::Simple);
+        assert_eq!(row.mapping, vec![0x1F87]);
+    }
+
+    #[test]
+    fn parse_special() {
+        let line = "0049; T; 0131; # LATIN CAPITAL LETTER I\n";
+        let row: CaseFold = line.parse().unwrap();
+        assert_eq!(row.codepoint, 0x0049);
+        assert_eq!(row.status, CaseStatus::Special);
+        assert_eq!(row.mapping, vec![0x0131]);
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:02:58 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:02:58 +0000
commit	698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree	173a775858bd501c378080a10dca74132f05bc50 /vendor/ucd-parse/src/case_folding.rs
parent	Initial commit. (diff)
download	rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip