diff options
Diffstat (limited to 'vendor/ucd-parse/src/jamo_short_name.rs')
-rw-r--r-- | vendor/ucd-parse/src/jamo_short_name.rs | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/vendor/ucd-parse/src/jamo_short_name.rs b/vendor/ucd-parse/src/jamo_short_name.rs new file mode 100644 index 000000000..4103dd7ee --- /dev/null +++ b/vendor/ucd-parse/src/jamo_short_name.rs @@ -0,0 +1,80 @@ +use std::path::Path; +use std::str::FromStr; + +use lazy_static::lazy_static; +use regex::Regex; + +use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; +use crate::error::Error; + +/// A single row in the `Jamo.txt` file. +/// +/// The `Jamo.txt` file defines the `Jamo_Short_Name` property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct JamoShortName { + /// The codepoint corresponding to this row. + pub codepoint: Codepoint, + /// The actual "Jamo Short Name." This string contains at most 3 bytes and + /// may be empty. + pub name: String, +} + +impl UcdFile for JamoShortName { + fn relative_file_path() -> &'static Path { + Path::new("Jamo.txt") + } +} + +impl UcdFileByCodepoint for JamoShortName { + fn codepoints(&self) -> CodepointIter { + self.codepoint.into_iter() + } +} + +impl FromStr for JamoShortName { + type Err = Error; + + fn from_str(line: &str) -> Result<JamoShortName, Error> { + lazy_static! { + static ref PARTS: Regex = Regex::new( + r"(?x) + ^ + (?P<codepoint>[A-Z0-9]+); + \s* + (?P<name>[A-Z]*) + " + ) + .unwrap(); + }; + + let caps = match PARTS.captures(line.trim()) { + Some(caps) => caps, + None => return err!("invalid Jamo_Short_name line"), + }; + Ok(JamoShortName { + codepoint: caps["codepoint"].parse()?, + name: caps.name("name").unwrap().as_str().to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::JamoShortName; + + #[test] + fn parse1() { + let line = "1164; YAE # HANGUL JUNGSEONG YAE\n"; + let row: JamoShortName = line.parse().unwrap(); + assert_eq!(row.codepoint, 0x1164); + assert_eq!(row.name, "YAE"); + } + + #[test] + fn parse2() { + let line = "110B; # HANGUL CHOSEONG IEUNG\n"; + let row: JamoShortName = line.parse().unwrap(); + assert_eq!(row.codepoint, 0x110B); + assert_eq!(row.name, ""); + } +} |