diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unic-langid-impl/src/bin | |
parent | Initial commit. (diff) | |
download | firefox-esr-upstream.tar.xz firefox-esr-upstream.zip |
Adding upstream version 115.7.0esr. (refs: upstream/115.7.0esr, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unic-langid-impl/src/bin')
-rw-r--r-- | third_party/rust/unic-langid-impl/src/bin/generate_layout.rs | 130 | ||||
-rw-r--r-- | third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs | 201 |
2 files changed, 331 insertions, 0 deletions
diff --git a/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs b/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs new file mode 100644 index 0000000000..35d8e27aed --- /dev/null +++ b/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs @@ -0,0 +1,130 @@ +use serde_json::Value; +use std::collections::HashMap; +use std::collections::HashSet; +use std::fs; +use unic_langid_impl::subtags::{Language, Script}; +use unic_langid_impl::CharacterDirection; +use unic_langid_impl::LanguageIdentifier; + +fn langid_to_direction_map(path: &str) -> HashMap<LanguageIdentifier, CharacterDirection> { + let mut result = HashMap::new(); + for entry in fs::read_dir(path).unwrap() { + let entry = entry.unwrap(); + let mut path = entry.path(); + path.push("layout.json"); + let contents = fs::read_to_string(path).expect("Something went wrong reading the file"); + let v: Value = serde_json::from_str(&contents).unwrap(); + + let langid_key = v["main"].as_object().unwrap().keys().nth(0).unwrap(); + + if langid_key == "root" { + continue; + } + let langid: LanguageIdentifier = langid_key.parse().unwrap(); + + let character_order = match v["main"][langid_key]["layout"]["orientation"]["characterOrder"] + .as_str() + .unwrap() + { + "right-to-left" => CharacterDirection::RTL, + "left-to-right" => CharacterDirection::LTR, + _ => unimplemented!("Encountered unknown directionality!"), + }; + result.insert(langid, character_order); + } + result +} + +fn check_all_variants_rtl( + map: &HashMap<LanguageIdentifier, CharacterDirection>, + lang: Option<Language>, + script: Option<Script>, +) -> bool { + for (langid, dir) in map.iter() { + if let Some(reference_script) = script { + if let Some(s) = langid.script { + if reference_script == s && dir != &CharacterDirection::RTL { + return false; + } + } + } + if let Some(reference_lang) = lang { + if langid.language == reference_lang && dir != &CharacterDirection::RTL { + println!("{:#?}", langid); + println!("{:#?}", lang); + 
return false; + } + } + } + true +} + +fn main() { + let path = "./data/cldr-misc-full/main/"; + let map = langid_to_direction_map(path); + + let mut scripts = HashSet::new(); + let mut langs = HashSet::new(); + + for (langid, dir) in map.iter() { + if dir == &CharacterDirection::LTR { + continue; + } + + let script = langid.script; + + if let Some(script) = script { + if scripts.contains(&script) { + continue; + } + assert!( + check_all_variants_rtl(&map, None, Some(script)), + "We didn't expect a script with two directionalities!" + ); + scripts.insert(script); + continue; + } + + let lang = langid.language; + + if langs.contains(&lang) { + continue; + } + + assert!( + check_all_variants_rtl(&map, Some(lang), None), + "We didn't expect a language with two directionalities!" + ); + langs.insert(lang); + } + + let mut scripts: Vec<String> = scripts + .into_iter() + .map(|s| { + let v: u32 = s.into(); + v.to_string() + }) + .collect(); + scripts.sort(); + let mut langs: Vec<String> = langs + .into_iter() + .map(|s| { + let v: Option<u64> = s.into(); + let v: u64 = v.expect("Expected language to not be undefined."); + v.to_string() + }) + .collect(); + langs.sort(); + + println!( + "pub const SCRIPTS_CHARACTER_DIRECTION_RTL: [u32; {}] = [{}];", + scripts.len(), + scripts.join(", ") + ); + + println!( + "pub const LANGS_CHARACTER_DIRECTION_RTL: [u64; {}] = [{}];", + langs.len(), + langs.join(", ") + ); +} diff --git a/third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs b/third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs new file mode 100644 index 0000000000..a86ffa80d6 --- /dev/null +++ b/third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs @@ -0,0 +1,201 @@ +use serde_json::Value; +use std::fs; +use tinystr::TinyStr8; +use unic_langid_impl::{subtags, LanguageIdentifier}; + +type LangIdSubTags = (Option<u64>, Option<u32>, Option<u32>); + +fn serialize_val(input: LangIdSubTags) -> String { + format!( + "({}, {}, {})", + 
serialize_lang_option(input.0), + serialize_script_option(input.1), + serialize_region_option(input.2) + ) +} + +fn serialize_lang_option(l: Option<u64>) -> String { + if let Some(l) = l { + format!("Some({})", l) + } else { + String::from("None") + } +} + +fn serialize_script_option(r: Option<u32>) -> String { + if let Some(r) = r { + format!("Some({})", r) + } else { + String::from("None") + } +} + +fn serialize_region_option(r: Option<u32>) -> String { + if let Some(r) = r { + format!("Some({})", r) + } else { + String::from("None") + } +} + +fn main() { + let contents = fs::read_to_string("./data/likelySubtags.json") + .expect("Something went wrong reading the file"); + let v: Value = serde_json::from_str(&contents).unwrap(); + let values = v["supplemental"]["likelySubtags"].as_object().unwrap(); + + let mut lang_only: Vec<(u64, LangIdSubTags)> = vec![]; + let mut lang_region: Vec<(u64, u32, LangIdSubTags)> = vec![]; + let mut lang_script: Vec<(u64, u32, LangIdSubTags)> = vec![]; + let mut script_region: Vec<(u32, u32, LangIdSubTags)> = vec![]; + let mut region_only: Vec<(u32, LangIdSubTags)> = vec![]; + let mut script_only: Vec<(u32, LangIdSubTags)> = vec![]; + + let zz_region: subtags::Region = "ZZ".parse().unwrap(); + + for (k, v) in values { + let key_langid: LanguageIdentifier = k.parse().expect("Failed to parse a key."); + let v: &str = v.as_str().unwrap(); + let mut value_langid: LanguageIdentifier = v.parse().expect("Failed to parse a value."); + if Some(zz_region) == value_langid.region { + value_langid.region = None; + } + let (val_lang, val_script, val_region, _) = value_langid.into_parts(); + + let val_lang: Option<u64> = val_lang.into(); + let val_script: Option<u32> = val_script.map(Into::into); + let val_region: Option<u32> = val_region.map(Into::into); + + let lang = if key_langid.language.is_empty() { + None + } else { + Some(key_langid.language) + }; + let script = key_langid.script; + let region = key_langid.region; + + match (lang, script, 
region) { + (None, None, None) => lang_only.push(( + u64::from_le_bytes(*TinyStr8::from_str("und").unwrap().all_bytes()), + (val_lang, val_script, val_region), + )), + (Some(l), None, None) => lang_only.push(( + Into::<Option<u64>>::into(l).unwrap(), + (val_lang, val_script, val_region), + )), + (Some(l), None, Some(r)) => lang_region.push(( + Into::<Option<u64>>::into(l).unwrap(), + r.into(), + (val_lang, val_script, val_region), + )), + (Some(l), Some(s), None) => lang_script.push(( + Into::<Option<u64>>::into(l).unwrap(), + s.into(), + (val_lang, val_script, val_region), + )), + (None, Some(s), Some(r)) => { + script_region.push((s.into(), r.into(), (val_lang, val_script, val_region))) + } + (None, Some(s), None) => { + script_only.push((s.into(), (val_lang, val_script, val_region))) + } + (None, None, Some(r)) => { + region_only.push((r.into(), (val_lang, val_script, val_region))) + } + _ => { + panic!("{:#?}", key_langid); + } + } + } + + println!("#![allow(clippy::type_complexity)]"); + println!("#![allow(clippy::unreadable_literal)]\n"); + + let version = v["supplemental"]["version"]["_cldrVersion"] + .as_str() + .unwrap(); + println!("pub static CLDR_VERSION: &str = \"{}\";", version); + + println!( + "pub static LANG_ONLY: [(u64, (Option<u64>, Option<u32>, Option<u32>)); {}] = [", + lang_only.len() + ); + lang_only.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + for (key_lang, val) in lang_only { + println!(" ({}, {}),", key_lang, serialize_val(val),); + } + println!("];"); + + println!( + "pub static LANG_REGION: [(u64, u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [", + lang_region.len() + ); + lang_region.sort_by(|a, b| { + a.0.partial_cmp(&b.0) + .unwrap() + .then_with(|| a.1.partial_cmp(&b.1).unwrap()) + }); + for (key_lang, key_region, val) in lang_region { + println!( + " ({}, {}, {}),", + key_lang, + key_region, + serialize_val(val), + ); + } + println!("];"); + println!( + "pub static LANG_SCRIPT: [(u64, u32, (Option<u64>, Option<u32>, 
Option<u32>)); {}] = [", + lang_script.len() + ); + lang_script.sort_by(|a, b| { + a.0.partial_cmp(&b.0) + .unwrap() + .then_with(|| a.1.partial_cmp(&b.1).unwrap()) + }); + for (key_lang, key_script, val) in lang_script { + println!( + " ({}, {}, {}),", + key_lang, + key_script, + serialize_val(val), + ); + } + println!("];"); + println!( + "pub static SCRIPT_REGION: [(u32, u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [", + script_region.len() + ); + script_region.sort_by(|a, b| { + a.0.partial_cmp(&b.0) + .unwrap() + .then_with(|| a.1.partial_cmp(&b.1).unwrap()) + }); + for (key_script, key_region, val) in script_region { + println!( + " ({}, {}, {}),", + key_script, + key_region, + serialize_val(val), + ); + } + println!("];"); + println!( + "pub static SCRIPT_ONLY: [(u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [", + script_only.len() + ); + script_only.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + for (key_script, val) in script_only { + println!(" ({}, {}),", key_script, serialize_val(val),); + } + println!("];"); + println!( + "pub static REGION_ONLY: [(u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [", + region_only.len() + ); + region_only.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + for (key_region, val) in region_only { + println!(" ({}, {}),", key_region, serialize_val(val),); + } + println!("];"); +} |