summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unic-langid-impl/src/bin
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/unic-langid-impl/src/bin
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unic-langid-impl/src/bin')
-rw-r--r--third_party/rust/unic-langid-impl/src/bin/generate_layout.rs130
-rw-r--r--third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs201
2 files changed, 331 insertions, 0 deletions
diff --git a/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs b/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs
new file mode 100644
index 0000000000..35d8e27aed
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs
@@ -0,0 +1,130 @@
+use serde_json::Value;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::fs;
+use unic_langid_impl::subtags::{Language, Script};
+use unic_langid_impl::CharacterDirection;
+use unic_langid_impl::LanguageIdentifier;
+
+/// Reads the CLDR `layout.json` of every locale found under `path` and maps
+/// each locale's language identifier to its character direction.
+///
+/// Every entry of the directory `path` is treated as a locale directory that
+/// contains a `layout.json` file. The first key of the JSON `main` object is
+/// taken as the locale name; the `root` locale is skipped.
+///
+/// # Panics
+///
+/// Panics if the directory or any `layout.json` cannot be read, if the JSON
+/// does not have the expected shape, if a locale name does not parse as a
+/// `LanguageIdentifier`, or if `characterOrder` is neither
+/// `"right-to-left"` nor `"left-to-right"`.
+fn langid_to_direction_map(path: &str) -> HashMap<LanguageIdentifier, CharacterDirection> {
+    let mut result = HashMap::new();
+    for entry in fs::read_dir(path).unwrap() {
+        let entry = entry.unwrap();
+        let mut layout_path = entry.path();
+        layout_path.push("layout.json");
+        // Name the offending file in the panic so a broken data checkout is easy to locate.
+        let contents = fs::read_to_string(&layout_path).unwrap_or_else(|err| {
+            panic!(
+                "Something went wrong reading the file {}: {}",
+                layout_path.display(),
+                err
+            )
+        });
+        let v: Value = serde_json::from_str(&contents).unwrap();
+
+        // `next()` is the idiomatic way to take the first key (`nth(0)` is equivalent).
+        let langid_key = v["main"].as_object().unwrap().keys().next().unwrap();
+
+        if langid_key == "root" {
+            continue;
+        }
+        let langid: LanguageIdentifier = langid_key.parse().unwrap();
+
+        let character_order = match v["main"][langid_key]["layout"]["orientation"]["characterOrder"]
+            .as_str()
+            .unwrap()
+        {
+            "right-to-left" => CharacterDirection::RTL,
+            "left-to-right" => CharacterDirection::LTR,
+            _ => unimplemented!("Encountered unknown directionality!"),
+        };
+        result.insert(langid, character_order);
+    }
+    result
+}
+
+/// Checks that every entry of `map` matching the reference subtags is RTL.
+///
+/// An entry matches when `script` is `Some` and equals the entry's script, or
+/// when `lang` is `Some` and equals the entry's language. Returns `false` as
+/// soon as a matching entry whose direction is not `CharacterDirection::RTL`
+/// is found, and `true` otherwise (including when both references are `None`).
+///
+/// For a language mismatch the offending identifier and the reference language
+/// are written to stderr. Stdout is deliberately left alone because this
+/// binary emits generated Rust source there.
+fn check_all_variants_rtl(
+    map: &HashMap<LanguageIdentifier, CharacterDirection>,
+    lang: Option<Language>,
+    script: Option<Script>,
+) -> bool {
+    for (langid, dir) in map.iter() {
+        let not_rtl = dir != &CharacterDirection::RTL;
+
+        // Entries without a script never match a reference script.
+        if let (Some(reference_script), Some(s)) = (script, langid.script) {
+            if reference_script == s && not_rtl {
+                return false;
+            }
+        }
+        if let Some(reference_lang) = lang {
+            if langid.language == reference_lang && not_rtl {
+                eprintln!("{:#?}", langid);
+                eprintln!("{:#?}", lang);
+                return false;
+            }
+        }
+    }
+    true
+}
+
+/// Generates the RTL lookup tables from the CLDR data in
+/// `./data/cldr-misc-full/main/` and prints them to stdout as Rust source.
+///
+/// For every locale that is not LTR, its script is recorded if it has one,
+/// otherwise its language. Before recording, the whole map is checked so that
+/// a script (or language) never appears with two directionalities; a violation
+/// aborts with an assertion failure.
+///
+/// The numeric values are converted to decimal strings before sorting, so the
+/// emitted arrays are ordered lexicographically by their text, not numerically.
+fn main() {
+    let directions = langid_to_direction_map("./data/cldr-misc-full/main/");
+
+    let mut rtl_scripts = HashSet::new();
+    let mut rtl_langs = HashSet::new();
+
+    for (langid, dir) in &directions {
+        if dir == &CharacterDirection::LTR {
+            continue;
+        }
+
+        match langid.script {
+            // A locale with an explicit script contributes only its script.
+            Some(script) => {
+                if !rtl_scripts.contains(&script) {
+                    assert!(
+                        check_all_variants_rtl(&directions, None, Some(script)),
+                        "We didn't expect a script with two directionalities!"
+                    );
+                    rtl_scripts.insert(script);
+                }
+            }
+            // Otherwise the language itself is recorded.
+            None => {
+                let lang = langid.language;
+                if !rtl_langs.contains(&lang) {
+                    assert!(
+                        check_all_variants_rtl(&directions, Some(lang), None),
+                        "We didn't expect a language with two directionalities!"
+                    );
+                    rtl_langs.insert(lang);
+                }
+            }
+        }
+    }
+
+    let mut script_literals: Vec<String> = rtl_scripts
+        .into_iter()
+        .map(|script| Into::<u32>::into(script).to_string())
+        .collect();
+    script_literals.sort();
+
+    let mut lang_literals: Vec<String> = rtl_langs
+        .into_iter()
+        .map(|lang| {
+            Into::<Option<u64>>::into(lang)
+                .expect("Expected language to not be undefined.")
+                .to_string()
+        })
+        .collect();
+    lang_literals.sort();
+
+    println!(
+        "pub const SCRIPTS_CHARACTER_DIRECTION_RTL: [u32; {}] = [{}];",
+        script_literals.len(),
+        script_literals.join(", ")
+    );
+
+    println!(
+        "pub const LANGS_CHARACTER_DIRECTION_RTL: [u64; {}] = [{}];",
+        lang_literals.len(),
+        lang_literals.join(", ")
+    );
+}
diff --git a/third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs b/third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs
new file mode 100644
index 0000000000..a86ffa80d6
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs
@@ -0,0 +1,201 @@
+use serde_json::Value;
+use std::fs;
+use tinystr::TinyStr8;
+use unic_langid_impl::{subtags, LanguageIdentifier};
+
+type LangIdSubTags = (Option<u64>, Option<u32>, Option<u32>);
+
+/// Renders a `(language, script, region)` triple as the source text of a Rust
+/// tuple literal, e.g. `(Some(1), None, Some(2))`.
+fn serialize_val(input: LangIdSubTags) -> String {
+    let (lang, script, region) = input;
+    let lang = serialize_lang_option(lang);
+    let script = serialize_script_option(script);
+    let region = serialize_region_option(region);
+    format!("({}, {}, {})", lang, script, region)
+}
+
+/// Renders an optional language value as Rust source text: `Some(<n>)` or `None`.
+fn serialize_lang_option(l: Option<u64>) -> String {
+    match l {
+        Some(value) => format!("Some({})", value),
+        None => "None".to_owned(),
+    }
+}
+
+/// Renders an optional script value as Rust source text: `Some(<n>)` or `None`.
+fn serialize_script_option(r: Option<u32>) -> String {
+    r.map_or_else(|| String::from("None"), |value| format!("Some({})", value))
+}
+
+/// Renders an optional region value as Rust source text: `Some(<n>)` or `None`.
+fn serialize_region_option(r: Option<u32>) -> String {
+    match r {
+        None => "None".to_owned(),
+        Some(value) => format!("Some({})", value),
+    }
+}
+
+/// Reads `./data/likelySubtags.json` and prints the likely-subtags lookup
+/// tables to stdout as Rust source.
+///
+/// Each key of `supplemental.likelySubtags` is parsed as a language identifier
+/// and routed into one of six tables according to which of its language,
+/// script and region subtags are present. The associated value is stored as a
+/// `(language, script, region)` triple, with a `ZZ` region replaced by `None`.
+/// Every table is sorted by its key column(s) before being printed.
+///
+/// # Panics
+///
+/// Panics if the file cannot be read, if the JSON does not have the expected
+/// shape, if a key or value fails to parse, or if a key carries language,
+/// script and region all at once.
+fn main() {
+    let json_text = fs::read_to_string("./data/likelySubtags.json")
+        .expect("Something went wrong reading the file");
+    let document: Value = serde_json::from_str(&json_text).unwrap();
+    let likely_subtags = document["supplemental"]["likelySubtags"]
+        .as_object()
+        .unwrap();
+
+    let mut lang_only: Vec<(u64, LangIdSubTags)> = Vec::new();
+    let mut lang_region: Vec<(u64, u32, LangIdSubTags)> = Vec::new();
+    let mut lang_script: Vec<(u64, u32, LangIdSubTags)> = Vec::new();
+    let mut script_region: Vec<(u32, u32, LangIdSubTags)> = Vec::new();
+    let mut region_only: Vec<(u32, LangIdSubTags)> = Vec::new();
+    let mut script_only: Vec<(u32, LangIdSubTags)> = Vec::new();
+
+    let zz_region: subtags::Region = "ZZ".parse().unwrap();
+
+    for (raw_key, raw_value) in likely_subtags {
+        let key_langid: LanguageIdentifier = raw_key.parse().expect("Failed to parse a key.");
+        let raw_value: &str = raw_value.as_str().unwrap();
+        let mut value_langid: LanguageIdentifier =
+            raw_value.parse().expect("Failed to parse a value.");
+        // A `ZZ` region in the value is stored as "no region".
+        if value_langid.region == Some(zz_region) {
+            value_langid.region = None;
+        }
+        let (val_lang, val_script, val_region, _) = value_langid.into_parts();
+
+        let val_lang: Option<u64> = val_lang.into();
+        let val_script: Option<u32> = val_script.map(Into::into);
+        let val_region: Option<u32> = val_region.map(Into::into);
+        let likely: LangIdSubTags = (val_lang, val_script, val_region);
+
+        // An empty key language is treated as absent.
+        let key_lang = Some(key_langid.language).filter(|language| !language.is_empty());
+
+        match (key_lang, key_langid.script, key_langid.region) {
+            // A key with no subtags at all is filed under the language `und`.
+            (None, None, None) => lang_only.push((
+                u64::from_le_bytes(*TinyStr8::from_str("und").unwrap().all_bytes()),
+                likely,
+            )),
+            (Some(l), None, None) => {
+                lang_only.push((Into::<Option<u64>>::into(l).unwrap(), likely))
+            }
+            (Some(l), None, Some(r)) => {
+                lang_region.push((Into::<Option<u64>>::into(l).unwrap(), r.into(), likely))
+            }
+            (Some(l), Some(s), None) => {
+                lang_script.push((Into::<Option<u64>>::into(l).unwrap(), s.into(), likely))
+            }
+            (None, Some(s), Some(r)) => script_region.push((s.into(), r.into(), likely)),
+            (None, Some(s), None) => script_only.push((s.into(), likely)),
+            (None, None, Some(r)) => region_only.push((r.into(), likely)),
+            _ => panic!("{:#?}", key_langid),
+        }
+    }
+
+    println!("#![allow(clippy::type_complexity)]");
+    println!("#![allow(clippy::unreadable_literal)]\n");
+
+    let version = document["supplemental"]["version"]["_cldrVersion"]
+        .as_str()
+        .unwrap();
+    println!("pub static CLDR_VERSION: &str = \"{}\";", version);
+
+    // Single-key tables are ordered by that key; two-key tables by the first
+    // key and then the second. The sorts are stable.
+    lang_only.sort_by_key(|entry| entry.0);
+    println!(
+        "pub static LANG_ONLY: [(u64, (Option<u64>, Option<u32>, Option<u32>)); {}] = [",
+        lang_only.len()
+    );
+    for (key_lang, val) in lang_only {
+        println!(" ({}, {}),", key_lang, serialize_val(val),);
+    }
+    println!("];");
+
+    lang_region.sort_by_key(|entry| (entry.0, entry.1));
+    println!(
+        "pub static LANG_REGION: [(u64, u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [",
+        lang_region.len()
+    );
+    for (key_lang, key_region, val) in lang_region {
+        println!(
+            " ({}, {}, {}),",
+            key_lang,
+            key_region,
+            serialize_val(val),
+        );
+    }
+    println!("];");
+
+    lang_script.sort_by_key(|entry| (entry.0, entry.1));
+    println!(
+        "pub static LANG_SCRIPT: [(u64, u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [",
+        lang_script.len()
+    );
+    for (key_lang, key_script, val) in lang_script {
+        println!(
+            " ({}, {}, {}),",
+            key_lang,
+            key_script,
+            serialize_val(val),
+        );
+    }
+    println!("];");
+
+    script_region.sort_by_key(|entry| (entry.0, entry.1));
+    println!(
+        "pub static SCRIPT_REGION: [(u32, u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [",
+        script_region.len()
+    );
+    for (key_script, key_region, val) in script_region {
+        println!(
+            " ({}, {}, {}),",
+            key_script,
+            key_region,
+            serialize_val(val),
+        );
+    }
+    println!("];");
+
+    script_only.sort_by_key(|entry| entry.0);
+    println!(
+        "pub static SCRIPT_ONLY: [(u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [",
+        script_only.len()
+    );
+    for (key_script, val) in script_only {
+        println!(" ({}, {}),", key_script, serialize_val(val),);
+    }
+    println!("];");
+
+    region_only.sort_by_key(|entry| entry.0);
+    println!(
+        "pub static REGION_ONLY: [(u32, (Option<u64>, Option<u32>, Option<u32>)); {}] = [",
+        region_only.len()
+    );
+    for (key_region, val) in region_only {
+        println!(" ({}, {}),", key_region, serialize_val(val),);
+    }
+    println!("];");
+}