diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /intl/components/src/LocaleGenerated.cpp | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1. upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/components/src/LocaleGenerated.cpp')
-rw-r--r-- | intl/components/src/LocaleGenerated.cpp | 1208 |
1 files changed, 1208 insertions, 0 deletions
diff --git a/intl/components/src/LocaleGenerated.cpp b/intl/components/src/LocaleGenerated.cpp new file mode 100644 index 0000000000..427a78de72 --- /dev/null +++ b/intl/components/src/LocaleGenerated.cpp @@ -0,0 +1,1208 @@ +// Generated by make_intl_data.py. DO NOT EDIT. +// Version: CLDR-43 +// URL: https://unicode.org/Public/cldr/43/cldr-common-43.0.zip + +#include "mozilla/Assertions.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <string> +#include <type_traits> + +#include "mozilla/intl/Locale.h" + +using namespace mozilla::intl::LanguageTagLimits; + +template <size_t Length, size_t TagLength, size_t SubtagLength> +static inline bool HasReplacement( + const char (&subtags)[Length][TagLength], + const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) { + MOZ_ASSERT(subtag.Length() == TagLength - 1, + "subtag must have the same length as the list of subtags"); + + const char* ptr = subtag.Span().data(); + return std::binary_search(std::begin(subtags), std::end(subtags), ptr, + [](const char* a, const char* b) { + return memcmp(a, b, TagLength - 1) < 0; + }); +} + +template <size_t Length, size_t TagLength, size_t SubtagLength> +static inline const char* SearchReplacement( + const char (&subtags)[Length][TagLength], const char* (&aliases)[Length], + const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) { + MOZ_ASSERT(subtag.Length() == TagLength - 1, + "subtag must have the same length as the list of subtags"); + + const char* ptr = subtag.Span().data(); + auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr, + [](const char* a, const char* b) { + return memcmp(a, b, TagLength - 1) < 0; + }); + if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) { + return aliases[std::distance(std::begin(subtags), p)]; + } + return nullptr; +} + +#ifdef DEBUG +static bool IsAsciiLowercaseAlphanumeric(char c) { + return 
mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c); +} + +static bool IsAsciiLowercaseAlphanumericOrDash(char c) { + return IsAsciiLowercaseAlphanumeric(c) || c == '-'; +} + +static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) { + return std::all_of(span.begin(), span.end(), + mozilla::IsAsciiLowercaseAlpha<char>); +} + +static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) { + return mozilla::IsAsciiUppercaseAlpha(span[0]) && + std::all_of(span.begin() + 1, span.end(), + mozilla::IsAsciiLowercaseAlpha<char>); +} + +static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) { + return std::all_of(span.begin(), span.end(), + mozilla::IsAsciiUppercaseAlpha<char>) || + std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>); +} + +static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) { + return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) { + return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) { + return std::all_of(type.begin(), type.end(), + IsAsciiLowercaseAlphanumericOrDash); +} + +static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) { + return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); +} + +static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) { + return std::all_of(type.begin(), type.end(), + IsAsciiLowercaseAlphanumericOrDash); +} +#endif + +// Mappings from language subtags to preferred values. +// Derived from CLDR Supplemental Data, version 43. 
+// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span())); + + if (language.Length() == 2) { + static const char languages[8][3] = { + "bh", "in", "iw", "ji", "jw", "mo", "tl", "tw", + }; + static const char* aliases[8] = { + "bho", "id", "he", "yi", "jv", "ro", "fil", "ak", + }; + + if (const char* replacement = SearchReplacement(languages, aliases, language)) { + language.Set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + if (language.Length() == 3) { + static const char languages[408][4] = { + "aam", "aar", "abk", "adp", "afr", "agp", "ais", "ajt", "aju", "aka", + "alb", "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm", "aue", + "ava", "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam", "baq", + "baz", "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bic", "bih", "bis", + "bjd", "bjq", "bkb", "blg", "bod", "bos", "bre", "btb", "bul", "bur", + "bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", "chv", + "cjr", "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre", + "cwd", "cym", "cze", "daf", "dan", "dap", "deu", "dgo", "dhd", "dik", + "diq", "dit", "div", "djl", "dkl", "drh", "drr", "dud", "duj", "dut", + "dwl", "dzo", "ekk", "ell", "elp", "emk", "eng", "epo", "esk", "est", + "eus", "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre", "fry", + "fuc", "ful", "gav", "gaz", "gbc", "gbo", "geo", "ger", "gfx", "ggn", + "ggo", "ggr", "gio", "gla", "gle", "glg", "gli", "glv", "gno", "gre", + "grn", "gti", "gug", "guj", "guv", "gya", "hat", "hau", "hdn", "hea", + "heb", "her", "him", "hin", "hmo", "hrr", "hrv", "hun", "hye", "ibi", + "ibo", "ice", "ido", "iii", "ike", "iku", "ile", "ill", "ilw", "ina", + "ind", "ipk", "isl", "ita", "izi", "jar", "jav", "jeg", "jpn", "kal", + "kan", "kas", "kat", "kau", "kaz", 
"kdv", "kgc", "kgd", "kgh", "khk", + "khm", "kik", "kin", "kir", "kmr", "knc", "kng", "knn", "koj", "kom", + "kon", "kor", "kpp", "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq", + "kxe", "kxl", "kzh", "kzj", "kzt", "lak", "lao", "lat", "lav", "lbk", + "leg", "lii", "lim", "lin", "lit", "llo", "lmm", "ltz", "lub", "lug", + "lvs", "mac", "mah", "mal", "mao", "mar", "may", "meg", "mgx", "mhr", + "mkd", "mlg", "mlt", "mnk", "mnt", "mof", "mol", "mon", "mri", "msa", + "mst", "mup", "mwd", "mwj", "mya", "myd", "myt", "nad", "nau", "nav", + "nbf", "nbl", "nbx", "ncp", "nde", "ndo", "nep", "nld", "nln", "nlr", + "nno", "nns", "nnx", "nob", "noo", "nor", "npi", "nts", "nxu", "nya", + "oci", "ojg", "oji", "ori", "orm", "ory", "oss", "oun", "pan", "pat", + "pbu", "pcr", "per", "pes", "pli", "plt", "pmc", "pmu", "pnb", "pol", + "por", "ppa", "ppr", "pry", "pus", "puz", "que", "quz", "rmr", "rmy", + "roh", "ron", "rum", "run", "rus", "sag", "san", "sap", "sca", "scc", + "scr", "sgl", "sin", "skk", "slk", "slo", "slv", "smd", "sme", "smo", + "sna", "snb", "snd", "som", "sot", "spa", "spy", "sqi", "src", "srd", + "srp", "ssw", "sul", "sum", "sun", "swa", "swe", "swh", "tah", "tam", + "tat", "tdu", "tel", "tgg", "tgk", "tgl", "tha", "thc", "thw", "thx", + "tib", "tid", "tie", "tir", "tkk", "tlw", "tmp", "tne", "ton", "tsf", + "tsn", "tso", "ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "unp", + "uok", "urd", "uzb", "uzn", "ven", "vie", "vol", "wel", "wgw", "wit", + "wiw", "wln", "wol", "xba", "xho", "xia", "xkh", "xpe", "xrq", "xsj", + "xsl", "ybd", "ydd", "yen", "yid", "yiy", "yma", "ymt", "yor", "yos", + "yuu", "zai", "zha", "zho", "zir", "zsm", "zul", "zyb", + }; + static const char* aliases[408] = { + "aas", "aa", "ab", "dz", "af", "apf", "ami", "aeb", "jrb", "ak", + "sq", "sq", "am", "ar", "ar", "an", "hy", "snz", "as", "ktz", + "av", "ae", "ay", "ay", "nun", "az", "az", "ba", "bm", "eu", + "nvo", "bal", "bik", "be", "bn", "bcg", "fbl", "bir", "bho", "bi", + "drl", "bzc", 
"ebk", "iba", "bo", "bs", "br", "beb", "bg", "my", + "luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu", "cv", + "mom", "cmr", "syr", "xch", "zh", "kw", "co", "pij", "quh", "cr", + "cr", "cy", "cs", "dnj", "da", "njz", "de", "doi", "mwr", "din", + "zza", "dif", "dv", "dze", "aqd", "mn", "kzk", "uth", "dwu", "nl", + "dbt", "dz", "et", "el", "amq", "man", "en", "eo", "ik", "et", + "eu", "ee", "fo", "fa", "ak", "fj", "fi", "fr", "fr", "fy", + "ff", "ff", "dev", "om", "wny", "grb", "ka", "de", "vaj", "gvr", + "esg", "gtu", "aou", "gd", "ga", "gl", "kzk", "gv", "gon", "el", + "gn", "nyc", "gn", "gu", "duz", "gba", "ht", "ha", "hai", "hmn", + "he", "hz", "srx", "hi", "ho", "jal", "hr", "hu", "hy", "opa", + "ig", "is", "io", "ii", "iu", "iu", "ie", "ilm", "gal", "ia", + "id", "ik", "is", "it", "eza", "jgk", "jv", "oyb", "ja", "kl", + "kn", "ks", "ka", "kr", "kk", "zkd", "tdf", "ncq", "kml", "mn", + "km", "ki", "rw", "ky", "ku", "kr", "kg", "kok", "kwv", "kv", + "kg", "ko", "jkm", "kv", "bmf", "dtp", "kj", "ku", "gdj", "yam", + "tvd", "kru", "dgl", "dtp", "dtp", "ksp", "lo", "la", "lv", "bnc", + "enl", "raq", "li", "ln", "lt", "ngt", "rmx", "lb", "lu", "lg", + "lv", "mk", "mh", "ml", "mi", "mr", "ms", "cir", "jbk", "chm", + "mk", "mg", "mt", "man", "wnn", "xnt", "ro", "mn", "mi", "ms", + "mry", "raj", "dmw", "vaj", "my", "aog", "mry", "xny", "na", "nv", + "nru", "nr", "ekc", "kdz", "nd", "ng", "ne", "nl", "azd", "nrk", + "nn", "nbr", "ngv", "nb", "dtd", "no", "ne", "pij", "bpp", "ny", + "oc", "oj", "oj", "or", "om", "or", "os", "vaj", "pa", "kxr", + "ps", "adx", "fa", "fa", "pi", "mg", "huw", "phr", "lah", "pl", + "pt", "bfy", "lcq", "prt", "ps", "pub", "qu", "qu", "emx", "rom", + "rm", "ro", "ro", "rn", "ru", "sg", "sa", "aqt", "hle", "sr", + "hr", "isk", "si", "oyb", "sk", "sk", "sl", "kmb", "se", "sm", + "sn", "iba", "sd", "so", "st", "es", "kln", "sq", "sc", "sc", + "sr", "ss", "sgd", "ulw", "su", "sw", "sv", "sw", "ty", "ta", + "tt", "dtp", "te", "bjp", "tg", 
"fil", "th", "tpo", "ola", "oyb", + "bo", "itd", "ras", "ti", "twm", "weo", "tyj", "kak", "to", "taj", + "tn", "ts", "tmh", "tk", "tr", "ak", "ug", "uk", "del", "wro", + "ema", "ur", "uz", "uz", "ve", "vi", "vo", "cy", "wgb", "nol", + "nwo", "wa", "wo", "cax", "xh", "acn", "waw", "kpe", "dmw", "suj", + "den", "rki", "yi", "ynq", "yi", "yrm", "lrr", "mtm", "yo", "zom", + "yug", "zap", "za", "zh", "scv", "ms", "zu", "za", + }; + + if (const char* replacement = SearchReplacement(languages, aliases, language)) { + language.Set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + return false; +} + +// Language subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 43. +// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) { + MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span())); + + if (language.Length() == 2) { + return language.EqualTo("sh"); + } + + if (language.Length() == 3) { + static const char languages[6][4] = { + "cnr", "drw", "hbs", "prs", "swc", "tnf", + }; + + return HasReplacement(languages, language); + } + + return false; +} + +// Mappings from script subtags to preferred values. +// Derived from CLDR Supplemental Data, version 43. +// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) { + MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span())); + MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span())); + + { + if (script.EqualTo("Qaai")) { + script.Set(mozilla::MakeStringSpan("Zinh")); + return true; + } + return false; + } +} + +// Mappings from region subtags to preferred values. +// Derived from CLDR Supplemental Data, version 43. 
+// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) { + MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); + + if (region.Length() == 2) { + static const char regions[23][3] = { + "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI", + "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK", + "YD", "YU", "ZR", + }; + static const char* aliases[23] = { + "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM", + "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM", + "YE", "RS", "CD", + }; + + if (const char* replacement = SearchReplacement(regions, aliases, region)) { + region.Set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + { + static const char regions[299][4] = { + "004", "008", "010", "012", "016", "020", "024", "028", "031", "032", + "036", "040", "044", "048", "050", "051", "052", "056", "060", "064", + "068", "070", "072", "074", "076", "084", "086", "090", "092", "096", + "100", "104", "108", "112", "116", "120", "124", "132", "136", "140", + "144", "148", "152", "156", "158", "162", "166", "170", "174", "175", + "178", "180", "184", "188", "191", "192", "196", "203", "204", "208", + "212", "214", "218", "222", "226", "230", "231", "232", "233", "234", + "238", "239", "242", "246", "248", "249", "250", "254", "258", "260", + "262", "266", "268", "270", "275", "276", "278", "280", "288", "292", + "296", "300", "304", "308", "312", "316", "320", "324", "328", "332", + "334", "336", "340", "344", "348", "352", "356", "360", "364", "368", + "372", "376", "380", "384", "388", "392", "398", "400", "404", "408", + "410", "414", "417", "418", "422", "426", "428", "430", "434", "438", + "440", "442", "446", "450", "454", "458", "462", "466", "470", "474", + "478", "480", "484", "492", "496", "498", "499", "500", "504", "508", + "512", "516", "520", "524", "528", "531", "533", 
"534", "535", "540", + "548", "554", "558", "562", "566", "570", "574", "578", "580", "581", + "583", "584", "585", "586", "591", "598", "600", "604", "608", "612", + "616", "620", "624", "626", "630", "634", "638", "642", "643", "646", + "652", "654", "659", "660", "662", "663", "666", "670", "674", "678", + "682", "686", "688", "690", "694", "702", "703", "704", "705", "706", + "710", "716", "720", "724", "728", "729", "732", "736", "740", "744", + "748", "752", "756", "760", "762", "764", "768", "772", "776", "780", + "784", "788", "792", "795", "796", "798", "800", "804", "807", "818", + "826", "830", "831", "832", "833", "834", "840", "850", "854", "858", + "860", "862", "876", "882", "886", "887", "891", "894", "958", "959", + "960", "962", "963", "964", "965", "966", "967", "968", "969", "970", + "971", "972", "973", "974", "975", "976", "977", "978", "979", "980", + "981", "982", "983", "984", "985", "986", "987", "988", "989", "990", + "991", "992", "993", "994", "995", "996", "997", "998", "999", + }; + static const char* aliases[299] = { + "AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR", + "AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "BT", + "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG", "BN", + "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY", "CF", + "LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM", "YT", + "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ", "DK", + "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE", "FO", + "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF", "TF", + "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH", "GI", + "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY", "HT", + "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ", + "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE", "KP", + "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY", "LI", + "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", "MQ", + "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA", "MZ", + 
"OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ", "NC", + "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP", "UM", + "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH", "PN", + "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU", "RW", + "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM", "ST", + "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI", "SO", + "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR", "SJ", + "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO", "TT", + "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK", "EG", + "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF", "UY", + "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA", "QM", + "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW", "QX", + "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH", + "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ", "XR", + "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ", + }; + + if (const char* replacement = SearchReplacement(regions, aliases, region)) { + region.Set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } +} + +// Region subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 43. +// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) { + MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); + + if (region.Length() == 2) { + return region.EqualTo("AN") || + region.EqualTo("NT") || + region.EqualTo("PC") || + region.EqualTo("SU"); + } + + { + static const char regions[9][4] = { + "062", "172", "200", "530", "532", "536", "582", "810", "890", + }; + + return HasReplacement(regions, region); + } +} + +// Language subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 43. 
+// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +void mozilla::intl::Locale::PerformComplexLanguageMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); + + if (Language().EqualTo("cnr")) { + SetLanguage("sr"); + if (Region().Missing()) { + SetRegion("ME"); + } + } + else if (Language().EqualTo("drw") || + Language().EqualTo("prs") || + Language().EqualTo("tnf")) { + SetLanguage("fa"); + if (Region().Missing()) { + SetRegion("AF"); + } + } + else if (Language().EqualTo("hbs") || + Language().EqualTo("sh")) { + SetLanguage("sr"); + if (Script().Missing()) { + SetScript("Latn"); + } + } + else if (Language().EqualTo("swc")) { + SetLanguage("sw"); + if (Region().Missing()) { + SetRegion("CD"); + } + } +} + +// Region subtags with complex mappings. +// Derived from CLDR Supplemental Data, version 43. +// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +void mozilla::intl::Locale::PerformComplexRegionMappings() { + MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); + MOZ_ASSERT(IsStructurallyValidRegionTag(Region().Span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(Region().Span())); + + if (Region().EqualTo("062")) { + if (Language().EqualTo("oui") || + (Language().EqualTo("und") && Script().EqualTo("Ougr"))) { + SetRegion("143"); + } + else { + SetRegion("034"); + } + } + else if (Region().EqualTo("172")) { + if (Language().EqualTo("axm") || + Language().EqualTo("hy") || + Language().EqualTo("hyw") || + Language().EqualTo("rmi") || + (Language().EqualTo("und") && Script().EqualTo("Armn"))) { + SetRegion("AM"); + } + else if (Language().EqualTo("az") || + (Language().EqualTo("azb") && Script().EqualTo("Cyrl")) || + (Language().EqualTo("azb") && Script().EqualTo("Latn")) || + Language().EqualTo("bdk") || + (Language().EqualTo("jdt") && Script().EqualTo("Latn")) || + 
Language().EqualTo("kjj") || + Language().EqualTo("kry") || + (Language().EqualTo("rut") && Script().EqualTo("Latn")) || + Language().EqualTo("tkr") || + Language().EqualTo("tly") || + Language().EqualTo("ttt")) { + SetRegion("AZ"); + } + else if (Language().EqualTo("be") || + (Language().EqualTo("rml") && Script().EqualTo("Cyrl"))) { + SetRegion("BY"); + } + else if (Language().EqualTo("ab") || + Language().EqualTo("bbl") || + Language().EqualTo("bhn") || + Language().EqualTo("jge") || + Language().EqualTo("ka") || + (Language().EqualTo("ku") && Script().EqualTo("Yezi")) || + Language().EqualTo("oav") || + Language().EqualTo("os") || + Language().EqualTo("sva") || + (Language().EqualTo("und") && Script().EqualTo("Geor")) || + (Language().EqualTo("und") && Script().EqualTo("Yezi")) || + Language().EqualTo("uum") || + Language().EqualTo("xmf")) { + SetRegion("GE"); + } + else if (Language().EqualTo("dng") || + Language().EqualTo("ky")) { + SetRegion("KG"); + } + else if ((Language().EqualTo("ili") && Script().EqualTo("Cyrl")) || + Language().EqualTo("kk") || + (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) { + SetRegion("KZ"); + } + else if (Language().EqualTo("gag")) { + SetRegion("MD"); + } + else if (Language().EqualTo("abh") || + (Language().EqualTo("isk") && Script().EqualTo("Cyrl")) || + Language().EqualTo("paq") || + Language().EqualTo("sgh") || + Language().EqualTo("tg") || + (Language().EqualTo("wbl") && Script().EqualTo("Cyrl")) || + Language().EqualTo("yai")) { + SetRegion("TJ"); + } + else if (Language().EqualTo("chg") || + Language().EqualTo("tk")) { + SetRegion("TM"); + } + else if (Language().EqualTo("crh") || + Language().EqualTo("got") || + Language().EqualTo("jct") || + Language().EqualTo("ji") || + (Language().EqualTo("kdr") && Script().EqualTo("Cyrl")) || + Language().EqualTo("rue") || + Language().EqualTo("uk") || + (Language().EqualTo("und") && Script().EqualTo("Goth"))) { + SetRegion("UA"); + } + else if (Language().EqualTo("auz") || 
+ Language().EqualTo("kaa") || + Language().EqualTo("sog") || + (Language().EqualTo("und") && Script().EqualTo("Chrs")) || + (Language().EqualTo("und") && Script().EqualTo("Sogd")) || + (Language().EqualTo("und") && Script().EqualTo("Sogo")) || + Language().EqualTo("uz") || + Language().EqualTo("xco")) { + SetRegion("UZ"); + } + else { + SetRegion("RU"); + } + } + else if (Region().EqualTo("200")) { + if (Language().EqualTo("rmc") || + Language().EqualTo("sk")) { + SetRegion("SK"); + } + else { + SetRegion("CZ"); + } + } + else if (Region().EqualTo("530") || + Region().EqualTo("532") || + Region().EqualTo("AN")) { + if (Language().EqualTo("vic")) { + SetRegion("SX"); + } + else { + SetRegion("CW"); + } + } + else if (Region().EqualTo("536") || + Region().EqualTo("NT")) { + if (Language().EqualTo("acm") || + Language().EqualTo("akk") || + Language().EqualTo("ayp") || + Language().EqualTo("bjm") || + Language().EqualTo("ckb") || + Language().EqualTo("kqd") || + (Language().EqualTo("ku") && Script().EqualTo("Arab")) || + Language().EqualTo("mid") || + Language().EqualTo("sdb") || + Language().EqualTo("sdf") || + Language().EqualTo("syr") || + (Language().EqualTo("und") && Script().EqualTo("Syrc")) || + (Language().EqualTo("und") && Script().EqualTo("Xsux"))) { + SetRegion("IQ"); + } + else { + SetRegion("SA"); + } + } + else if (Region().EqualTo("582") || + Region().EqualTo("PC")) { + if (Language().EqualTo("mh")) { + SetRegion("MH"); + } + else if (Language().EqualTo("cal") || + Language().EqualTo("tpv")) { + SetRegion("MP"); + } + else if (Language().EqualTo("pau") || + Language().EqualTo("sov") || + Language().EqualTo("tox")) { + SetRegion("PW"); + } + else { + SetRegion("FM"); + } + } + else if (Region().EqualTo("810") || + Region().EqualTo("SU")) { + if (Language().EqualTo("axm") || + Language().EqualTo("hy") || + Language().EqualTo("hyw") || + Language().EqualTo("rmi") || + (Language().EqualTo("und") && Script().EqualTo("Armn"))) { + SetRegion("AM"); + } + else 
if (Language().EqualTo("az") || + (Language().EqualTo("azb") && Script().EqualTo("Cyrl")) || + (Language().EqualTo("azb") && Script().EqualTo("Latn")) || + Language().EqualTo("bdk") || + (Language().EqualTo("jdt") && Script().EqualTo("Latn")) || + Language().EqualTo("kjj") || + Language().EqualTo("kry") || + (Language().EqualTo("rut") && Script().EqualTo("Latn")) || + Language().EqualTo("tkr") || + Language().EqualTo("tly") || + Language().EqualTo("ttt")) { + SetRegion("AZ"); + } + else if (Language().EqualTo("be") || + (Language().EqualTo("rml") && Script().EqualTo("Cyrl"))) { + SetRegion("BY"); + } + else if (Language().EqualTo("et") || + Language().EqualTo("vro")) { + SetRegion("EE"); + } + else if (Language().EqualTo("ab") || + Language().EqualTo("bbl") || + Language().EqualTo("bhn") || + Language().EqualTo("jge") || + Language().EqualTo("ka") || + (Language().EqualTo("ku") && Script().EqualTo("Yezi")) || + Language().EqualTo("oav") || + Language().EqualTo("os") || + Language().EqualTo("sva") || + (Language().EqualTo("und") && Script().EqualTo("Geor")) || + (Language().EqualTo("und") && Script().EqualTo("Yezi")) || + Language().EqualTo("uum") || + Language().EqualTo("xmf")) { + SetRegion("GE"); + } + else if (Language().EqualTo("dng") || + Language().EqualTo("ky")) { + SetRegion("KG"); + } + else if ((Language().EqualTo("ili") && Script().EqualTo("Cyrl")) || + Language().EqualTo("kk") || + (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) { + SetRegion("KZ"); + } + else if (Language().EqualTo("kdr") || + Language().EqualTo("lt") || + Language().EqualTo("olt") || + Language().EqualTo("sgs")) { + SetRegion("LT"); + } + else if (Language().EqualTo("liv") || + Language().EqualTo("ltg") || + Language().EqualTo("lv")) { + SetRegion("LV"); + } + else if (Language().EqualTo("gag")) { + SetRegion("MD"); + } + else if (Language().EqualTo("abh") || + (Language().EqualTo("isk") && Script().EqualTo("Cyrl")) || + Language().EqualTo("paq") || + Language().EqualTo("sgh") 
|| + Language().EqualTo("tg") || + (Language().EqualTo("wbl") && Script().EqualTo("Cyrl")) || + Language().EqualTo("yai")) { + SetRegion("TJ"); + } + else if (Language().EqualTo("chg") || + Language().EqualTo("tk")) { + SetRegion("TM"); + } + else if (Language().EqualTo("crh") || + Language().EqualTo("got") || + Language().EqualTo("jct") || + Language().EqualTo("ji") || + (Language().EqualTo("kdr") && Script().EqualTo("Cyrl")) || + Language().EqualTo("rue") || + Language().EqualTo("uk") || + (Language().EqualTo("und") && Script().EqualTo("Goth"))) { + SetRegion("UA"); + } + else if (Language().EqualTo("auz") || + Language().EqualTo("kaa") || + Language().EqualTo("sog") || + (Language().EqualTo("und") && Script().EqualTo("Chrs")) || + (Language().EqualTo("und") && Script().EqualTo("Sogd")) || + (Language().EqualTo("und") && Script().EqualTo("Sogo")) || + Language().EqualTo("uz") || + Language().EqualTo("xco")) { + SetRegion("UZ"); + } + else { + SetRegion("RU"); + } + } + else if (Region().EqualTo("890")) { + if (Language().EqualTo("bs")) { + SetRegion("BA"); + } + else if (Language().EqualTo("ckm") || + Language().EqualTo("dlm") || + Language().EqualTo("hr") || + Language().EqualTo("ist") || + Language().EqualTo("ruo")) { + SetRegion("HR"); + } + else if (Language().EqualTo("mk")) { + SetRegion("MK"); + } + else if (Language().EqualTo("sl")) { + SetRegion("SI"); + } + else { + SetRegion("RS"); + } + } +} + +static const char* ToCharPointer(const char* str) { + return str; +} + +static const char* ToCharPointer(const mozilla::intl::UniqueChars& str) { + return str.get(); +} + +template <typename T, typename U = T> +static bool IsLessThan(const T& a, const U& b) { + return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0; +} + +// Mappings from variant subtags to preferred values. +// Derived from CLDR Supplemental Data, version 43. 
+// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::PerformVariantMappings() { + // The variant subtags need to be sorted for binary search. + MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), + IsLessThan<decltype(mVariants)::ElementType>)); + + auto removeVariantAt = [&](size_t index) { + mVariants.erase(mVariants.begin() + index); + }; + + auto insertVariantSortedIfNotPresent = [&](const char* variant) { + auto* p = std::lower_bound( + mVariants.begin(), mVariants.end(), variant, + IsLessThan<decltype(mVariants)::ElementType, decltype(variant)>); + + // Don't insert the replacement when already present. + if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { + return true; + } + + // Insert the preferred variant in sort order. + auto preferred = DuplicateStringToUniqueChars(variant); + return !!mVariants.insert(p, std::move(preferred)); + }; + + for (size_t i = 0; i < mVariants.length();) { + const char* variant = mVariants[i].get(); + MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant))); + + if (strcmp(variant, "arevela") == 0 || + strcmp(variant, "arevmda") == 0 || + strcmp(variant, "bokmal") == 0 || + strcmp(variant, "hakka") == 0 || + strcmp(variant, "lojban") == 0 || + strcmp(variant, "nynorsk") == 0 || + strcmp(variant, "saaho") == 0 || + strcmp(variant, "xiang") == 0) { + removeVariantAt(i); + } + else if (strcmp(variant, "aaland") == 0) { + removeVariantAt(i); + SetRegion("AX"); + } + else if (strcmp(variant, "heploc") == 0) { + removeVariantAt(i); + if (!insertVariantSortedIfNotPresent("alalc97")) { + return false; + } + } + else if (strcmp(variant, "polytoni") == 0) { + removeVariantAt(i); + if (!insertVariantSortedIfNotPresent("polyton")) { + return false; + } + } + else { + i++; + } + } + return true; +} + +// Canonicalize legacy locale identifiers. +// Derived from CLDR Supplemental Data, version 43. 
+// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::UpdateLegacyMappings() { + // We're mapping legacy tags to non-legacy form here. + // Other tags remain unchanged. + // + // Legacy tags are either sign language tags ("sgn") or have one or multiple + // variant subtags. Therefore we can quickly exclude most tags by checking + // these two subtags. + + MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); + + if (!Language().EqualTo("sgn") && mVariants.length() == 0) { + return true; + } + +#ifdef DEBUG + for (const auto& variant : Variants()) { + MOZ_ASSERT(IsStructurallyValidVariantTag(variant)); + MOZ_ASSERT(IsCanonicallyCasedVariantTag(variant)); + } +#endif + + // The variant subtags need to be sorted for binary search. + MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), + IsLessThan<decltype(mVariants)::ElementType>)); + + auto findVariant = [this](const char* variant) { + auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, + IsLessThan<decltype(mVariants)::ElementType, + decltype(variant)>); + + if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { + return p; + } + return static_cast<decltype(p)>(nullptr); + }; + + auto insertVariantSortedIfNotPresent = [&](const char* variant) { + auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, + IsLessThan<decltype(mVariants)::ElementType, + decltype(variant)>); + + // Don't insert the replacement when already present. + if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { + return true; + } + + // Insert the preferred variant in sort order. 
+ auto preferred = DuplicateStringToUniqueChars(variant); + return !!mVariants.insert(p, std::move(preferred)); + }; + + auto removeVariant = [&](auto* p) { + size_t index = std::distance(mVariants.begin(), p); + mVariants.erase(mVariants.begin() + index); + }; + + auto removeVariants = [&](auto* p, auto* q) { + size_t pIndex = std::distance(mVariants.begin(), p); + size_t qIndex = std::distance(mVariants.begin(), q); + MOZ_ASSERT(pIndex < qIndex, "variant subtags are sorted"); + + mVariants.erase(mVariants.begin() + qIndex); + mVariants.erase(mVariants.begin() + pIndex); + }; + + if (mVariants.length() >= 2) { + if (auto* hepburn = findVariant("hepburn")) { + if (auto* heploc = findVariant("heploc")) { + removeVariants(hepburn, heploc); + + if (!insertVariantSortedIfNotPresent("alalc97")) { + return false; + } + } + } + } + + if (Language().EqualTo("sgn")) { + if (Region().Present() && SignLanguageMapping(mLanguage, Region())) { + mRegion.Set(mozilla::MakeStringSpan("")); + } + } + else if (Language().EqualTo("aa") || + Language().EqualTo("aar")) { + if (auto* saaho = findVariant("saaho")) { + removeVariant(saaho); + SetLanguage("ssy"); + } + } + else if (Language().EqualTo("arm") || + Language().EqualTo("hy") || + Language().EqualTo("hye")) { + if (auto* arevmda = findVariant("arevmda")) { + removeVariant(arevmda); + SetLanguage("hyw"); + } + } + else if (Language().EqualTo("art")) { + if (auto* lojban = findVariant("lojban")) { + removeVariant(lojban); + SetLanguage("jbo"); + } + } + else if (Language().EqualTo("cel")) { + if (auto* gaulish = findVariant("gaulish")) { + removeVariant(gaulish); + SetLanguage("xtg"); + } + } + else if (Language().EqualTo("chi") || + Language().EqualTo("cmn") || + Language().EqualTo("zh") || + Language().EqualTo("zho")) { + if (auto* guoyu = findVariant("guoyu")) { + if (auto* hakka = findVariant("hakka")) { + removeVariants(guoyu, hakka); + SetLanguage("hak"); + return true; + } + } + if (auto* guoyu = findVariant("guoyu")) { + if 
(auto* xiang = findVariant("xiang")) { + removeVariants(guoyu, xiang); + SetLanguage("hsn"); + return true; + } + } + if (auto* guoyu = findVariant("guoyu")) { + removeVariant(guoyu); + SetLanguage("zh"); + } + else if (auto* hakka = findVariant("hakka")) { + removeVariant(hakka); + SetLanguage("hak"); + } + else if (auto* xiang = findVariant("xiang")) { + removeVariant(xiang); + SetLanguage("hsn"); + } + } + else if (Language().EqualTo("no") || + Language().EqualTo("nor")) { + if (auto* bokmal = findVariant("bokmal")) { + removeVariant(bokmal); + SetLanguage("nb"); + } + else if (auto* nynorsk = findVariant("nynorsk")) { + removeVariant(nynorsk); + SetLanguage("nn"); + } + } + + return true; +} + +// Mappings from legacy sign languages. +// Derived from CLDR Supplemental Data, version 43. +// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip +bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language, + const RegionSubtag& region) { + MOZ_ASSERT(language.EqualTo("sgn")); + MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); + MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); + + if (region.Length() == 2) { + static const char regions[22][3] = { + "BR", "CO", "DD", "DE", "DK", "ES", "FR", "FX", "GB", "GR", + "IE", "IT", "JP", "MX", "NI", "NL", "NO", "PT", "SE", "UK", + "US", "ZA", + }; + static const char* aliases[22] = { + "bzs", "csn", "gsg", "gsg", "dsl", "ssp", "fsl", "fsl", "bfi", "gss", + "isg", "ise", "jsl", "mfs", "ncs", "dse", "nsi", "psr", "swl", "bfi", + "ase", "sfs", + }; + + if (const char* replacement = SearchReplacement(regions, aliases, region)) { + language.Set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } + + { + static const char regions[22][4] = { + "076", "170", "208", "249", "250", "276", "278", "280", "300", "372", + "380", "392", "484", "528", "558", "578", "620", "710", "724", "752", + "826", "840", + }; + static const char* aliases[22] = { + "bzs", "csn", "dsl", "fsl", "fsl", 
"gsg", "gsg", "gsg", "gss", "isg", + "ise", "jsl", "mfs", "dse", "ncs", "nsi", "psr", "sfs", "ssp", "swl", + "bfi", "ase", + }; + + if (const char* replacement = SearchReplacement(regions, aliases, region)) { + language.Set(mozilla::MakeStringSpan(replacement)); + return true; + } + return false; + } +} + +template <size_t Length> +static inline bool IsUnicodeKey(mozilla::Span<const char> key, const char (&str)[Length]) { + static_assert(Length == UnicodeKeyLength + 1, + "Unicode extension key is two characters long"); + return memcmp(key.data(), str, Length - 1) == 0; +} + +template <size_t Length> +static inline bool IsUnicodeType(mozilla::Span<const char> type, const char (&str)[Length]) { + static_assert(Length > UnicodeKeyLength + 1, + "Unicode extension type contains more than two characters"); + return type.size() == (Length - 1) && + memcmp(type.data(), str, Length - 1) == 0; +} + +static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) { + MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\0'), + "unexpected null-character in string"); + + using UnsignedChar = unsigned char; + for (size_t i = 0; i < b.size(); i++) { + // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if + // we've reached the end of |a|, the below if-statement will always be true. + // That ensures we don't read past the end of |a|. + if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) { + return r; + } + } + + // Return zero if both strings are equal or a positive number if |b| is a + // prefix of |a|. 
+ return int32_t(UnsignedChar(a[b.size()])); +} + +template <size_t Length> +static inline const char* SearchUnicodeReplacement( + const char* (&types)[Length], const char* (&aliases)[Length], + mozilla::Span<const char> type) { + + auto p = std::lower_bound(std::begin(types), std::end(types), type, + [](const auto& a, const auto& b) { + return CompareUnicodeType(a, b) < 0; + }); + if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) { + return aliases[std::distance(std::begin(types), p)]; + } + return nullptr; +} + +/** + * Mapping from deprecated BCP 47 Unicode extension types to their preferred + * values. + * + * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files + * Spec: https://www.unicode.org/reports/tr35/#t_Extension + */ +const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type) { + MOZ_ASSERT(key.size() == UnicodeKeyLength); + MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key)); + + MOZ_ASSERT(type.size() > UnicodeKeyLength); + MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type)); + + if (IsUnicodeKey(key, "ca")) { + if (IsUnicodeType(type, "ethiopic-amete-alem")) { + return "ethioaa"; + } + if (IsUnicodeType(type, "islamicc")) { + return "islamic-civil"; + } + } + else if (IsUnicodeKey(key, "kb") || + IsUnicodeKey(key, "kc") || + IsUnicodeKey(key, "kh") || + IsUnicodeKey(key, "kk") || + IsUnicodeKey(key, "kn")) { + if (IsUnicodeType(type, "yes")) { + return "true"; + } + } + else if (IsUnicodeKey(key, "ks")) { + if (IsUnicodeType(type, "primary")) { + return "level1"; + } + if (IsUnicodeType(type, "tertiary")) { + return "level3"; + } + } + else if (IsUnicodeKey(key, "ms")) { + if (IsUnicodeType(type, "imperial")) { + return "uksystem"; + } + } + else if (IsUnicodeKey(key, "rg") || + IsUnicodeKey(key, "sd")) { + static const char* types[144] = { + "cn11" , "cn12" , "cn13" , "cn14" , "cn15" , "cn21" , "cn22" , + "cn23" , "cn31" , "cn32" , "cn33" , "cn34" , 
"cn35" , "cn36" , + "cn37" , "cn41" , "cn42" , "cn43" , "cn44" , "cn45" , "cn46" , + "cn50" , "cn51" , "cn52" , "cn53" , "cn54" , "cn61" , "cn62" , + "cn63" , "cn64" , "cn65" , "cn71" , "cn91" , "cn92" , "cz10a" , + "cz10b" , "cz10c" , "cz10d" , "cz10e" , "cz10f" , "cz611" , "cz612" , + "cz613" , "cz614" , "cz615" , "cz621" , "cz622" , "cz623" , "cz624" , + "cz626" , "cz627" , "czjc" , "czjm" , "czka" , "czkr" , "czli" , + "czmo" , "czol" , "czpa" , "czpl" , "czpr" , "czst" , "czus" , + "czvy" , "czzl" , "fi01" , "fra" , "frb" , "frbl" , "frc" , + "frcp" , "frd" , "fre" , "frf" , "frg" , "frgf" , "frgp" , + "frh" , "fri" , "frj" , "frk" , "frl" , "frm" , "frmf" , + "frmq" , "frn" , "frnc" , "fro" , "frp" , "frpf" , "frpm" , + "frq" , "frr" , "frre" , "frs" , "frt" , "frtf" , "fru" , + "frv" , "frwf" , "fryt" , "laxn" , "lud" , "lug" , "lul" , + "mrnkc" , "nlaw" , "nlcw" , "nlsx" , "no23" , "nzn" , "nzs" , + "omba" , "omsh" , "plds" , "plkp" , "pllb" , "plld" , "pllu" , + "plma" , "plmz" , "plop" , "plpd" , "plpk" , "plpm" , "plsk" , + "plsl" , "plwn" , "plwp" , "plzp" , "shta" , "tteto" , "ttrcm" , + "ttwto" , "twkhq" , "twtnq" , "twtpq" , "twtxq" , "usas" , "usgu" , + "usmp" , "uspr" , "usum" , "usvi" , + }; + static const char* aliases[144] = { + "cnbj" , "cntj" , "cnhe" , "cnsx" , "cnmn" , "cnln" , "cnjl" , + "cnhl" , "cnsh" , "cnjs" , "cnzj" , "cnah" , "cnfj" , "cnjx" , + "cnsd" , "cnha" , "cnhb" , "cnhn" , "cngd" , "cngx" , "cnhi" , + "cncq" , "cnsc" , "cngz" , "cnyn" , "cnxz" , "cnsn" , "cngs" , + "cnqh" , "cnnx" , "cnxj" , "twzzzz", "hkzzzz", "mozzzz", "cz110" , + "cz111" , "cz112" , "cz113" , "cz114" , "cz115" , "cz663" , "cz632" , + "cz633" , "cz634" , "cz635" , "cz641" , "cz642" , "cz643" , "cz644" , + "cz646" , "cz647" , "cz31" , "cz64" , "cz41" , "cz52" , "cz51" , + "cz80" , "cz71" , "cz53" , "cz32" , "cz10" , "cz20" , "cz42" , + "cz63" , "cz72" , "axzzzz", "frges" , "frnaq" , "blzzzz", "frara" , + "cpzzzz", "frbfc" , "frbre" , "frcvl" , "frges" , 
"gfzzzz", "gpzzzz", + "frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "frges" , "mfzzzz", + "mqzzzz", "frocc" , "nczzzz", "frhdf" , "frnor" , "pfzzzz", "pmzzzz", + "frnor" , "frpdl" , "rezzzz", "frhdf" , "frnaq" , "tfzzzz", "frpac" , + "frara" , "wfzzzz", "ytzzzz", "laxs" , "lucl" , "luec" , "luca" , + "mr13" , "awzzzz", "cwzzzz", "sxzzzz", "no50" , "nzauk" , "nzcan" , + "ombj" , "omsj" , "pl02" , "pl04" , "pl08" , "pl10" , "pl06" , + "pl12" , "pl14" , "pl16" , "pl20" , "pl18" , "pl22" , "pl26" , + "pl24" , "pl28" , "pl30" , "pl32" , "tazzzz", "tttob" , "ttmrc" , + "tttob" , "twkhh" , "twtnn" , "twnwt" , "twtxg" , "aszzzz", "guzzzz", + "mpzzzz", "przzzz", "umzzzz", "vizzzz", + }; + return SearchUnicodeReplacement(types, aliases, type); + } + else if (IsUnicodeKey(key, "tz")) { + static const char* types[30] = { + "aqams" , "camtr" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" , + "egypt" , "eire" , "est" , "gaza" , "gmt0" , "hongkong", + "hst" , "iceland" , "iran" , "israel" , "jamaica" , "japan" , + "libya" , "mst" , "navajo" , "poland" , "portugal", "prc" , + "roc" , "rok" , "turkey" , "uct" , "usnavajo", "zulu" , + }; + static const char* aliases[30] = { + "nzakl" , "cator" , "cnsha" , "cnsha" , "cnurc" , "cuhav" , + "egcai" , "iedub" , "utcw05" , "gazastrp", "gmt" , "hkhkg" , + "utcw10" , "isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" , + "lytip" , "utcw07" , "usden" , "plwaw" , "ptlis" , "cnsha" , + "twtpe" , "krsel" , "trist" , "utc" , "usden" , "utc" , + }; + return SearchUnicodeReplacement(types, aliases, type); + } + return nullptr; +} + +template <size_t Length> +static inline bool IsTransformKey(mozilla::Span<const char> key, const char (&str)[Length]) { + static_assert(Length == TransformKeyLength + 1, + "Transform extension key is two characters long"); + return memcmp(key.data(), str, Length - 1) == 0; +} + +template <size_t Length> +static inline bool IsTransformType(mozilla::Span<const char> type, const char (&str)[Length]) { + static_assert(Length > 
TransformKeyLength + 1, + "Transform extension type contains more than two characters"); + return type.size() == (Length - 1) && + memcmp(type.data(), str, Length - 1) == 0; +} + +/** + * Mapping from deprecated BCP 47 Transform extension types to their preferred + * values. + * + * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files + * Spec: https://www.unicode.org/reports/tr35/#t_Extension + */ +const char* mozilla::intl::Locale::ReplaceTransformExtensionType( + mozilla::Span<const char> key, mozilla::Span<const char> type) { + MOZ_ASSERT(key.size() == TransformKeyLength); + MOZ_ASSERT(IsCanonicallyCasedTransformKey(key)); + + MOZ_ASSERT(type.size() > TransformKeyLength); + MOZ_ASSERT(IsCanonicallyCasedTransformType(type)); + + if (IsTransformKey(key, "d0")) { + if (IsTransformType(type, "name")) { + return "charname"; + } + } + else if (IsTransformKey(key, "m0")) { + if (IsTransformType(type, "beta-metsehaf")) { + return "betamets"; + } + if (IsTransformType(type, "ies-jes")) { + return "iesjes"; + } + if (IsTransformType(type, "names")) { + return "prprname"; + } + if (IsTransformType(type, "tekie-alibekit")) { + return "tekieali"; + } + } + return nullptr; +} |