// Generated by make_intl_data.py. DO NOT EDIT. // Version: CLDR-42 // URL: https://unicode.org/Public/cldr/42/core.zip #include "mozilla/Assertions.h" #include "mozilla/Span.h" #include "mozilla/TextUtils.h" #include #include #include #include #include #include #include "mozilla/intl/Locale.h" using namespace mozilla::intl::LanguageTagLimits; template static inline bool HasReplacement( const char (&subtags)[Length][TagLength], const mozilla::intl::LanguageTagSubtag& subtag) { MOZ_ASSERT(subtag.Length() == TagLength - 1, "subtag must have the same length as the list of subtags"); const char* ptr = subtag.Span().data(); return std::binary_search(std::begin(subtags), std::end(subtags), ptr, [](const char* a, const char* b) { return memcmp(a, b, TagLength - 1) < 0; }); } template static inline const char* SearchReplacement( const char (&subtags)[Length][TagLength], const char* (&aliases)[Length], const mozilla::intl::LanguageTagSubtag& subtag) { MOZ_ASSERT(subtag.Length() == TagLength - 1, "subtag must have the same length as the list of subtags"); const char* ptr = subtag.Span().data(); auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr, [](const char* a, const char* b) { return memcmp(a, b, TagLength - 1) < 0; }); if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) { return aliases[std::distance(std::begin(subtags), p)]; } return nullptr; } #ifdef DEBUG static bool IsAsciiLowercaseAlphanumeric(char c) { return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c); } static bool IsAsciiLowercaseAlphanumericOrDash(char c) { return IsAsciiLowercaseAlphanumeric(c) || c == '-'; } static bool IsCanonicallyCasedLanguageTag(mozilla::Span span) { return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha); } static bool IsCanonicallyCasedScriptTag(mozilla::Span span) { return mozilla::IsAsciiUppercaseAlpha(span[0]) && std::all_of(span.begin() + 1, span.end(), mozilla::IsAsciiLowercaseAlpha); } static bool IsCanonicallyCasedRegionTag(mozilla::Span span) { return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha) || std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit); } static bool IsCanonicallyCasedVariantTag(mozilla::Span span) { return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric); } static bool IsCanonicallyCasedUnicodeKey(mozilla::Span key) { return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); } static bool IsCanonicallyCasedUnicodeType(mozilla::Span type) { return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash); } static bool IsCanonicallyCasedTransformKey(mozilla::Span key) { return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); } static bool IsCanonicallyCasedTransformType(mozilla::Span type) { return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash); } #endif // Mappings from language subtags to preferred values. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) { MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span())); MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span())); if (language.Length() == 2) { static const char languages[8][3] = { "bh", "in", "iw", "ji", "jw", "mo", "tl", "tw", }; static const char* aliases[8] = { "bho", "id", "he", "yi", "jv", "ro", "fil", "ak", }; if (const char* replacement = SearchReplacement(languages, aliases, language)) { language.Set(mozilla::MakeStringSpan(replacement)); return true; } return false; } if (language.Length() == 3) { static const char languages[408][4] = { "aam", "aar", "abk", "adp", "afr", "agp", "ais", "ajt", "aju", "aka", "alb", "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm", "aue", "ava", "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam", "baq", "baz", "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bic", "bih", "bis", "bjd", "bjq", "bkb", "blg", "bod", "bos", "bre", "btb", "bul", "bur", "bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", "chv", "cjr", "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre", "cwd", "cym", "cze", "daf", "dan", "dap", "deu", "dgo", "dhd", "dik", "diq", "dit", "div", "djl", "dkl", "drh", "drr", "dud", "duj", "dut", "dwl", "dzo", "ekk", "ell", "elp", "emk", "eng", "epo", "esk", "est", "eus", "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre", "fry", "fuc", "ful", "gav", "gaz", "gbc", "gbo", "geo", "ger", "gfx", "ggn", "ggo", "ggr", "gio", "gla", "gle", "glg", "gli", "glv", "gno", "gre", "grn", "gti", "gug", "guj", "guv", "gya", "hat", "hau", "hdn", "hea", "heb", "her", "him", "hin", "hmo", "hrr", "hrv", "hun", "hye", "ibi", "ibo", "ice", "ido", "iii", "ike", "iku", "ile", "ill", "ilw", "ina", "ind", "ipk", "isl", "ita", "izi", "jar", "jav", "jeg", "jpn", "kal", "kan", "kas", "kat", "kau", "kaz", "kdv", "kgc", "kgd", "kgh", "khk", "khm", "kik", "kin", "kir", "kmr", "knc", "kng", "knn", "koj", "kom", "kon", "kor", "kpp", "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq", "kxe", "kxl", "kzh", "kzj", "kzt", "lak", "lao", "lat", "lav", "lbk", "leg", "lii", "lim", "lin", "lit", "llo", "lmm", "ltz", "lub", "lug", "lvs", "mac", "mah", "mal", "mao", "mar", "may", "meg", "mgx", "mhr", "mkd", "mlg", "mlt", "mnk", "mnt", "mof", "mol", "mon", "mri", "msa", "mst", "mup", "mwd", "mwj", "mya", "myd", "myt", "nad", "nau", "nav", "nbf", "nbl", "nbx", "ncp", "nde", "ndo", "nep", "nld", "nln", "nlr", "nno", "nns", "nnx", "nob", "noo", "nor", "npi", "nts", "nxu", "nya", "oci", "ojg", "oji", "ori", "orm", "ory", "oss", "oun", "pan", "pat", "pbu", "pcr", "per", "pes", "pli", "plt", "pmc", "pmu", "pnb", "pol", "por", "ppa", "ppr", "pry", "pus", "puz", "que", "quz", "rmr", "rmy", "roh", "ron", "rum", "run", "rus", "sag", "san", "sap", "sca", "scc", "scr", "sgl", "sin", "skk", "slk", "slo", "slv", "smd", "sme", "smo", "sna", "snb", "snd", "som", "sot", "spa", "spy", "sqi", "src", "srd", "srp", "ssw", "sul", "sum", "sun", "swa", "swe", "swh", "tah", "tam", "tat", "tdu", "tel", "tgg", "tgk", "tgl", "tha", "thc", "thw", "thx", "tib", "tid", "tie", "tir", "tkk", "tlw", "tmp", "tne", "ton", "tsf", "tsn", "tso", "ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "unp", "uok", "urd", "uzb", "uzn", "ven", "vie", "vol", "wel", "wgw", "wit", "wiw", "wln", "wol", "xba", "xho", "xia", "xkh", "xpe", "xrq", "xsj", "xsl", "ybd", "ydd", "yen", "yid", "yiy", "yma", "ymt", "yor", "yos", "yuu", "zai", "zha", "zho", "zir", "zsm", "zul", "zyb", }; static const char* aliases[408] = { "aas", "aa", "ab", "dz", "af", "apf", "ami", "aeb", "jrb", "ak", "sq", "sq", "am", "ar", "ar", "an", "hy", "snz", "as", "ktz", "av", "ae", "ay", "ay", "nun", "az", "az", "ba", "bm", "eu", "nvo", "bal", "bik", "be", "bn", "bcg", "fbl", "bir", "bho", "bi", "drl", "bzc", "ebk", "iba", "bo", "bs", "br", "beb", "bg", "my", "luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu", "cv", "mom", "cmr", "syr", "xch", "zh", "kw", "co", "pij", "quh", "cr", "cr", "cy", "cs", "dnj", "da", "njz", "de", "doi", "mwr", "din", "zza", "dif", "dv", "dze", "aqd", "mn", "kzk", "uth", "dwu", "nl", "dbt", "dz", "et", "el", "amq", "man", "en", "eo", "ik", "et", "eu", "ee", "fo", "fa", "ak", "fj", "fi", "fr", "fr", "fy", "ff", "ff", "dev", "om", "wny", "grb", "ka", "de", "vaj", "gvr", "esg", "gtu", "aou", "gd", "ga", "gl", "kzk", "gv", "gon", "el", "gn", "nyc", "gn", "gu", "duz", "gba", "ht", "ha", "hai", "hmn", "he", "hz", "srx", "hi", "ho", "jal", "hr", "hu", "hy", "opa", "ig", "is", "io", "ii", "iu", "iu", "ie", "ilm", "gal", "ia", "id", "ik", "is", "it", "eza", "jgk", "jv", "oyb", "ja", "kl", "kn", "ks", "ka", "kr", "kk", "zkd", "tdf", "ncq", "kml", "mn", "km", "ki", "rw", "ky", "ku", "kr", "kg", "kok", "kwv", "kv", "kg", "ko", "jkm", "kv", "bmf", "dtp", "kj", "ku", "gdj", "yam", "tvd", "kru", "dgl", "dtp", "dtp", "ksp", "lo", "la", "lv", "bnc", "enl", "raq", "li", "ln", "lt", "ngt", "rmx", "lb", "lu", "lg", "lv", "mk", "mh", "ml", "mi", "mr", "ms", "cir", "jbk", "chm", "mk", "mg", "mt", "man", "wnn", "xnt", "ro", "mn", "mi", "ms", "mry", "raj", "dmw", "vaj", "my", "aog", "mry", "xny", "na", "nv", "nru", "nr", "ekc", "kdz", "nd", "ng", "ne", "nl", "azd", "nrk", "nn", "nbr", "ngv", "nb", "dtd", "no", "ne", "pij", "bpp", "ny", "oc", "oj", "oj", "or", "om", "or", "os", "vaj", "pa", "kxr", "ps", "adx", "fa", "fa", "pi", "mg", "huw", "phr", "lah", "pl", "pt", "bfy", "lcq", "prt", "ps", "pub", "qu", "qu", "emx", "rom", "rm", "ro", "ro", "rn", "ru", "sg", "sa", "aqt", "hle", "sr", "hr", "isk", "si", "oyb", "sk", "sk", "sl", "kmb", "se", "sm", "sn", "iba", "sd", "so", "st", "es", "kln", "sq", "sc", "sc", "sr", "ss", "sgd", "ulw", "su", "sw", "sv", "sw", "ty", "ta", "tt", "dtp", "te", "bjp", "tg", "fil", "th", "tpo", "ola", "oyb", "bo", "itd", "ras", "ti", "twm", "weo", "tyj", "kak", "to", "taj", "tn", "ts", "tmh", "tk", "tr", "ak", "ug", "uk", "del", "wro", "ema", "ur", "uz", "uz", "ve", "vi", "vo", "cy", "wgb", "nol", "nwo", "wa", "wo", "cax", "xh", "acn", "waw", "kpe", "dmw", "suj", "den", "rki", "yi", "ynq", "yi", "yrm", "lrr", "mtm", "yo", "zom", "yug", "zap", "za", "zh", "scv", "ms", "zu", "za", }; if (const char* replacement = SearchReplacement(languages, aliases, language)) { language.Set(mozilla::MakeStringSpan(replacement)); return true; } return false; } return false; } // Language subtags with complex mappings. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) { MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span())); MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span())); if (language.Length() == 2) { return language.EqualTo("sh"); } if (language.Length() == 3) { static const char languages[6][4] = { "cnr", "drw", "hbs", "prs", "swc", "tnf", }; return HasReplacement(languages, language); } return false; } // Mappings from script subtags to preferred values. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) { MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span())); MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span())); { if (script.EqualTo("Qaai")) { script.Set(mozilla::MakeStringSpan("Zinh")); return true; } return false; } } // Mappings from region subtags to preferred values. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) { MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); if (region.Length() == 2) { static const char regions[23][3] = { "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI", "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK", "YD", "YU", "ZR", }; static const char* aliases[23] = { "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM", "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM", "YE", "RS", "CD", }; if (const char* replacement = SearchReplacement(regions, aliases, region)) { region.Set(mozilla::MakeStringSpan(replacement)); return true; } return false; } { static const char regions[299][4] = { "004", "008", "010", "012", "016", "020", "024", "028", "031", "032", "036", "040", "044", "048", "050", "051", "052", "056", "060", "064", "068", "070", "072", "074", "076", "084", "086", "090", "092", "096", "100", "104", "108", "112", "116", "120", "124", "132", "136", "140", "144", "148", "152", "156", "158", "162", "166", "170", "174", "175", "178", "180", "184", "188", "191", "192", "196", "203", "204", "208", "212", "214", "218", "222", "226", "230", "231", "232", "233", "234", "238", "239", "242", "246", "248", "249", "250", "254", "258", "260", "262", "266", "268", "270", "275", "276", "278", "280", "288", "292", "296", "300", "304", "308", "312", "316", "320", "324", "328", "332", "334", "336", "340", "344", "348", "352", "356", "360", "364", "368", "372", "376", "380", "384", "388", "392", "398", "400", "404", "408", "410", "414", "417", "418", "422", "426", "428", "430", "434", "438", "440", "442", "446", "450", "454", "458", "462", "466", "470", "474", "478", "480", "484", "492", "496", "498", "499", "500", "504", "508", "512", "516", "520", "524", "528", "531", "533", "534", "535", "540", "548", "554", "558", "562", "566", "570", "574", "578", "580", "581", "583", "584", "585", "586", "591", "598", "600", "604", "608", "612", "616", "620", "624", "626", "630", "634", "638", "642", "643", "646", "652", "654", "659", "660", "662", "663", "666", "670", "674", "678", "682", "686", "688", "690", "694", "702", "703", "704", "705", "706", "710", "716", "720", "724", "728", "729", "732", "736", "740", "744", "748", "752", "756", "760", "762", "764", "768", "772", "776", "780", "784", "788", "792", "795", "796", "798", "800", "804", "807", "818", "826", "830", "831", "832", "833", "834", "840", "850", "854", "858", "860", "862", "876", "882", "886", "887", "891", "894", "958", "959", "960", "962", "963", "964", "965", "966", "967", "968", "969", "970", "971", "972", "973", "974", "975", "976", "977", "978", "979", "980", "981", "982", "983", "984", "985", "986", "987", "988", "989", "990", "991", "992", "993", "994", "995", "996", "997", "998", "999", }; static const char* aliases[299] = { "AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR", "AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "BT", "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG", "BN", "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY", "CF", "LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM", "YT", "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ", "DK", "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE", "FO", "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF", "TF", "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH", "GI", "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY", "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE", "KP", "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY", "LI", "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", "MQ", "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA", "MZ", "OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ", "NC", "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP", "UM", "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU", "RW", "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI", "SO", "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR", "SJ", "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO", "TT", "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK", "EG", "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF", "UY", "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA", "QM", "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW", "QX", "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH", "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ", "XR", "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ", }; if (const char* replacement = SearchReplacement(regions, aliases, region)) { region.Set(mozilla::MakeStringSpan(replacement)); return true; } return false; } } // Region subtags with complex mappings. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) { MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); if (region.Length() == 2) { return region.EqualTo("AN") || region.EqualTo("NT") || region.EqualTo("PC") || region.EqualTo("SU"); } { static const char regions[9][4] = { "062", "172", "200", "530", "532", "536", "582", "810", "890", }; return HasReplacement(regions, region); } } // Language subtags with complex mappings. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip void mozilla::intl::Locale::PerformComplexLanguageMappings() { MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); if (Language().EqualTo("cnr")) { SetLanguage("sr"); if (Region().Missing()) { SetRegion("ME"); } } else if (Language().EqualTo("drw") || Language().EqualTo("prs") || Language().EqualTo("tnf")) { SetLanguage("fa"); if (Region().Missing()) { SetRegion("AF"); } } else if (Language().EqualTo("hbs") || Language().EqualTo("sh")) { SetLanguage("sr"); if (Script().Missing()) { SetScript("Latn"); } } else if (Language().EqualTo("swc")) { SetLanguage("sw"); if (Region().Missing()) { SetRegion("CD"); } } } // Region subtags with complex mappings. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip void mozilla::intl::Locale::PerformComplexRegionMappings() { MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); MOZ_ASSERT(IsStructurallyValidRegionTag(Region().Span())); MOZ_ASSERT(IsCanonicallyCasedRegionTag(Region().Span())); if (Region().EqualTo("062")) { if (Language().EqualTo("oui") || (Language().EqualTo("und") && Script().EqualTo("Ougr"))) { SetRegion("143"); } else { SetRegion("034"); } } else if (Region().EqualTo("172")) { if (Language().EqualTo("hy") || (Language().EqualTo("und") && Script().EqualTo("Armn"))) { SetRegion("AM"); } else if (Language().EqualTo("az") || Language().EqualTo("tkr") || Language().EqualTo("tly") || Language().EqualTo("ttt")) { SetRegion("AZ"); } else if (Language().EqualTo("be")) { SetRegion("BY"); } else if (Language().EqualTo("ab") || Language().EqualTo("ka") || (Language().EqualTo("ku") && Script().EqualTo("Yezi")) || Language().EqualTo("os") || (Language().EqualTo("und") && Script().EqualTo("Geor")) || (Language().EqualTo("und") && Script().EqualTo("Yezi")) || Language().EqualTo("xmf")) { SetRegion("GE"); } else if (Language().EqualTo("ky")) { SetRegion("KG"); } else if (Language().EqualTo("kk") || (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) { SetRegion("KZ"); } else if (Language().EqualTo("gag")) { SetRegion("MD"); } else if (Language().EqualTo("tg")) { SetRegion("TJ"); } else if (Language().EqualTo("tk")) { SetRegion("TM"); } else if (Language().EqualTo("crh") || Language().EqualTo("got") || Language().EqualTo("ji") || Language().EqualTo("rue") || Language().EqualTo("uk") || (Language().EqualTo("und") && Script().EqualTo("Goth"))) { SetRegion("UA"); } else if (Language().EqualTo("kaa") || Language().EqualTo("sog") || (Language().EqualTo("und") && Script().EqualTo("Chrs")) || (Language().EqualTo("und") && Script().EqualTo("Sogd")) || (Language().EqualTo("und") && Script().EqualTo("Sogo")) || Language().EqualTo("uz") || Language().EqualTo("xco")) { SetRegion("UZ"); } else { SetRegion("RU"); } } else if (Region().EqualTo("200")) { if (Language().EqualTo("sk")) { SetRegion("SK"); } else { SetRegion("CZ"); } } else if (Region().EqualTo("530") || Region().EqualTo("532") || Region().EqualTo("AN")) { if (Language().EqualTo("vic")) { SetRegion("SX"); } else { SetRegion("CW"); } } else if (Region().EqualTo("536") || Region().EqualTo("NT")) { if (Language().EqualTo("akk") || Language().EqualTo("ckb") || (Language().EqualTo("ku") && Script().EqualTo("Arab")) || Language().EqualTo("syr") || (Language().EqualTo("und") && Script().EqualTo("Syrc")) || (Language().EqualTo("und") && Script().EqualTo("Xsux"))) { SetRegion("IQ"); } else { SetRegion("SA"); } } else if (Region().EqualTo("582") || Region().EqualTo("PC")) { if (Language().EqualTo("mh")) { SetRegion("MH"); } else if (Language().EqualTo("pau")) { SetRegion("PW"); } else { SetRegion("FM"); } } else if (Region().EqualTo("810") || Region().EqualTo("SU")) { if (Language().EqualTo("hy") || (Language().EqualTo("und") && Script().EqualTo("Armn"))) { SetRegion("AM"); } else if (Language().EqualTo("az") || Language().EqualTo("tkr") || Language().EqualTo("tly") || Language().EqualTo("ttt")) { SetRegion("AZ"); } else if (Language().EqualTo("be")) { SetRegion("BY"); } else if (Language().EqualTo("et") || Language().EqualTo("vro")) { SetRegion("EE"); } else if (Language().EqualTo("ab") || Language().EqualTo("ka") || (Language().EqualTo("ku") && Script().EqualTo("Yezi")) || Language().EqualTo("os") || (Language().EqualTo("und") && Script().EqualTo("Geor")) || (Language().EqualTo("und") && Script().EqualTo("Yezi")) || Language().EqualTo("xmf")) { SetRegion("GE"); } else if (Language().EqualTo("ky")) { SetRegion("KG"); } else if (Language().EqualTo("kk") || (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) { SetRegion("KZ"); } else if (Language().EqualTo("lt") || Language().EqualTo("sgs")) { SetRegion("LT"); } else if (Language().EqualTo("ltg") || Language().EqualTo("lv")) { SetRegion("LV"); } else if (Language().EqualTo("gag")) { SetRegion("MD"); } else if (Language().EqualTo("tg")) { SetRegion("TJ"); } else if (Language().EqualTo("tk")) { SetRegion("TM"); } else if (Language().EqualTo("crh") || Language().EqualTo("got") || Language().EqualTo("ji") || Language().EqualTo("rue") || Language().EqualTo("uk") || (Language().EqualTo("und") && Script().EqualTo("Goth"))) { SetRegion("UA"); } else if (Language().EqualTo("kaa") || Language().EqualTo("sog") || (Language().EqualTo("und") && Script().EqualTo("Chrs")) || (Language().EqualTo("und") && Script().EqualTo("Sogd")) || (Language().EqualTo("und") && Script().EqualTo("Sogo")) || Language().EqualTo("uz") || Language().EqualTo("xco")) { SetRegion("UZ"); } else { SetRegion("RU"); } } else if (Region().EqualTo("890")) { if (Language().EqualTo("bs")) { SetRegion("BA"); } else if (Language().EqualTo("hr")) { SetRegion("HR"); } else if (Language().EqualTo("mk")) { SetRegion("MK"); } else if (Language().EqualTo("sl")) { SetRegion("SI"); } else { SetRegion("RS"); } } } static const char* ToCharPointer(const char* str) { return str; } static const char* ToCharPointer(const mozilla::intl::UniqueChars& str) { return str.get(); } template static bool IsLessThan(const T& a, const U& b) { return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0; } // Mappings from variant subtags to preferred values. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::PerformVariantMappings() { // The variant subtags need to be sorted for binary search. MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), IsLessThan)); auto removeVariantAt = [&](size_t index) { mVariants.erase(mVariants.begin() + index); }; auto insertVariantSortedIfNotPresent = [&](const char* variant) { auto* p = std::lower_bound( mVariants.begin(), mVariants.end(), variant, IsLessThan); // Don't insert the replacement when already present. if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { return true; } // Insert the preferred variant in sort order. auto preferred = DuplicateStringToUniqueChars(variant); return !!mVariants.insert(p, std::move(preferred)); }; for (size_t i = 0; i < mVariants.length();) { const char* variant = mVariants[i].get(); MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant))); if (strcmp(variant, "arevela") == 0 || strcmp(variant, "arevmda") == 0 || strcmp(variant, "bokmal") == 0 || strcmp(variant, "hakka") == 0 || strcmp(variant, "lojban") == 0 || strcmp(variant, "nynorsk") == 0 || strcmp(variant, "saaho") == 0 || strcmp(variant, "xiang") == 0) { removeVariantAt(i); } else if (strcmp(variant, "aaland") == 0) { removeVariantAt(i); SetRegion("AX"); } else if (strcmp(variant, "heploc") == 0) { removeVariantAt(i); if (!insertVariantSortedIfNotPresent("alalc97")) { return false; } } else if (strcmp(variant, "polytoni") == 0) { removeVariantAt(i); if (!insertVariantSortedIfNotPresent("polyton")) { return false; } } else { i++; } } return true; } // Canonicalize legacy locale identifiers. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::UpdateLegacyMappings() { // We're mapping legacy tags to non-legacy form here. // Other tags remain unchanged. // // Legacy tags are either sign language tags ("sgn") or have one or multiple // variant subtags. Therefore we can quickly exclude most tags by checking // these two subtags. MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); if (!Language().EqualTo("sgn") && mVariants.length() == 0) { return true; } #ifdef DEBUG for (const auto& variant : Variants()) { MOZ_ASSERT(IsStructurallyValidVariantTag(variant)); MOZ_ASSERT(IsCanonicallyCasedVariantTag(variant)); } #endif // The variant subtags need to be sorted for binary search. MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), IsLessThan)); auto findVariant = [this](const char* variant) { auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, IsLessThan); if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { return p; } return static_cast(nullptr); }; auto insertVariantSortedIfNotPresent = [&](const char* variant) { auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, IsLessThan); // Don't insert the replacement when already present. if (p != mVariants.end() && strcmp(p->get(), variant) == 0) { return true; } // Insert the preferred variant in sort order. auto preferred = DuplicateStringToUniqueChars(variant); return !!mVariants.insert(p, std::move(preferred)); }; auto removeVariant = [&](auto* p) { size_t index = std::distance(mVariants.begin(), p); mVariants.erase(mVariants.begin() + index); }; auto removeVariants = [&](auto* p, auto* q) { size_t pIndex = std::distance(mVariants.begin(), p); size_t qIndex = std::distance(mVariants.begin(), q); MOZ_ASSERT(pIndex < qIndex, "variant subtags are sorted"); mVariants.erase(mVariants.begin() + qIndex); mVariants.erase(mVariants.begin() + pIndex); }; if (mVariants.length() >= 2) { if (auto* hepburn = findVariant("hepburn")) { if (auto* heploc = findVariant("heploc")) { removeVariants(hepburn, heploc); if (!insertVariantSortedIfNotPresent("alalc97")) { return false; } } } } if (Language().EqualTo("sgn")) { if (Region().Present() && SignLanguageMapping(mLanguage, Region())) { mRegion.Set(mozilla::MakeStringSpan("")); } } else if (Language().EqualTo("aa") || Language().EqualTo("aar")) { if (auto* saaho = findVariant("saaho")) { removeVariant(saaho); SetLanguage("ssy"); } } else if (Language().EqualTo("arm") || Language().EqualTo("hy") || Language().EqualTo("hye")) { if (auto* arevmda = findVariant("arevmda")) { removeVariant(arevmda); SetLanguage("hyw"); } } else if (Language().EqualTo("art")) { if (auto* lojban = findVariant("lojban")) { removeVariant(lojban); SetLanguage("jbo"); } } else if (Language().EqualTo("cel")) { if (auto* gaulish = findVariant("gaulish")) { removeVariant(gaulish); SetLanguage("xtg"); } } else if (Language().EqualTo("chi") || Language().EqualTo("cmn") || Language().EqualTo("zh") || Language().EqualTo("zho")) { if (auto* guoyu = findVariant("guoyu")) { if (auto* hakka = findVariant("hakka")) { removeVariants(guoyu, hakka); SetLanguage("hak"); return true; } } if (auto* guoyu = findVariant("guoyu")) { if (auto* xiang = findVariant("xiang")) { removeVariants(guoyu, xiang); SetLanguage("hsn"); return true; } } if (auto* guoyu = findVariant("guoyu")) { removeVariant(guoyu); SetLanguage("zh"); } else if (auto* hakka = findVariant("hakka")) { removeVariant(hakka); SetLanguage("hak"); } else if (auto* xiang = findVariant("xiang")) { removeVariant(xiang); SetLanguage("hsn"); } } else if (Language().EqualTo("no") || Language().EqualTo("nor")) { if (auto* bokmal = findVariant("bokmal")) { removeVariant(bokmal); SetLanguage("nb"); } else if (auto* nynorsk = findVariant("nynorsk")) { removeVariant(nynorsk); SetLanguage("nn"); } } return true; } // Mappings from legacy sign languages. // Derived from CLDR Supplemental Data, version 42. // https://unicode.org/Public/cldr/42/core.zip bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language, const RegionSubtag& region) { MOZ_ASSERT(language.EqualTo("sgn")); MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); if (region.Length() == 2) { static const char regions[22][3] = { "BR", "CO", "DD", "DE", "DK", "ES", "FR", "FX", "GB", "GR", "IE", "IT", "JP", "MX", "NI", "NL", "NO", "PT", "SE", "UK", "US", "ZA", }; static const char* aliases[22] = { "bzs", "csn", "gsg", "gsg", "dsl", "ssp", "fsl", "fsl", "bfi", "gss", "isg", "ise", "jsl", "mfs", "ncs", "dse", "nsi", "psr", "swl", "bfi", "ase", "sfs", }; if (const char* replacement = SearchReplacement(regions, aliases, region)) { language.Set(mozilla::MakeStringSpan(replacement)); return true; } return false; } { static const char regions[22][4] = { "076", "170", "208", "249", "250", "276", "278", "280", "300", "372", "380", "392", "484", "528", "558", "578", "620", "710", "724", "752", "826", "840", }; static const char* aliases[22] = { "bzs", "csn", "dsl", "fsl", "fsl", "gsg", "gsg", "gsg", "gss", "isg", "ise", "jsl", "mfs", "dse", "ncs", "nsi", "psr", "sfs", "ssp", "swl", "bfi", "ase", }; if (const char* replacement = SearchReplacement(regions, aliases, region)) { language.Set(mozilla::MakeStringSpan(replacement)); return true; } return false; } } template static inline bool IsUnicodeKey(mozilla::Span key, const char (&str)[Length]) { static_assert(Length == UnicodeKeyLength + 1, "Unicode extension key is two characters long"); return memcmp(key.data(), str, Length - 1) == 0; } template static inline bool IsUnicodeType(mozilla::Span type, const char (&str)[Length]) { static_assert(Length > UnicodeKeyLength + 1, "Unicode extension type contains more than two characters"); return type.size() == (Length - 1) && memcmp(type.data(), str, Length - 1) == 0; } static int32_t CompareUnicodeType(const char* a, mozilla::Span b) { MOZ_ASSERT(!std::char_traits::find(b.data(), b.size(), '\0'), "unexpected null-character in string"); using UnsignedChar = unsigned char; for (size_t i = 0; i < b.size(); i++) { // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if // we've reached the end of |a|, the below if-statement will always be true. // That ensures we don't read past the end of |a|. if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) { return r; } } // Return zero if both strings are equal or a positive number if |b| is a // prefix of |a|. return int32_t(UnsignedChar(a[b.size()])); } template static inline const char* SearchUnicodeReplacement( const char* (&types)[Length], const char* (&aliases)[Length], mozilla::Span type) { auto p = std::lower_bound(std::begin(types), std::end(types), type, [](const auto& a, const auto& b) { return CompareUnicodeType(a, b) < 0; }); if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) { return aliases[std::distance(std::begin(types), p)]; } return nullptr; } /** * Mapping from deprecated BCP 47 Unicode extension types to their preferred * values. * * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files * Spec: https://www.unicode.org/reports/tr35/#t_Extension */ const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType( mozilla::Span key, mozilla::Span type) { MOZ_ASSERT(key.size() == UnicodeKeyLength); MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key)); MOZ_ASSERT(type.size() > UnicodeKeyLength); MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type)); if (IsUnicodeKey(key, "ca")) { if (IsUnicodeType(type, "ethiopic-amete-alem")) { return "ethioaa"; } if (IsUnicodeType(type, "islamicc")) { return "islamic-civil"; } } else if (IsUnicodeKey(key, "kb") || IsUnicodeKey(key, "kc") || IsUnicodeKey(key, "kh") || IsUnicodeKey(key, "kk") || IsUnicodeKey(key, "kn")) { if (IsUnicodeType(type, "yes")) { return "true"; } } else if (IsUnicodeKey(key, "ks")) { if (IsUnicodeType(type, "primary")) { return "level1"; } if (IsUnicodeType(type, "tertiary")) { return "level3"; } } else if (IsUnicodeKey(key, "ms")) { if (IsUnicodeType(type, "imperial")) { return "uksystem"; } } else if (IsUnicodeKey(key, "rg") || IsUnicodeKey(key, "sd")) { static const char* types[144] = { "cn11" , "cn12" , "cn13" , "cn14" , "cn15" , "cn21" , "cn22" , "cn23" , "cn31" , "cn32" , "cn33" , "cn34" , "cn35" , "cn36" , "cn37" , "cn41" , "cn42" , "cn43" , "cn44" , "cn45" , "cn46" , "cn50" , "cn51" , "cn52" , "cn53" , "cn54" , "cn61" , "cn62" , "cn63" , "cn64" , "cn65" , "cn71" , "cn91" , "cn92" , "cz10a" , "cz10b" , "cz10c" , "cz10d" , "cz10e" , "cz10f" , "cz611" , "cz612" , "cz613" , "cz614" , "cz615" , "cz621" , "cz622" , "cz623" , "cz624" , "cz626" , "cz627" , "czjc" , "czjm" , "czka" , "czkr" , "czli" , "czmo" , "czol" , "czpa" , "czpl" , "czpr" , "czst" , "czus" , "czvy" , "czzl" , "fi01" , "fra" , "frb" , "frbl" , "frc" , "frcp" , "frd" , "fre" , "frf" , "frg" , "frgf" , "frgp" , "frh" , "fri" , "frj" , "frk" , "frl" , "frm" , "frmf" , "frmq" , "frn" , "frnc" , "fro" , "frp" , "frpf" , "frpm" , "frq" , "frr" , "frre" , "frs" , "frt" , "frtf" , "fru" , "frv" , "frwf" , "fryt" , "laxn" , "lud" , "lug" , "lul" , "mrnkc" , "nlaw" , "nlcw" , "nlsx" , "no23" , "nzn" , "nzs" , "omba" , "omsh" , "plds" , "plkp" , "pllb" , "plld" , "pllu" , "plma" , "plmz" , "plop" , "plpd" , "plpk" , "plpm" , "plsk" , "plsl" , "plwn" , "plwp" , "plzp" , "shta" , "tteto" , "ttrcm" , "ttwto" , "twkhq" , "twtnq" , "twtpq" , "twtxq" , "usas" , "usgu" , "usmp" , "uspr" , "usum" , "usvi" , }; static const char* aliases[144] = { "cnbj" , "cntj" , "cnhe" , "cnsx" , "cnmn" , "cnln" , "cnjl" , "cnhl" , "cnsh" , "cnjs" , "cnzj" , "cnah" , "cnfj" , "cnjx" , "cnsd" , "cnha" , "cnhb" , "cnhn" , "cngd" , "cngx" , "cnhi" , "cncq" , "cnsc" , "cngz" , "cnyn" , "cnxz" , "cnsn" , "cngs" , "cnqh" , "cnnx" , "cnxj" , "twzzzz", "hkzzzz", "mozzzz", "cz110" , "cz111" , "cz112" , "cz113" , "cz114" , "cz115" , "cz663" , "cz632" , "cz633" , "cz634" , "cz635" , "cz641" , "cz642" , "cz643" , "cz644" , "cz646" , "cz647" , "cz31" , "cz64" , "cz41" , "cz52" , "cz51" , "cz80" , "cz71" , "cz53" , "cz32" , "cz10" , "cz20" , "cz42" , "cz63" , "cz72" , "axzzzz", "frges" , "frnaq" , "blzzzz", "frara" , "cpzzzz", "frbfc" , "frbre" , "frcvl" , "frges" , "gfzzzz", "gpzzzz", "frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "frges" , "mfzzzz", "mqzzzz", "frocc" , "nczzzz", "frhdf" , "frnor" , "pfzzzz", "pmzzzz", "frnor" , "frpdl" , "rezzzz", "frhdf" , "frnaq" , "tfzzzz", "frpac" , "frara" , "wfzzzz", "ytzzzz", "laxs" , "lucl" , "luec" , "luca" , "mr13" , "awzzzz", "cwzzzz", "sxzzzz", "no50" , "nzauk" , "nzcan" , "ombj" , "omsj" , "pl02" , "pl04" , "pl08" , "pl10" , "pl06" , "pl12" , "pl14" , "pl16" , "pl20" , "pl18" , "pl22" , "pl26" , "pl24" , "pl28" , "pl30" , "pl32" , "tazzzz", "tttob" , "ttmrc" , "tttob" , "twkhh" , "twtnn" , "twnwt" , "twtxg" , "aszzzz", "guzzzz", "mpzzzz", "przzzz", "umzzzz", "vizzzz", }; return SearchUnicodeReplacement(types, aliases, type); } else if (IsUnicodeKey(key, "tz")) { static const char* types[29] = { "aqams" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" , "egypt" , "eire" , "est" , "gaza" , "gmt0" , "hongkong", "hst" , "iceland" , "iran" , "israel" , "jamaica" , "japan" , "libya" , "mst" , "navajo" , "poland" , "portugal", "prc" , "roc" , "rok" , "turkey" , "uct" , "usnavajo", "zulu" , }; static const char* aliases[29] = { "nzakl" , "cnsha" , "cnsha" , "cnurc" , "cuhav" , "egcai" , "iedub" , "utcw05" , "gazastrp", "gmt" , "hkhkg" , "utcw10" , "isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" , "lytip" , "utcw07" , "usden" , "plwaw" , "ptlis" , "cnsha" , "twtpe" , "krsel" , "trist" , "utc" , "usden" , "utc" , }; return SearchUnicodeReplacement(types, aliases, type); } return nullptr; } template static inline bool IsTransformKey(mozilla::Span key, const char (&str)[Length]) { static_assert(Length == TransformKeyLength + 1, "Transform extension key is two characters long"); return memcmp(key.data(), str, Length - 1) == 0; } template static inline bool IsTransformType(mozilla::Span type, const char (&str)[Length]) { static_assert(Length > TransformKeyLength + 1, "Transform extension type contains more than two characters"); return type.size() == (Length - 1) && memcmp(type.data(), str, Length - 1) == 0; } /** * Mapping from deprecated BCP 47 Transform extension types to their preferred * values. * * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files * Spec: https://www.unicode.org/reports/tr35/#t_Extension */ const char* mozilla::intl::Locale::ReplaceTransformExtensionType( mozilla::Span key, mozilla::Span type) { MOZ_ASSERT(key.size() == TransformKeyLength); MOZ_ASSERT(IsCanonicallyCasedTransformKey(key)); MOZ_ASSERT(type.size() > TransformKeyLength); MOZ_ASSERT(IsCanonicallyCasedTransformType(type)); if (IsTransformKey(key, "d0")) { if (IsTransformType(type, "name")) { return "charname"; } } else if (IsTransformKey(key, "m0")) { if (IsTransformType(type, "beta-metsehaf")) { return "betamets"; } if (IsTransformType(type, "ies-jes")) { return "iesjes"; } if (IsTransformType(type, "names")) { return "prprname"; } if (IsTransformType(type, "tekie-alibekit")) { return "tekieali"; } } return nullptr; }