From e4283f6d48b98e764b988b43bbc86b9d52e6ec94 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:54:43 +0200 Subject: Adding upstream version 43.9. Signed-off-by: Daniel Baumann --- data/cldr2json/README.md | 40 ++ data/cldr2json/cldr2json.py | 212 +++++++++ data/cldr2json/test/__init__.py | 0 data/cldr2json/test/data/fr-t-k0-android.xml | 138 ++++++ data/cldr2json/test/data/fr.json | 614 +++++++++++++++++++++++++++ data/cldr2json/test/test_cldr2json.py | 212 +++++++++ 6 files changed, 1216 insertions(+) create mode 100644 data/cldr2json/README.md create mode 100755 data/cldr2json/cldr2json.py create mode 100644 data/cldr2json/test/__init__.py create mode 100644 data/cldr2json/test/data/fr-t-k0-android.xml create mode 100644 data/cldr2json/test/data/fr.json create mode 100755 data/cldr2json/test/test_cldr2json.py (limited to 'data/cldr2json') diff --git a/data/cldr2json/README.md b/data/cldr2json/README.md new file mode 100644 index 0000000..0eb54bc --- /dev/null +++ b/data/cldr2json/README.md @@ -0,0 +1,40 @@ +cldr2json +========= + +This script converts Unicode CLDR android keyboard layouts to JSON usable by +GNOME Shell. + +CLDR keyboard layouts can be found at + + + +Usage +===== + + ./cldr2json <input file or directory> <output directory> + +example: + + ./cldr2json cldr/keyboards/android/ json_layouts/ + + +Keyboard layout mapping +======================= + +Unicode CLDR layout identifiers are language codes, while XKB layout +identifiers are... something else. The mapping between the two currently uses +a heuristic based on the layout descriptions, in this order: + +- if the CLDR layout description matches an XKB layout description, choose its + XKB identifier +- if one word of the CLDR layout description matches an XKB layout + description, choose its XKB identifier +- if the CLDR layout description matches one word of an XKB layout description, + choose its XKB identifier + +That doesn't always work. For instance it fails for the "en" language, which should +match the "us" XKB identifier. 
For such cases, there is a mapping in +LOCALE_TO_XKB_OVERRIDES at the top of the script. If you discover a weird +mapping or if you get a "failed to find XKB mapping for <locale>" warning then +please consider adding an override there. + diff --git a/data/cldr2json/cldr2json.py b/data/cldr2json/cldr2json.py new file mode 100755 index 0000000..e5eb3cb --- /dev/null +++ b/data/cldr2json/cldr2json.py @@ -0,0 +1,212 @@ +#!/usr/bin/python3 +# +# Copyright 2015 Daiki Ueno +# 2016 Parag Nemade +# 2017 Alan +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see +# . 
"""Convert Unicode CLDR android keyboard layouts to JSON layout files.

Run as a script with two arguments: a CLDR keyboard XML file (or a
directory containing ``*-t-k0-android.xml`` files) and an output directory.
"""

import glob
import json
import locale
import logging
import os
import re
import sys
import xml.etree.ElementTree

import gi
gi.require_version('GnomeDesktop', '3.0')  # NOQA: E402
from gi.repository import GnomeDesktop

# CLDR escape sequence of the form \u{XXXX}, XXXX being a hex code point.
ESCAPE_PATTERN = re.compile(r'\\u\{([0-9A-Fa-f]+?)\}')
# Key position: a row letter (A-E) followed by a column number.
ISO_PATTERN = re.compile(r'[A-E]([0-9]+)')

# CLDR locale -> XKB layout, for the cases where the description-based
# heuristic in locale_to_xkb() gives no or a wrong answer.  A value of None
# explicitly disables the layout.
LOCALE_TO_XKB_OVERRIDES = {
    'af': 'za',
    'en': 'us',
    'en-GB': 'uk',
    'es-US': 'latam',
    'fr-CA': 'ca',
    'hi': 'in+bolnagri',
    'ky': 'kg',
    'nl-BE': 'be',
    'zu': None
}


def parse_single_key(value):
    """Return *value* with every ``\\u{XXXX}`` escape replaced by its character."""
    def unescape(m):
        return chr(int(m.group(1), 16))
    return ESCAPE_PATTERN.sub(unescape, value)


def parse_rows(keymap):
    """Build the rows of one keyboard level from a ``keyMap`` element.

    Every ``map`` descendant contributes one key.  Its ``iso`` attribute
    gives the row letter and column number, ``to`` gives the main character
    and the optional space-separated ``longPress`` attribute gives the
    sub-keys.  Maps with a missing ``to`` attribute or a missing/malformed
    ``iso`` attribute are reported on stderr and skipped.

    Returns a list of rows ordered by descending row letter; each row is a
    list of ``{'strings': [main, sub1, ...]}`` dicts ordered by column.

    NOTE(review): the bundled tests and test/data/fr.json expect each key as
    a bare list of strings, not a ``{'strings': [...]}`` object -- confirm
    which format the consumer of the JSON wants.
    """
    unsorted_rows = {}
    for _map in keymap.iter('map'):
        value = _map.get('to')
        iso = _map.get('iso')
        if value is None:
            sys.stderr.write('missing "to" attribute for key: %s\n' % iso)
            continue
        # fullmatch() rather than match(): a name such as "A01x" must be
        # rejected here instead of blowing up in int() below.
        if iso is None or not ISO_PATTERN.fullmatch(iso):
            sys.stderr.write('invalid ISO key name: %s\n' % iso)
            continue

        key = [parse_single_key(value)]
        long_press = _map.get('longPress')
        if long_press:
            # Ignore the empty items produced by doubled spaces.
            key.extend(parse_single_key(sub)
                       for sub in long_press.split(' ') if sub)
        unsorted_rows.setdefault(iso[0], []).append((int(iso[1:]), key))

    rows = []
    for row_name in sorted(unsorted_rows, reverse=True):
        columns = sorted(unsorted_rows[row_name])
        rows.append([{'strings': strings} for _, strings in columns])
    return rows


def convert_xml(tree):
    """Convert a parsed CLDR keyboard document into a JSON-ready dict.

    The result has a ``levels`` list (one entry per ``keyMap`` element, each
    with ``level``, ``mode`` and ``rows``) plus ``locale`` and ``name`` when
    the document provides them.  ``locale`` is the ``keyboard`` element's
    locale with everything from ``-t-`` onwards removed.
    """
    root = {}
    for xml_keyboard in tree.iter("keyboard"):
        locale_full = xml_keyboard.get("locale")
        if locale_full is None:
            # No locale attribute: nothing to record for this element.
            continue
        root["locale"] = locale_full.partition("-t-")[0]
    for xml_name in tree.iter("name"):
        root["name"] = xml_name.get("value")
    root["levels"] = []
    # parse levels
    for keymap in tree.iter('keyMap'):
        # FIXME: heuristics here
        modifiers = keymap.get('modifiers')
        if not modifiers:
            mode, modifiers = 'default', ''
        elif 'shift' in modifiers.split(' '):
            mode, modifiers = 'latched', 'shift'
        else:
            mode = 'locked'
        root["levels"].append({
            "level": modifiers,
            "mode": mode,
            "rows": parse_rows(keymap),
        })
    return root


def locale_to_xkb(locale, name):
    """Return the XKB layout identifier for a CLDR layout.

    *locale* is the CLDR locale and *name* its layout description.  The
    lookup tries, in order: LOCALE_TO_XKB_OVERRIDES, an XKB description equal
    to *name*, an XKB description equal to one word of *name*, and finally an
    XKB description one of whose words (parentheses stripped) equals *name*.

    Raises KeyError when the layout is disabled in the overrides or when no
    mapping is found.
    """
    if locale in LOCALE_TO_XKB_OVERRIDES:
        xkb = LOCALE_TO_XKB_OVERRIDES[locale]
        logging.debug("override for %s → %s",
                      locale, xkb)
        if xkb:
            return xkb
        raise KeyError("layout %s explicitly disabled in overrides"
                       % locale)

    if name in name_to_xkb:
        return name_to_xkb[name]
    logging.debug("name %s failed", name)

    for sub_name in name.split(' '):
        if sub_name in name_to_xkb:
            xkb = name_to_xkb[sub_name]
            logging.debug("dumb mapping failed but match with locale word: "
                          "%s (%s) → %s (%s)",
                          locale, name, xkb, sub_name)
            return xkb
    logging.debug("sub_name failed")

    # Sorted so that the first match does not depend on dict ordering.
    for xkb_name in sorted(name_to_xkb):
        for xkb_sub_name in xkb_name.split(' '):
            if xkb_sub_name.strip('()') == name:
                xkb = name_to_xkb[xkb_name]
                logging.debug("dumb mapping failed but match with xkb word: "
                              "%s (%s) → %s (%s)",
                              locale, name, xkb, xkb_name)
                return xkb
    raise KeyError("failed to find XKB mapping for %s" % locale)


def convert_file(source_file, destination_path):
    """Convert one CLDR keyboard file into ``<xkb layout>.json``.

    The output is created in *destination_path* and is never overwritten.

    Returns True when a file was written, False when the layout was skipped
    (no locale/name in the source, no XKB mapping, or output already exists).
    """
    logging.info("Parsing %s", source_file)

    itree = xml.etree.ElementTree.ElementTree()
    itree.parse(source_file)

    root = convert_xml(itree)

    layout_locale = root.get("locale")
    layout_name = root.get("name")
    if layout_locale is None or layout_name is None:
        logging.warning("no keyboard locale or name found in %s", source_file)
        return False

    try:
        xkb_name = locale_to_xkb(layout_locale, layout_name)
    except KeyError as e:
        logging.warning(e)
        return False
    destination_file = os.path.join(destination_path, xkb_name + ".json")

    try:
        # Mode 'x': fail instead of clobbering an existing layout.
        with open(destination_file, 'x', encoding="utf-8") as dest_fd:
            json.dump(root, dest_fd, ensure_ascii=False, indent=2,
                      sort_keys=True)
    except FileExistsError:
        logging.info("File %s exists, not updating", destination_file)
        return False

    logging.debug("written %s", destination_file)
    return True


def load_xkb_mappings():
    """Return a dict mapping XKB layout display names to layout identifiers."""
    xkb = GnomeDesktop.XkbInfo()
    return {
        xkb.get_layout_info(layout).display_name: layout
        for layout in xkb.get_all_layouts()
    }


# NOTE(review): presumably the "C" locale is forced so that the XKB display
# names come back untranslated and can match the CLDR names -- confirm.
locale.setlocale(locale.LC_ALL, "C")
name_to_xkb = load_xkb_mappings()


def _main(argv):
    """Run the command-line interface and return the process exit status."""
    if "DEBUG" in os.environ:
        logging.basicConfig(level=logging.DEBUG)

    if len(argv) < 2:
        print("supply a CLDR keyboard file", file=sys.stderr)
        return 1

    if len(argv) < 3:
        print("supply an output directory", file=sys.stderr)
        return 1

    source = argv[1]
    destination = argv[2]
    if os.path.isfile(source):
        convert_file(source, destination)
    elif os.path.isdir(source):
        pattern = os.path.join(glob.escape(source), "*-t-k0-android.xml")
        # Sorted: when several locales map to the same XKB layout the first
        # file wins, so the processing order must be reproducible.
        for path in sorted(glob.glob(pattern)):
            convert_file(path, destination)
    else:
        print("%s is neither a file nor a directory" % source,
              file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))
+ "ì", + "í", + "į", + "ī" + ], + [ + "o", + "ô", + "œ", + "%", + "ö", + "ò", + "ó", + "õ", + "ø", + "ō", + "º" + ], + [ + "p" + ] + ], + [ + [ + "q" + ], + [ + "s" + ], + [ + "d" + ], + [ + "f" + ], + [ + "g" + ], + [ + "h" + ], + [ + "j" + ], + [ + "k" + ], + [ + "l" + ], + [ + "m" + ] + ], + [ + [ + "w" + ], + [ + "x" + ], + [ + "c", + "ç", + "ć", + "č" + ], + [ + "v" + ], + [ + "b" + ], + [ + "n" + ], + [ + "'", + "‘", + "’", + "‹", + "›" + ] + ], + [ + [ + "," + ], + [ + " " + ], + [ + ".", + "#", + "!", + ",", + "?", + "-", + ":", + "'", + "@" + ] + ] + ] + }, + { + "level": "shift", + "mode": "latched", + "rows": [ + [ + [ + "A", + "À", + "Â", + "%", + "Æ", + "Á", + "Ä", + "Ã", + "Å", + "Ā", + "ª" + ], + [ + "Z" + ], + [ + "E", + "É", + "È", + "Ê", + "Ë", + "%", + "Ę", + "Ė", + "Ē" + ], + [ + "R" + ], + [ + "T" + ], + [ + "Y", + "%", + "Ÿ" + ], + [ + "U", + "Ù", + "Û", + "%", + "Ü", + "Ú", + "Ū" + ], + [ + "I", + "Î", + "%", + "Ï", + "Ì", + "Í", + "Į", + "Ī" + ], + [ + "O", + "Ô", + "Œ", + "%", + "Ö", + "Ò", + "Ó", + "Õ", + "Ø", + "Ō", + "º" + ], + [ + "P" + ] + ], + [ + [ + "Q" + ], + [ + "S" + ], + [ + "D" + ], + [ + "F" + ], + [ + "G" + ], + [ + "H" + ], + [ + "J" + ], + [ + "K" + ], + [ + "L" + ], + [ + "M" + ] + ], + [ + [ + "W" + ], + [ + "X" + ], + [ + "C", + "Ç", + "Ć", + "Č" + ], + [ + "V" + ], + [ + "B" + ], + [ + "N" + ], + [ + "'", + "‘", + "’", + "‹", + "›" + ] + ], + [ + [ + "," + ], + [ + " " + ], + [ + ".", + "#", + "!", + ",", + "?", + "-", + ":", + "'", + "@" + ] + ] + ] + }, + { + "level": "opt", + "mode": "locked", + "rows": [ + [ + [ + "1", + "¹", + "½", + "⅓", + "¼", + "⅛" + ], + [ + "2", + "²", + "⅔" + ], + [ + "3", + "³", + "¾", + "⅜" + ], + [ + "4", + "⁴" + ], + [ + "5", + "⅝" + ], + [ + "6" + ], + [ + "7", + "⅞" + ], + [ + "8" + ], + [ + "9" + ], + [ + "0", + "ⁿ", + "∅" + ] + ], + [ + [ + "@" + ], + [ + "#" + ], + [ + "€", + "¢", + "£", + "$", + "¥", + "₱" + ], + [ + "%", + "‰" + ], + [ + "&" + ], + [ + "-", + "_", + "–", + "—", + 
"·" + ], + [ + "+", + "±" + ], + [ + "(", + "<", + "{", + "[" + ], + [ + ")", + ">", + "}", + "]" + ] + ], + [ + [ + "*", + "†", + "‡", + "★" + ], + [ + "\"", + "“", + "”", + "«", + "»" + ], + [ + "'", + "‘", + "’", + "‹", + "›" + ], + [ + ":" + ], + [ + ";" + ], + [ + "!", + "¡" + ], + [ + "?", + "¿" + ] + ], + [ + [ + "_" + ], + [ + "/" + ], + [ + " " + ], + [ + "," + ], + [ + ".", + "…" + ] + ] + ] + }, + { + "level": "opt+shift", + "mode": "locked", + "rows": [ + [ + [ + "~" + ], + [ + "`" + ], + [ + "|" + ], + [ + "•", + "♪", + "♥", + "♠", + "♦", + "♣" + ], + [ + "√" + ], + [ + "Π", + "π" + ], + [ + "÷" + ], + [ + "×" + ], + [ + "¶", + "§" + ], + [ + "∆" + ] + ], + [ + [ + "£" + ], + [ + "¥" + ], + [ + "$", + "¢" + ], + [ + "¢" + ], + [ + "^", + "↑", + "↓", + "←", + "→" + ], + [ + "°", + "′", + "″" + ], + [ + "=", + "≠", + "≈", + "∞" + ], + [ + "{" + ], + [ + "}" + ] + ], + [ + [ + "\\" + ], + [ + "©" + ], + [ + "®" + ], + [ + "™" + ], + [ + "℅" + ], + [ + "[" + ], + [ + "]" + ] + ], + [ + [ + "<", + "‹", + "≤", + "«" + ], + [ + ">", + "›", + "≥", + "»" + ], + [ + " " + ], + [ + "," + ], + [ + ".", + "…" + ] + ] + ] + } + ], + "locale": "fr", + "name": "French" +} diff --git a/data/cldr2json/test/test_cldr2json.py b/data/cldr2json/test/test_cldr2json.py new file mode 100755 index 0000000..78a6aa0 --- /dev/null +++ b/data/cldr2json/test/test_cldr2json.py @@ -0,0 +1,212 @@ +#!/usr/bin/python3 + +import json +import tempfile +import unittest +import xml.etree.ElementTree + +import cldr2json + + +class TestParseSingleKey(unittest.TestCase): + def test_ascii(self): + self.assertEqual(cldr2json.parse_single_key("a"), "a") + + def test_nonascii(self): + self.assertEqual(cldr2json.parse_single_key("Æ"), "Æ") + + def test_twochars(self): + self.assertEqual(cldr2json.parse_single_key("ԵՒ"), "ԵՒ") + + def test_decode(self): + self.assertEqual(cldr2json.parse_single_key("\\u{200D}"), "\u200d") + + def test_decode_threechars(self): + 
self.assertEqual(cldr2json.parse_single_key("ज\\u{94D}ञ"), "ज\u094Dञ") + + +class TestParseRow(unittest.TestCase): + def test_parse_row(self): + xml_string = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + """ + xml_tree = xml.etree.ElementTree.XML(xml_string) + json = [[ + ['a', 'à', 'â', '%', 'æ', 'á', 'ä', 'ã', 'å', 'ā', 'ª'], + ['z'], + ['e', 'é', 'è', 'ê', 'ë', '%', 'ę', 'ė', 'ē'], + ['r'], + ['t'], + ['y', '%', 'ÿ'], + ['u', 'ù', 'û', '%', 'ü', 'ú', 'ū'], + ['i', 'î', '%', 'ï', 'ì', 'í', 'į', 'ī'], + ['o', 'ô', 'œ', '%', 'ö', 'ò', 'ó', 'õ', 'ø', 'ō', 'º'], + ['p'] + ], [ + ['q'], + ['s'], + ['d'], + ['f'], + ['g'], + ['h'], + ['j'], + ['k'], + ['l'], + ['m'] + ], [ + ['w'], + ['x'], + ['c', 'ç', 'ć', 'č'], + ['v'], + ['b'], + ['n'], + ["'", '‘', '’', '‹', '›'] + ], [ + [','], + [' '], + ['.', '#', '!', ',', '?', '-', ':', "'", '@'] + ]] + self.assertEqual(cldr2json.parse_rows(xml_tree), json) + + +class TestConvertXml(unittest.TestCase): + def test_convert_xml(self): + xml_string = """ + + + + + + + + + + + + + + + + + + + + """ + xml_tree = xml.etree.ElementTree.XML(xml_string) + json = { + "locale": "fr", + "name": "French", + "levels": [ + { + "level": "", + "mode": "default", + "rows": [ + [ + ['a', 'à', 'â', '%', 'æ', 'á', 'ä', 'ã', 'å', 'ā', 'ª'], + ] + ] + }, + { + "level": "shift", + "mode": "latched", + "rows": [ + [ + ['A', 'À', 'Â', '%', 'Æ', 'Á', 'Ä', 'Ã', 'Å', 'Ā', 'ª'], + ] + ] + }, + { + "level": "opt", + "mode": "locked", + "rows": [ + [ + ["1", "¹", "½", "⅓", "¼", "⅛"], + ] + ] + }, + { + "level": "opt+shift", + "mode": "locked", + "rows": [ + [ + ["~"], + ] + ] + } + ] + } + self.assertEqual(cldr2json.convert_xml(xml_tree), json) + + +class TestConvertFile(unittest.TestCase): + def test_fr(self): + outdir = tempfile.mkdtemp() + cldr2json.convert_file("test/data/fr-t-k0-android.xml", outdir) + with open("test/data/fr.json", encoding="utf-8") as expected_json_fd: + expected_json = json.load(expected_json_fd) + with 
open(outdir + "/fr.json", encoding="utf-8") as actual_json_fd: + actual_json = json.load(actual_json_fd) + self.assertEqual(expected_json, actual_json) + + +class TestLocaleToXKB(unittest.TestCase): + def test_simple(self): + self.assertEqual(cldr2json.locale_to_xkb("fr", "French"), + "fr") + + def test_fallback(self): + self.assertEqual(cldr2json.locale_to_xkb("nb", "Norwegian Bokmål"), + "no") + + def test_fallback2(self): + self.assertEqual(cldr2json.locale_to_xkb("km", "Khmer"), + "kh") + + def test_override(self): + self.assertEqual(cldr2json.locale_to_xkb("en-GB", + "English Great Britain"), + "uk") + + +class LoadXKBMapplings(unittest.TestCase): + def test_dictionnary(self): + self.assertIsInstance(cldr2json.load_xkb_mappings(), dict) + + def test_mapping(self): + mapping = cldr2json.load_xkb_mappings() + self.assertEqual(mapping["French"], "fr") + + +if __name__ == '__main__': + unittest.main() -- cgit v1.2.3