diff options
Diffstat (limited to 'i18nlangtag/source/isolang/MS-LCID-to-list.sh')
-rwxr-xr-x | i18nlangtag/source/isolang/MS-LCID-to-list.sh | 104 |
1 files changed, 104 insertions, 0 deletions
#!/usr/bin/env bash
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Generates language ID table and defines and mappings of
# https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/[MS-LCID].pdf
# downloaded from http://msdn.microsoft.com/library/cc233965.aspx
# At least this worked for Release: Monday, July 22, 2013; 08/08/2013 Revision 6.0
# Also worked for 6/30/2015 revision 7.0
# Also worked for 12/1/2017 revision 11.0
#
# Uses pdftotext (from poppler-utils), grep and gawk.
#
# The script expects the downloaded [MS-LCID].pdf as MS-LCID.pdf
#
# Files created/OVERWRITTEN: MS-LCID.txt, MS-LCID.lst, MS-LCID.lst.h
#
# Best invoked in a temporary directory ...
#
# As the PDF layout may change, MS-LCID.lst is generated with uppercase hex
# digits and unified spaces (which gawk $1=... automatically does).
# Still, if needed, diff MS-LCID.lst with ignore spaces against the previous
# version for changes and additions, e.g.
# gvimdiff -c 'set diffopt+=iwhite' ../MS-LCID.lst MS-LCID.lst
# The generated MS-LCID.lst.h file is only a copy&paste help to add entries in
# isolang.cxx and not to be committed, the #define names have to be adapted for
# lang.h and isolang.cxx

# Step 1: convert the PDF to text (MS-LCID.txt), keeping the table layout.
# Stop here on failure so that a stale or missing MS-LCID.txt does not silently
# overwrite MS-LCID.lst and MS-LCID.lst.h with bogus content.
if ! pdftotext -layout MS-LCID.pdf; then
    echo "$0: pdftotext failed to convert MS-LCID.pdf" >&2
    exit 1
fi

# Step 2: keep only the lines that start with a four-digit hex value and
# rewrite that value with uppercase hex digits. Assigning to $1 makes gawk
# rebuild the record, which also squeezes the field separators to one space.
grep '^ *0x[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ' MS-LCID.txt | \
    gawk -e '{ $1 = "0x" toupper( substr( $1, 3)); print; }' > MS-LCID.lst

# Step 3: for every list entry emit a #define line, a mapping line whose shape
# depends on the structure of the language tag, and an empty separator line.
gawk -e '
{
    # $1 is the hex value, $2 the language tag; drop everything from the
    # first comma of the tag onwards.
    val = $1;
    tag = $2;
    tag = gensub( /,.*/, "", 1, tag);

    # Build the define name from fields 2..NF joined by "_", with every
    # character outside [a-zA-Z0-9_] replaced by "_".
    def = $2;
    for (i=3; i<=NF; ++i)
    {
        def = def "_" $i;
    }
    def = gensub( /[^a-zA-Z0-9_]/, "_", "g", def);
    def = "LANGUAGE_" def
    if (def == "LANGUAGE_Neither_defined_nor_reserved")
    {
        # Append the value to this name; presumably several entries share
        # this text and the names would otherwise collide.
        def = def "_" val
    }
    usedef = def ","

    # Pick the mapping layout by the number and the length of the subtags.
    n = split( tag, arr, /-/);
    switch (n)
    {
        case 1:
            # lll
            mapping = sprintf( " { %-36s %5s, \"\" , k0 },", usedef, "\"" arr[1] "\"");
            break;
        case 2:
            if (length(arr[2]) == 2)
            {
                # lll-CC
                mapping = sprintf( " { %-36s %5s, \"%s\", k0 },", usedef, "\"" arr[1] "\"", arr[2]);
            }
            else if (length(arr[2]) == 4)
            {
                # lll-Ssss
                mapping = sprintf( " { %-44s %10s, \"\" , k0 },", usedef, "\"" tag "\"");
            }
            else
            {
                # lll-### or lll-vvvvvvvv
                mapping = sprintf( " { %-33s %16s, \"\", \"\" },", usedef, "\"" tag "\"");
            }
            break;
        default:
            if (length(arr[2]) == 2)
            {
                # lll-CC-vvvvvvvv
                mapping = sprintf( " { %-33s %16s, \"%s\", \"%s\" },", usedef, "\"" tag "\"", arr[2], arr[1] "-" arr[3]);
            }
            else if (length(arr[2]) == 4)
            {
                # lll-Ssss-CC
                mapping = sprintf( " { %-44s %10s, \"%s\", k0 },", usedef, "\"" arr[1] "-" arr[2] "\"", arr[3]);
            }
            else
            {
                # grandfathered or stuff
                # The comparison has to be applied to the length of arr[3].
                # The former length(arr[3] == 2) measured the 0/1 result of
                # the comparison, which is always one character long, so the
                # else branch could never be taken.
                if (length(arr[3]) == 2)
                    mapping = sprintf( " { %-33s %16s, \"%s\", \"\" },", usedef, "\"" tag "\"", arr[3]);
                else
                    mapping = sprintf( " { %-33s %16s, \"\", \"\" },", usedef, "\"" tag "\"");
            }
            break;
    }
    printf "#define %-35s LanguageType(%s)\n", def, val;
    print mapping;
    print ""
}
' MS-LCID.lst > MS-LCID.lst.h

# vim: set noet sw=4 ts=4: