diff options
Diffstat (limited to 'i18npool/source/localedata/data/currency-check.awk')
-rw-r--r-- | i18npool/source/localedata/data/currency-check.awk | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/i18npool/source/localedata/data/currency-check.awk b/i18npool/source/localedata/data/currency-check.awk new file mode 100644 index 000000000..1246211e1 --- /dev/null +++ b/i18npool/source/localedata/data/currency-check.awk @@ -0,0 +1,335 @@ +#!/usr/bin/gawk -f +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . +# +# Usage: gawk -f currency-check.awk *.xml +# Check any +# <FormatCode>...[$xxx-...]...</FormatCode> +# against every +# <CurrencySymbol>xxx</CurrencySymbol> +# definition of the same XML file and output symbols if no match was found. +# For formatindex="12" to formatindex="15" and for formatindex="17" it is +# checked if the used currency symbol is the usedInCompatibleFormatCodes +# currency symbol as it is needed by the number formatter. +# Also generates output if the generic currency symbol (UTF8 string 0xC2A4) +# is used instead of a real currency symbol. 
# Author: Eike Rathke <er@openoffice.org>

# Overview of the processing model:
#   * Every input file is scanned line by line. Currency format codes are
#     collected into Formats[] / FormatLine[] / FormatAuto[], currency
#     definitions into IDs[] / Symbols[] / BankSymbols[] and their flags.
#   * When the next file starts (or at END) checkIt() reports on the data
#     collected for the file just finished.
#   * <LC_CURRENCY ref="xx_YY"> pulls the currency definitions from
#     xx_YY.xml, looked up relative to the current working directory.

BEGIN {
    file = ""
}


# A new input file starts: report on the previous one, if any, and reset all
# per-file state.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
}

# Main rule, executed for every input line.
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
    # are boo anyways.
    if ( /\x0D$/ )
    {
        # Name the file as well; with several input files the line number
        # alone does not identify the offending place.
        print "Error: not Unix line ending in " file " line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks exactly like the literal <FormatCode>
            # pattern further down does, otherwise indented format codes
            # that only use the placeholder are never checked.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # Characters to drop from the field length: the 11
                # characters of replaceTo=" plus the closing quote, plus a
                # trailing > or /> if present.
                nCut = 12
                if ( $j ~ />$/ )
                    ++nCut
                if ( $j ~ /\/>$/ )
                    ++nCut
                sReplaceTo = substr( $j, 12, length($j)-nCut )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            # Format indices 12-15 and 17 have to use the
            # usedInCompatibleFormatCodes currency symbol.
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # arr[3] is the text between <FormatCode> and </FormatCode>. Split
        # it at "[$" and "-hexdigits]" so that the currency symbols end up
        # as separate pieces.
        nParts = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Walk the pieces by index; the order of for(... in ...) is
        # unspecified and would make the report order unstable.
        for ( j=1; j<=nParts; ++j )
        {
            # Pieces containing '#', '0' or "[NatNum" are number format
            # parts, not currency symbols. The bracket expression is
            # equivalent to the \# escape while avoiding an escape sequence
            # that is not a regexp operator.
            if ( code[j] && code[j] !~ /[#0]|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # Characters to drop from the field length: the 5
                # characters of ref=" plus the closing quote, plus a
                # trailing > or /> if present.
                nCut = 6
                if ( $j ~ />$/ )
                    ++nCut
                if ( $j ~ /\/>$/ )
                    ++nCut
                locale = substr( $j, 6, length($j)-nCut )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline <file replaces $0 and NF, which this loop is
                # still iterating over; remember the record to restore it.
                oldrecord = $0
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
                $0 = oldrecord
            }
        }
    }
    else
        getCurrencyParams()
}


END {
    # After a line ending error the collected data is incomplete, skip the
    # report in that case.
    if ( file && !crlf )
        checkIt()
}


# Evaluate the current record ($0) for currency definition elements and
# store their values under index nCurrencies. The index is advanced when the
# closing </Currency> tag is seen.
# Reads the globals file, line and sRefCurrencyFromLocale for the position
# information stored alongside each value.
# arr is a local scratch array, callers pass no arguments.
function getCurrencyParams(    arr) {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
        SymbolCompati[nCurrencies] = \
            ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
        # The arrays are reused for every input file. Clear this slot so
        # that a missing child element does not inherit the value of a
        # previously processed file.
        IDs[nCurrencies] = ""
        IDLine[nCurrencies] = ""
        Symbols[nCurrencies] = ""
        SymbolLine[nCurrencies] = ""
        BankSymbols[nCurrencies] = ""
        BankSymbolLine[nCurrencies] = ""
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        split( $0, arr, /<|>/ )
        if ( sRefCurrencyFromLocale )
            IDLine[nCurrencies] = file " line " line \
                " (referenced from " sRefCurrencyFromLocale ")"
        else
            IDLine[nCurrencies] = file " line " line
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        if ( sRefCurrencyFromLocale )
            SymbolLine[nCurrencies] = file " line " line \
                " (referenced from " sRefCurrencyFromLocale ")"
        else
            SymbolLine[nCurrencies] = file " line " line
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        if ( sRefCurrencyFromLocale )
            BankSymbolLine[nCurrencies] = file " line " line \
                " (referenced from " sRefCurrencyFromLocale ")"
        else
            BankSymbolLine[nCurrencies] = file " line " line
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}


# Report on the data collected for the global 'file':
#   - format code symbols that are the generic currency symbol (bad), that
#     match no defined CurrencySymbol (unknown), or that are used in a
#     compatible format code without being the usedInCompatibleFormatCodes
#     symbol (badauto);
#   - if any of those occurred, the list of defined symbols; otherwise
#     duplicate or missing default / compatible flags;
#   - CurrencyIDs that are not three ASCII upper case letters or differ
#     from the BankSymbol.
# An empty line terminates the report if anything was printed.
# All parameters are local scratch variables, callers pass no arguments.
function checkIt(    j, state, bad, bDef, bHasDefault, bHasCompati) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print "    bad: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "'   (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Something is wrong with the format codes, list what is defined.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "'   (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine, verify that there is exactly one default
        # and exactly one compatible currency.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print "  no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print "  no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file == "zh_MO.xml" && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "'   (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "'   (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}


# Look up a format code symbol in the currency symbols of the current file.
#   format: the symbol extracted from a format code
# Returns 0 if no CurrencySymbol equals format, 1 if one does but none of
# the matches is flagged usedInCompatibleFormatCodes, 2 if a flagged one
# matches.
# nSym and state are local scratch variables, callers pass only format.
function FormatInSymbol( format,    nSym, state ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}

# vim: ts=4 sw=4 expandtab