summaryrefslogtreecommitdiffstats
path: root/i18npool/source/localedata/data/currency-check.awk
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/localedata/data/currency-check.awk')
-rw-r--r--i18npool/source/localedata/data/currency-check.awk335
1 files changed, 335 insertions, 0 deletions
diff --git a/i18npool/source/localedata/data/currency-check.awk b/i18npool/source/localedata/data/currency-check.awk
new file mode 100644
index 000000000..1246211e1
--- /dev/null
+++ b/i18npool/source/localedata/data/currency-check.awk
@@ -0,0 +1,335 @@
+#!/usr/bin/gawk -f
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+# Usage: gawk -f currency-check.awk *.xml
+# Check any
+# <FormatCode>...[$xxx-...]...</FormatCode>
+# against every
+# <CurrencySymbol>xxx</CurrencySymbol>
+# definition of the same XML file and output symbols if no match was found.
+# For formatindex="12" to formatindex="15" and for formatindex="17" it is
+# checked if the used currency symbol is the usedInCompatibleFormatCodes
+# currency symbol as it is needed by the number formatter.
+# Also generates output if the generic currency sign U+00A4 (UTF-8 bytes
+# 0xC2 0xA4) is used instead of a real currency symbol.
+# Author: Eike Rathke <er@openoffice.org>
+
+# Start without a current file so that the very first input record is seen
+# as the beginning of a new file by the file != FILENAME rule.
+BEGIN {
+ file = ""
+}
+
+
+# Runs on the first record of each input file: report on the file that was
+# just finished (if there was one), then reset all per-file state.
+file != FILENAME {
+    if ( file )
+        checkIt()
+    file = FILENAME
+    line = 0
+    nFormats = 0
+    nCurrencies = 0
+    bFormatAuto = 0
+    sReplaceFrom = ""
+    sReplaceTo = ""
+    sMatchReplace = ""
+    sRefCurrencyFromLocale = ""
+    crlf = 0
+    # Resetting the counters alone would keep the previous file's entries in
+    # the arrays, and a <Currency> of this file that lacks one of its child
+    # elements would then silently be checked against that stale data.
+    delete FormatLine
+    delete FormatAuto
+    delete Formats
+    delete SymbolDefault
+    delete SymbolCompati
+    delete IDLine
+    delete IDs
+    delete SymbolLine
+    delete Symbols
+    delete BankSymbolLine
+    delete BankSymbols
+}
+
+# Per-record dispatcher. Counts lines, rejects CrLf line endings, collects
+# the currency symbols used in <FormatCode> elements and hands every record
+# that is not format related to getCurrencyParams(). An <LC_CURRENCY ref="x">
+# makes the currencies of x.xml be read in place of own definitions.
+{
+    ++line
+    # If run under Unix a CrLf spoils ...$ line end checks, so give up on the
+    # first one. crlf tells the END rule not to run the final check.
+    if ( /\x0D$/ )
+    {
+        print "Error: not Unix line ending in line " line
+        crlf = 1
+        exit(1)
+    }
+    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
+    {
+        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
+        {
+            sReplaceFrom = "\\[CURRENCY\\]"
+            # Allow leading blanks like the literal <FormatCode> pattern
+            # below does, otherwise indented format codes using the
+            # placeholder are never looked at.
+            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
+        }
+        for ( j=2; j<=NF; ++j )
+        {
+            if ( $j ~ /^replaceTo="/ )
+            {
+                # Strip the 11 characters of replaceTo=" and the closing
+                # quote, plus a trailing > or /> if the field ends the tag.
+                l = 12
+                if ( $j ~ />$/ )
+                    ++l
+                if ( $j ~ /\/>$/ )
+                    ++l
+                sReplaceTo = substr( $j, 12, length($j)-l )
+            }
+        }
+    }
+    else if ( $1 ~ /^<FormatElement(>|$)/ )
+    {
+        # Remember for the following <FormatCode> whether it is one of the
+        # currency formats with formatindex 12 to 15 or 17.
+        if ( $0 ~ /usage="CURRENCY"/ )
+        {
+            if ( $0 ~ /formatindex="1[23457]"/ )
+                bFormatAuto = 1
+            else
+                bFormatAuto = 0
+        }
+    }
+    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
+            (sMatchReplace && $0 ~ sMatchReplace ) )
+    {
+        if ( sReplaceFrom )
+            gsub( sReplaceFrom, sReplaceTo )
+        split( $0, arr, /<|>/ )
+        # Cut the code at every [$ and -hex] so that the currency symbols
+        # end up as pieces of their own. Walk the pieces by index to record
+        # them in the order they appear in the format code.
+        nCode = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
+        for ( j=1; j<=nCode; ++j )
+        {
+            # Pieces containing #, 0 or [NatNum are number format parts, not
+            # currency symbols.
+            if ( code[j] && code[j] !~ /[#0]|\[NatNum/ )
+            {
+                FormatLine[nFormats] = file " line " line
+                FormatAuto[nFormats] = bFormatAuto
+                Formats[nFormats++] = code[j]
+            }
+        }
+        bFormatAuto = 0
+    }
+    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
+    {
+        for ( j=2; j<=NF; ++j )
+        {
+            if ( $j ~ /^ref="/ )
+            {
+                # Strip the 5 characters of ref=" and the closing quote,
+                # plus a trailing > or /> if the field ends the tag.
+                l = 6
+                if ( $j ~ />$/ )
+                    ++l
+                if ( $j ~ /\/>$/ )
+                    ++l
+                locale = substr( $j, 6, length($j)-l )
+                sRefCurrencyFromLocale = file
+                oldfile = file
+                oldline = line
+                # getline without a variable replaces $0 and NF, which this
+                # loop still iterates over, so keep the record to put back.
+                oldrecord = $0
+                file = locale ".xml"
+                line = 0
+                while ( (getline <file) > 0 )
+                {
+                    ++line
+                    getCurrencyParams()
+                }
+                close( file )
+                if ( !line )
+                    print "ref locale not available: " file \
+                        " (from " oldfile " line " oldline ")"
+                file = oldfile
+                line = oldline
+                sRefCurrencyFromLocale = ""
+                $0 = oldrecord
+            }
+        }
+    }
+    else
+        getCurrencyParams()
+}
+
+
+# After the last record: check the file whose data is still pending, unless
+# processing was aborted because of a CrLf line ending (crlf set).
+END {
+ if ( file && !crlf )
+ checkIt()
+}
+
+
+# Pick up the data of a <Currency> element from the current record and store
+# it at index nCurrencies of the currency arrays. The closing </Currency>
+# tag moves on to the next index.
+# Assumes that each element is on a line on its own!
+function getCurrencyParams() {
+    if ( $1 ~ /^<Currency(>|$)/ )
+    {
+        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
+        SymbolCompati[nCurrencies] = ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
+    }
+    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
+    {
+        # Splitting at < and > leaves the element content in arr[3].
+        split( $0, arr, /<|>/ )
+        IDLine[nCurrencies] = currencyLocation()
+        IDs[nCurrencies] = arr[3]
+    }
+    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
+    {
+        split( $0, arr, /<|>/ )
+        SymbolLine[nCurrencies] = currencyLocation()
+        Symbols[nCurrencies] = arr[3]
+    }
+    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
+    {
+        split( $0, arr, /<|>/ )
+        BankSymbolLine[nCurrencies] = currencyLocation()
+        BankSymbols[nCurrencies] = arr[3]
+    }
+    else if ( $1 ~ /^<\/Currency>/ )
+    {
+        ++nCurrencies
+    }
+}
+
+# Describe the position of the current record for messages, mentioning the
+# referencing file if the record is read on behalf of an LC_CURRENCY ref.
+function currencyLocation() {
+    if ( sRefCurrencyFromLocale )
+        return file " line " line " (referenced from " sRefCurrencyFromLocale ")"
+    return file " line " line
+}
+
+
+# Report the findings for the file whose formats and currencies have been
+# collected (named by the global file).
+# First every currency symbol used in a format code is looked up with
+# FormatInSymbol(), which yields 0 for an unknown symbol, 1 for a defined
+# one and 2 for the one flagged usedInCompatibleFormatCodes. If any format
+# is wrong the defined currencies are listed for reference, otherwise the
+# currency definitions themselves are checked for exactly one default and
+# exactly one compatible currency. Finally CurrencyID and BankSymbol of
+# each currency are verified. An empty line ends the output of a file that
+# had findings.
+# All names after format-free parameter list are local work variables;
+# callers invoke checkIt() without arguments.
+function checkIt(    j, bad, bDef, bHasDefault, bHasCompati, state ) {
+    bad = 0
+    for ( j=0; j<nFormats; ++j )
+    {
+        state = FormatInSymbol( Formats[j] )
+        # "\xc2\xa4" is the generic currency sign in UTF-8.
+        if ( Formats[j] == "\xc2\xa4" )
+        {
+            bad = 1
+            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
+        }
+        else if ( state == 0 )
+        {
+            bad = 1
+            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
+        }
+        else if ( FormatAuto[j] && state < 2 )
+        {
+            # Formats recorded with FormatAuto set must use the compatible
+            # currency symbol.
+            bad = 1
+            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
+        }
+    }
+    if ( bad )
+    {
+        # Something is wrong with the formats, list what is available.
+        for ( j=0; j<nCurrencies; ++j )
+        {
+            bDef = 0
+            if ( Symbols[j] == "\xc2\xa4" )
+                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
+            if ( SymbolDefault[j] )
+            {
+                bDef = 1
+                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
+            }
+            if ( SymbolCompati[j] )
+            {
+                bDef = 1
+                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
+            }
+            if ( !bDef )
+                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
+        }
+    }
+    else
+    {
+        bHasDefault = 0
+        bHasCompati = 0
+        for ( j=0; j<nCurrencies; ++j )
+        {
+            if ( Symbols[j] == "\xc2\xa4" )
+            {
+                bad = 1
+                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
+            }
+            if ( SymbolDefault[j] )
+            {
+                if ( !bHasDefault )
+                    bHasDefault = 1
+                else
+                {
+                    bad = 1
+                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
+                }
+            }
+            if ( SymbolCompati[j] )
+            {
+                if ( !bHasCompati )
+                    bHasCompati = 1
+                else
+                {
+                    bad = 1
+                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
+                }
+            }
+        }
+        if ( !bHasDefault )
+        {
+            bad = 1
+            print " no default: (" file ")"
+        }
+        if ( !bHasCompati )
+        {
+            bad = 1
+            print " no compati: (" file ")"
+        }
+    }
+    for ( j=0; j<nCurrencies; ++j )
+    {
+        # Check if CurrencyID at least resembles some ISO 4217 code.
+        # The only exception is zh_MO that had an erroneous original data set
+        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
+        # CurrencyID now) and needs that entry for legacy documents. The file
+        # is recognized by its base name so that the exception also holds
+        # if the script is run on a path like some/dir/zh_MO.xml.
+        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
+        # lower case except 'a', regardless of IGNORECASE setting, hence this
+        # ugly notation. [[:upper:]] wouldn't be correct since we want only
+        # ASCII to match.
+        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
+              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
+        {
+            bad = 1
+            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
+        }
+        # CurrencyID should equal BankSymbol for now.
+        if ( IDs[j] != BankSymbols[j] )
+        {
+            bad = 1
+            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
+                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
+        }
+    }
+    if ( bad )
+        print ""
+}
+
+
+# Look up a currency symbol taken from a format code among the first
+# nCurrencies entries of Symbols[].
+# Returns 0 if no currency defines the symbol, 1 if it is defined but not by
+# a currency flagged in SymbolCompati[], and 2 if a currency flagged in
+# SymbolCompati[] defines it.
+# nSym and state are local work variables, callers pass only format.
+function FormatInSymbol( format,    nSym, state ) {
+    state = 0
+    for ( nSym=0; nSym<nCurrencies; ++nSym )
+    {
+        if ( format != Symbols[nSym] )
+            continue
+        # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
+        # for AZM and AZN), continue to lookup if the match isn't the
+        # compatible one.
+        if ( SymbolCompati[nSym] )
+            return 2
+        state = 1
+    }
+    return state
+}
+
+# vim: ts=4 sw=4 expandtab