diff options
Diffstat (limited to '')
-rw-r--r-- | i18npool/source/nativenumber/data/numberchar.h | 277 | ||||
-rw-r--r-- | i18npool/source/nativenumber/nativenumbersupplier.cxx | 1275 |
2 files changed, 1552 insertions, 0 deletions
diff --git a/i18npool/source/nativenumber/data/numberchar.h b/i18npool/source/nativenumber/data/numberchar.h new file mode 100644 index 0000000000..a711e96072 --- /dev/null +++ b/i18npool/source/nativenumber/data/numberchar.h @@ -0,0 +1,277 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H +#define INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H + +#include <sal/types.h> + +namespace i18npool { + +const sal_Int16 NumberChar_HalfWidth = 0; +const sal_Int16 NumberChar_FullWidth = 1; +const sal_Int16 NumberChar_Lower_zh = 2; +const sal_Int16 NumberChar_Upper_zh = 3; +const sal_Int16 NumberChar_Upper_zh_TW = 4; +const sal_Int16 NumberChar_Modern_ja = 5; +const sal_Int16 NumberChar_Traditional_ja= 6; +const sal_Int16 NumberChar_Lower_ko = 7; +const sal_Int16 NumberChar_Upper_ko = 8; +const sal_Int16 NumberChar_Hangul_ko = 9; +const sal_Int16 NumberChar_Indic_ar = 10; +const sal_Int16 NumberChar_EastIndic_ar = 11; +const sal_Int16 NumberChar_hi = 12; +const sal_Int16 NumberChar_th = 13; +const sal_Int16 NumberChar_or = 14; +const sal_Int16 NumberChar_mr = 15; +const sal_Int16 NumberChar_bn = 16; +const sal_Int16 NumberChar_pa = 17; +const sal_Int16 NumberChar_gu = 18; +const sal_Int16 NumberChar_ta = 19; +const sal_Int16 NumberChar_te = 20; +const sal_Int16 NumberChar_kn = 21; +const sal_Int16 NumberChar_ml = 22; +const sal_Int16 NumberChar_lo = 23; +const sal_Int16 NumberChar_bo = 24; +const sal_Int16 NumberChar_my = 25; +const sal_Int16 NumberChar_km = 26; +const sal_Int16 NumberChar_mn = 27; +const sal_Int16 NumberChar_he = 28; +const sal_Int16 NumberChar_ne = 29; +const sal_Int16 NumberChar_dz = 30; +const sal_Int16 NumberChar_cu = 31; +const sal_Int16 NumberChar_Count = 32; + +const sal_Unicode NumberChar[][10] = { +// 0 1 2 3 4 5 6 7 8 9 + { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Half Width (Ascii) + { 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19 }, // Full Width + { 0x3007, 0x4E00, 0x4E8c, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Chinese Lower + { 0x96F6, 0x58F9, 0x8D30, 0x53C1, 0x8086, 0x4F0D, 0x9646, 0x67D2, 0x634C, 0x7396 }, // S. 
Chinese Upper + { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x8086, 0x4F0D, 0x9678, 0x67D2, 0x634C, 0x7396 }, // T. Chinese Upper + { 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Modern + { 0x96F6, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad. + { 0x96F6, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Lower + { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper + { 0xC601, 0xC77C, 0xC774, 0xC0BC, 0xC0AC, 0xC624, 0xC721, 0xCE60, 0xD314, 0xAD6C }, // Korean Hangul + { 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669 }, // Arabic Indic + { 0x06F0, 0x06F1, 0x06F2, 0x06F3, 0x06F4, 0x06F5, 0x06F6, 0x06F7, 0x06F8, 0x06F9 }, // Est. Arabic Indic + { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Indic (Devanagari) + { 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59 }, // Thai + { 0x0B66, 0x0B67, 0x0B68, 0x0B69, 0x0B6A, 0x0B6B, 0x0B6C, 0x0B6D, 0x0B6E, 0x0B6F }, // Odia (digits U+0B66..U+0B6F) + { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Marathi + { 0x09E6, 0x09E7, 0x09E8, 0x09E9, 0x09EA, 0x09EB, 0x09EC, 0x09ED, 0x09EE, 0x09EF }, // Bengali + { 0x0A66, 0x0A67, 0x0A68, 0x0A69, 0x0A6A, 0x0A6B, 0x0A6C, 0x0A6D, 0x0A6E, 0x0A6F }, // Punjabi (Gurmukhi) + { 0x0AE6, 0x0AE7, 0x0AE8, 0x0AE9, 0x0AEA, 0x0AEB, 0x0AEC, 0x0AED, 0x0AEE, 0x0AEF }, // Gujarati + { 0x0030, 0x0BE7, 0x0BE8, 0x0BE9, 0x0BEA, 0x0BEB, 0x0BEC, 0x0BED, 0x0BEE, 0x0BEF }, // Tamil + { 0x0C66, 0x0C67, 0x0C68, 0x0C69, 0x0C6A, 0x0C6B, 0x0C6C, 0x0C6D, 0x0C6E, 0x0C6F }, // Telugu + { 0x0CE6, 0x0CE7, 0x0CE8, 0x0CE9, 0x0CEA, 0x0CEB, 0x0CEC, 0x0CED, 0x0CEE, 0x0CEF }, // Kannada + { 0x0D66, 0x0D67, 0x0D68, 0x0D69, 0x0D6A, 0x0D6B, 0x0D6C, 0x0D6D, 0x0D6E, 0x0D6F }, // Malayalam (digits U+0D66..U+0D6F) + { 0x0ED0, 0x0ED1, 0x0ED2, 0x0ED3, 0x0ED4, 0x0ED5, 0x0ED6, 0x0ED7,
0x0ED8, 0x0ED9 }, // Lao + { 0x0F20, 0x0F21, 0x0F22, 0x0F23, 0x0F24, 0x0F25, 0x0F26, 0x0F27, 0x0F28, 0x0F29 }, // Tibetan + { 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049 }, // Myanmar + { 0x17E0, 0x17E1, 0x17E2, 0x17E3, 0x17E4, 0x17E5, 0x17E6, 0x17E7, 0x17E8, 0x17E9 }, // Cambodian (Khmer) + { 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, 0x1817, 0x1818, 0x1819 }, // Mongolian + { 0x0020, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8 }, // Hebrew + { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Nepali + { 0x0F20, 0x0F21, 0x0F22, 0x0F23, 0x0F24, 0x0F25, 0x0F26, 0x0F27, 0x0F28, 0x0F29 }, // Dzongkha + { 0x0030, 0x0430, 0x0432, 0x0433, 0x0434, 0x0454, 0x0455, 0x0437, 0x0438, 0x0473 }, // Church Slavic +}; + +static sal_Unicode DecimalChar[] = { + 0x002E, // Half Width (Ascii) + 0xFF0E, // Full Width + 0xFF0E, // Chinese Lower + 0x70B9, // S. Chinese Upper + 0x9EDE, // T. Chinese Upper + 0x30FB, // Japanese Modern + 0x30FB, // Japanese Trad. + 0xFF0E, // Korean Lower + 0x9EDE, // Korean Upper + 0xC810, // Korean Hangul + 0x066B, // Arabic Indic + 0x066B, // Est. Arabic Indic + 0x0000, // Indic (Devanagari) + 0x0000, // Thai + 0x0000, // Odia + 0x0000, // Marathi + 0x0000, // Bengali + 0x0000, // Punjabi (Gurmukhi) + 0x0000, // Gujarati + 0x0000, // Tamil + 0x0000, // Telugu + 0x0000, // Kannada + 0x0000, // Malayalam + 0x0000, // Lao + 0x0000, // Tibetan + 0x0000, // Myanmar + 0x0000, // Cambodian (Khmer) + 0x0000, // Mongolian + 0x0000, // Hebrew + 0x0000, // Nepali + 0x0000, // Dzongkha + 0x0000, // Church Slavic +}; + +const sal_Unicode MinusChar[] = { + 0x002D, // Half Width (Ascii) + 0xFF0D, // Full Width + 0xFF0D, // Chinese Lower + 0x8D1F, // S. Chinese Upper + 0x5069, // T. Chinese Upper + 0x2212, // Japanese Modern + 0x2212, // Japanese Trad. 
+ 0xFF0D, // Korean Lower + 0xFF0D, // Korean Upper + 0xFF0D, // Korean Hangul + 0x0000, // Arabic Indic + 0x2212, // Est. Arabic Indic + 0x0000, // Indic + 0x0000, // Thai + 0x0000, // Odia + 0x0000, // Marathi + 0x0000, // Bengali + 0x0000, // Punjabi + 0x0000, // Gujarati + 0x0000, // Tamil + 0x0000, // Telugu + 0x0000, // Kannada + 0x0000, // Malayalam + 0x0000, // Lao + 0x0000, // Tibetan + 0x0000, // Myanmar + 0x0000, // Cambodian (Khmer) + 0x0000, // Mongolian + 0x0000, // Hebrew + 0x0000, // Nepali + 0x0000, // Dzongkha + 0x0000, // Church Slavic +}; + +static sal_uInt16 SeparatorChar[] = { + 0x002C, // Half Width (Ascii) + 0xFF0C, // Full Width + 0x3001, // Chinese Lower + 0x3001, // S. Chinese Upper + 0x3001, // T. Chinese Upper + 0x3001, // Japanese Modern + 0x3001, // Japanese Trad. + 0x002C, // Korean Lower + 0x002C, // Korean Upper + 0x002C, // Korean Hangul + 0x0000, // Arabic Indic + 0x066C, // Est. Arabic Indic + 0x0000, // Indic + 0x0000, // Thai + 0x0000, // Odia + 0x0000, // Marathi + 0x0000, // Bengali + 0x0000, // Punjabi + 0x0000, // Gujarati + 0x0000, // Tamil + 0x0000, // Telugu + 0x0000, // Kannada + 0x0000, // Malayalam + 0x0000, // Lao + 0x0000, // Tibetan + 0x0000, // Myanmar + 0x0000, // Cambodian (Khmer) + 0x0000, // Mongolian + 0x0000, // Hebrew + 0x0000, // Nepali + 0x0000, // Dzongkha + 0x0000, // Church Slavic +}; + +#define NUMBER_ZERO NumberChar[NumberChar_HalfWidth][0] // 0x0030 +#define NUMBER_ONE NumberChar[NumberChar_HalfWidth][1] // 0x0031 +#define NUMBER_NINE NumberChar[NumberChar_HalfWidth][9] // 0x0039 +#define isNumber(n) ( NUMBER_ZERO <= n && n <= NUMBER_NINE ) +#define isDecimal(n) ( n == DecimalChar[NumberChar_HalfWidth] ) +#define isMinus(n) ( n == MinusChar[NumberChar_HalfWidth] ) +#define isSeparator(n) ( n == SeparatorChar[NumberChar_HalfWidth] ) + +const sal_Int16 Multiplier_Lower_zh = 0; +const sal_Int16 Multiplier_Upper_zh = 1; +const sal_Int16 Multiplier_Lower_zh_TW = 2; +const sal_Int16 
Multiplier_Upper_zh_TW = 3; +const sal_Int16 Multiplier_Lower_ko = 4; +const sal_Int16 Multiplier_Upper_ko = 5; +const sal_Int16 Multiplier_Hangul_ko = 6; +const sal_Int16 Multiplier_Modern_ja = 7; +const sal_Int16 Multiplier_Traditional_ja = 8; +const sal_Int16 Multiplier_Count = 9; + +const sal_Int16 ExponentCount_6_CJK = 6; + +const sal_Int16 MultiplierExponent_6_CJK[ExponentCount_6_CJK] = { + 12, 8, 4, 3, 2, 1 +}; +const sal_Unicode MultiplierChar_6_CJK[][ExponentCount_6_CJK] = { + {0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341}, // S. Chinese Lower + {0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE}, // S. Chinese Upper + {0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341}, // T. Chinese Lower + {0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE}, // T. Chinese Upper + {0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Korean Lower + {0x5146, 0x5104, 0x842C, 0x9621, 0x4F70, 0x62FE}, // Korean Upper + {0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED}, // Korean Hangul + {0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Japanese Modern + {0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE} // Japanese Traditional +}; + +const sal_Int16 ExponentCount_2_CJK = 2; + +const sal_Int16 MultiplierExponent_2_CJK[ExponentCount_2_CJK] = { + 8, 4, +}; + +const sal_Unicode MultiplierChar_2_CJK[][ExponentCount_2_CJK] = { + {0x4EBF, 0x4E07}, // S. Chinese Lower + {0x4EBF, 0x4E07}, // S. Chinese Upper + {0x5104, 0x842C}, // T. Chinese Lower + {0x5104, 0x842C}, // T. Chinese Upper + {0x5104, 0x4E07}, // Korean Lower + {0x5104, 0x842C}, // Korean Upper + {0xC5B5, 0xB9CC}, // Korean Hangul + {0x5104, 0x4E07}, // Japanese Modern + {0x5104, 0x842C} // Japanese Traditional +}; + +const sal_Int16 ExponentCount_7_CJK = 7; + +const sal_Int16 MultiplierExponent_7_CJK[ExponentCount_7_CJK] = { + 16, 12, 8, 4, 3, 2, 1 +}; +const sal_Unicode MultiplierChar_7_CJK[][ExponentCount_7_CJK] = { + {0x4EAC, 0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341}, // S. 
Chinese Lower + {0x4EAC, 0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE}, // S. Chinese Upper + {0x4EAC, 0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341}, // T. Chinese Lower + {0x4EAC, 0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE}, // T. Chinese Upper + {0x4EAC, 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Korean Lower + {0x4EAC, 0x5146, 0x5104, 0x842C, 0x9621, 0x4F70, 0x62FE}, // Korean Upper + {0x4EAC, 0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED}, // Korean Hangul + {0x4EAC, 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Japanese Modern + {0x4EAC, 0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE} // Japanese Traditional +}; + +} + +#endif // INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/nativenumber/nativenumbersupplier.cxx b/i18npool/source/nativenumber/nativenumbersupplier.cxx new file mode 100644 index 0000000000..0618f30059 --- /dev/null +++ b/i18npool/source/nativenumber/nativenumbersupplier.cxx @@ -0,0 +1,1275 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <i18nlangtag/languagetag.hxx> +#include <i18nlangtag/mslangid.hxx> +#include <rtl/ustrbuf.hxx> +#include <sal/macros.h> +#include <nativenumbersupplier.hxx> +#include <localedata.hxx> +#include "data/numberchar.h" +#include <comphelper/processfactory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <o3tl/string_view.hxx> +#include <cstddef> +#include <map> +#include <mutex> +#include <memory> +#include <string_view> +#include <unordered_map> +#include <com/sun/star/i18n/CharacterClassification.hpp> +#include <com/sun/star/i18n/NativeNumberMode.hpp> +#include <com/sun/star/linguistic2/NumberText.hpp> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::lang; + +namespace { + +struct Number { + sal_Int16 number; + const sal_Unicode *multiplierChar; + sal_Int16 numberFlag; + sal_Int16 exponentCount; + const sal_Int16 *multiplierExponent; +}; + +} + +#define NUMBER_OMIT_ZERO (1 << 0) +#define NUMBER_OMIT_ONLY_ZERO (1 << 1) +#define NUMBER_OMIT_ONE_1 (1 << 2) +#define NUMBER_OMIT_ONE_2 (1 << 3) +#define NUMBER_OMIT_ONE_3 (1 << 4) +#define NUMBER_OMIT_ONE_4 (1 << 5) +#define NUMBER_OMIT_ONE_5 (1 << 6) +#define NUMBER_OMIT_ONE_6 (1 << 7) +#define NUMBER_OMIT_ONE_7 (1 << 8) +#define NUMBER_OMIT_ONE (NUMBER_OMIT_ONE_1|NUMBER_OMIT_ONE_2|NUMBER_OMIT_ONE_3|NUMBER_OMIT_ONE_4|NUMBER_OMIT_ONE_5|NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7) +#define NUMBER_OMIT_ONE_CHECK(bit) (1 << (2 + bit)) +#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO ) +#define NUMBER_OMIT_ZERO_ONE ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE ) +#define NUMBER_OMIT_ONE_67 (NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7) +#define NUMBER_OMIT_ZERO_ONE_67 ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE_67 ) + +namespace i18npool { + +namespace { + +std::mutex theNatNumMutex; + +} + +static OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh); + +static OUString getCyrillicNativeNumberString(const 
OUString& aNumberString); + +/// Converts the first nCount characters of inStr, producing one output character per input +/// character, using row `number` of the NumberChar/SeparatorChar/DecimalChar/MinusChar tables: +/// - an ASCII digit becomes the matching NumberChar[number] entry; +/// - a ',' that has a digit on both sides becomes SeparatorChar[number]; +/// - a '.' or '-' directly followed by a digit becomes DecimalChar[number] or MinusChar[number]; +/// - when the table entry is 0 the ASCII character is kept, and every other character is copied. +/// If pOffset is non-null it is resized to nCount and filled with the identity mapping (offset i for output i). +/// @throws RuntimeException +static OUString AsciiToNativeChar( const OUString& inStr, sal_Int32 nCount, + Sequence< sal_Int32 >* pOffset, sal_Int16 number ) +{ + const sal_Unicode *src = inStr.getStr(); + rtl_uString *newStr = rtl_uString_alloc(nCount); + if (pOffset) + pOffset->realloc(nCount); + auto ppOffset = pOffset ? pOffset->getArray() : nullptr; + + for (sal_Int32 i = 0; i < nCount; i++) + { + sal_Unicode ch = src[i]; + if (isNumber(ch)) + newStr->buffer[i] = NumberChar[number][ ch - NUMBER_ZERO ]; + else if (i+1 < nCount && isNumber(src[i+1])) { + if (i > 0 && isNumber(src[i-1]) && isSeparator(ch)) + newStr->buffer[i] = SeparatorChar[number] ? SeparatorChar[number] : ch; + else + newStr->buffer[i] = isDecimal(ch) ? (DecimalChar[number] ? DecimalChar[number] : ch) : + isMinus(ch) ? (MinusChar[number] ? MinusChar[number] : ch) : ch; + } + else + newStr->buffer[i] = ch; + if (ppOffset) + ppOffset[i] = i; + } + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership +} + +/// Recursive helper of AsciiToNative(): writes the native text for the digit group +/// str[begin, begin+len) into dst starting at dst[count] and advances count. +/// multiChar_index selects the entry of number->multiplierChar that is appended after a +/// non-zero group (-1 means no multiplier character). When pOffset is non-null, every +/// character written records begin + startPos as its source offset. +/// @return true if a non-zero digit was found in the group. +static bool AsciiToNative_numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len, + sal_Unicode *dst, sal_Int32& count, sal_Int16 multiChar_index, Sequence< sal_Int32 >* pOffset, sal_Int32 startPos, + const Number *number, const sal_Unicode* numberChar) +{ + sal_Unicode multiChar = (multiChar_index == -1 ? 0 : number->multiplierChar[multiChar_index]); + auto ppOffset = pOffset ?
pOffset->getArray() : nullptr; + if ( len <= number->multiplierExponent[number->exponentCount-1] ) { + if (number->multiplierExponent[number->exponentCount-1] > 1) { + bool bNotZero = false; + for (const sal_Int32 end = begin+len; begin < end; begin++) { + if (bNotZero || str[begin] != NUMBER_ZERO) { + dst[count] = numberChar[str[begin] - NUMBER_ZERO]; + if (ppOffset) + ppOffset[count] = begin + startPos; + count++; + bNotZero = true; + } + } + if (bNotZero && multiChar > 0) { + dst[count] = multiChar; + if (ppOffset) + ppOffset[count] = begin + startPos; + count++; + } + return bNotZero; + } else if (str[begin] != NUMBER_ZERO) { + if (!(number->numberFlag & (multiChar_index < 0 ? 0 : NUMBER_OMIT_ONE_CHECK(multiChar_index))) || str[begin] != NUMBER_ONE) { + dst[count] = numberChar[str[begin] - NUMBER_ZERO]; + if (ppOffset) + ppOffset[count] = begin + startPos; + count++; + } + if (multiChar > 0) { + dst[count] = multiChar; + if (ppOffset) + ppOffset[count] = begin + startPos; + count++; + } + } else if (!(number->numberFlag & NUMBER_OMIT_ZERO) && count > 0 && dst[count-1] != numberChar[0]) { + dst[count] = numberChar[0]; + if (ppOffset) + ppOffset[count] = begin + startPos; + count++; + } + return str[begin] != NUMBER_ZERO; + } else { + bool bPrintPower = false; + // sal_Int16 last = 0; + for (sal_Int16 i = 1; i <= number->exponentCount; i++) { + sal_Int32 tmp = len - (i == number->exponentCount ? 0 : number->multiplierExponent[i]); + if (tmp > 0) { + bPrintPower |= AsciiToNative_numberMaker(str, begin, tmp, dst, count, + (i == number->exponentCount ? 
-1 : i), pOffset, startPos, number, numberChar); + begin += tmp; + len -= tmp; + } + } + if (bPrintPower) { + if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 && + dst[count-1] == numberChar[0]) + count--; + if (multiChar > 0) { + dst[count] = multiChar; + if (ppOffset) + ppOffset[count] = begin + startPos; + count++; + } + } + return bPrintPower; + } +} + +/// @throws RuntimeException +static OUString AsciiToNative( const OUString& inStr, sal_Int32 nCount, + Sequence< sal_Int32 >* pOffset, const Number* number ) +{ + OUString aRet; + + sal_Int32 strLen = inStr.getLength(); + const sal_Unicode *numberChar = NumberChar[number->number]; + + if (nCount > strLen) + nCount = strLen; + + if (nCount > 0) + { + const sal_Unicode *str = inStr.getStr(); + std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * 2 + 1]); + std::unique_ptr<sal_Unicode[]> srcStr(new sal_Unicode[nCount + 1]); // for keeping number without comma + sal_Int32 i, len = 0, count = 0; + + if (pOffset) + pOffset->realloc( nCount * 2 ); + auto ppOffset = pOffset ? pOffset->getArray() : nullptr; + bool bDoDecimal = false; + + for (i = 0; i <= nCount; i++) + { + if (i < nCount && isNumber(str[i])) { + if (bDoDecimal) { + newStr[count] = numberChar[str[i] - NUMBER_ZERO]; + if (ppOffset) + ppOffset[count] = i; + count++; + } + else + srcStr[len++] = str[i]; + } else { + if (len > 0) { + if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1])) + continue; // skip comma inside number string + bool bNotZero = false; + for (sal_Int32 begin = 0, end = len % number->multiplierExponent[0]; + end <= len; begin = end, end += number->multiplierExponent[0]) { + if (end == 0) continue; + sal_Int32 _count = count; + bNotZero |= AsciiToNative_numberMaker(srcStr.get(), begin, end - begin, newStr.get(), count, + end == len ? 
-1 : 0, pOffset, i - len, number, numberChar); + if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 && + newStr[count-1] == numberChar[0]) + count--; + if (bNotZero && _count == count && end != len) { + newStr[count] = number->multiplierChar[0]; + if (ppOffset) + ppOffset[count] = i - len; + count++; + } + } + if (! bNotZero && ! (number->numberFlag & NUMBER_OMIT_ONLY_ZERO)) { + newStr[count] = numberChar[0]; + if (ppOffset) + ppOffset[count] = i - len; + count++; + } + len = 0; + } + if (i < nCount) { + bDoDecimal = (!bDoDecimal && i < nCount-1 && isDecimal(str[i]) && isNumber(str[i+1])); + if (bDoDecimal) + newStr[count] = (DecimalChar[number->number] ? DecimalChar[number->number] : str[i]); + else if (i < nCount-1 && isMinus(str[i]) && isNumber(str[i+1])) + newStr[count] = (MinusChar[number->number] ? MinusChar[number->number] : str[i]); + else if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1])) + newStr[count] = (SeparatorChar[number->number] ? SeparatorChar[number->number] : str[i]); + else + newStr[count] = str[i]; + if (ppOffset) + ppOffset[count] = i; + count++; + } + } + } + + if (pOffset) + pOffset->realloc(count); + aRet = OUString(newStr.get(), count); + } + return aRet; +} + +namespace +{ +void NativeToAscii_numberMaker(sal_Int16 max, sal_Int16 prev, const sal_Unicode *str, + sal_Int32& i, sal_Int32 nCount, sal_Unicode *dst, sal_Int32& count, Sequence< sal_Int32 >* pOffset, + OUString& numberChar, OUString& multiplierChar) +{ + auto ppOffset = pOffset ? 
pOffset->getArray() : nullptr; + sal_Int16 curr = 0, num = 0, end = 0, shift = 0; + while (++i < nCount) { + if ((curr = sal::static_int_cast<sal_Int16>( numberChar.indexOf(str[i]) )) >= 0) { + if (num > 0) + break; + num = curr % 10; + } else if ((curr = sal::static_int_cast<sal_Int16>( multiplierChar.indexOf(str[i]) )) >= 0) { + curr = MultiplierExponent_7_CJK[curr % ExponentCount_7_CJK]; + if (prev > curr && num == 0) num = 1; // One may be omitted in informal format + shift = end = 0; + if (curr >= max) + max = curr; + else if (curr > prev) + shift = max - curr; + else + end = curr; + while (end++ < prev) { + dst[count] = NUMBER_ZERO + (end == prev ? num : 0); + if (ppOffset) + ppOffset[count] = i; + count++; + } + if (shift) { + count -= max; + for (const sal_Int32 countEnd = count+shift; count < countEnd; count++) { + dst[count] = dst[count + curr]; + if (ppOffset) + ppOffset[count] = ppOffset[count + curr]; + } + max = curr; + } + NativeToAscii_numberMaker(max, curr, str, i, nCount, dst, + count, pOffset, numberChar, multiplierChar); + return; + } else + break; + } + while (end++ < prev) { + dst[count] = NUMBER_ZERO + (end == prev ? num : 0); + if (ppOffset) + ppOffset[count] = i - 1; + count++; + } +} + +/// @throws RuntimeException +OUString NativeToAscii(const OUString& inStr, + sal_Int32 nCount, Sequence< sal_Int32 >* pOffset ) +{ + OUString aRet; + + sal_Int32 strLen = inStr.getLength(); + + if (nCount > strLen) + nCount = strLen; + + if (nCount > 0) { + const sal_Unicode *str = inStr.getStr(); + std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * MultiplierExponent_7_CJK[0] + 2]); + if (pOffset) + pOffset->realloc( nCount * MultiplierExponent_7_CJK[0] + 1 ); + auto ppOffset = pOffset ? 
pOffset->getArray() : nullptr; + sal_Int32 count = 0, index; + sal_Int32 i; + + // Flatten the per-script tables into strings so that indexOf()/find() can look up a character; + // the digit value is recovered below as index % 10 and the multiplier column as + // index % ExponentCount_7_CJK. + OUString numberChar, multiplierChar, decimalChar, separatorChar; + numberChar = OUString(NumberChar[0], 10*NumberChar_Count); + multiplierChar = OUString(MultiplierChar_7_CJK[0], ExponentCount_7_CJK*Multiplier_Count); + decimalChar = OUString(DecimalChar, NumberChar_Count); + std::u16string_view const minusChar(MinusChar, NumberChar_Count); + separatorChar = OUString( + reinterpret_cast<sal_Unicode *>(SeparatorChar), NumberChar_Count); + + for ( i = 0; i < nCount; i++) { + if ((index = multiplierChar.indexOf(str[i])) >= 0) { + if (count == 0 || !isNumber(newStr[count-1])) { // add 1 in front of multiplier + newStr[count] = NUMBER_ONE; + if (ppOffset) + ppOffset[count] = i; + count++; + } + index = MultiplierExponent_7_CJK[index % ExponentCount_7_CJK]; + NativeToAscii_numberMaker( + sal::static_int_cast<sal_Int16>( index ), sal::static_int_cast<sal_Int16>( index ), + str, i, nCount, newStr.get(), count, pOffset, + numberChar, multiplierChar); + } else { + if ((index = numberChar.indexOf(str[i])) >= 0) + newStr[count] = sal::static_int_cast<sal_Unicode>( (index % 10) + NUMBER_ZERO ); + else if (separatorChar.indexOf(str[i]) >= 0 && + (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 || + multiplierChar.indexOf(str[i+1]) >= 0))) + newStr[count] = SeparatorChar[NumberChar_HalfWidth]; + else if (decimalChar.indexOf(str[i]) >= 0 && + (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 || + multiplierChar.indexOf(str[i+1]) >= 0))) + // A decimal point is converted to the ASCII decimal point + // only when it is followed by a number or multiplier character + newStr[count] = DecimalChar[NumberChar_HalfWidth]; + else if (minusChar.find(str[i]) != std::u16string_view::npos && + (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 || + multiplierChar.indexOf(str[i+1]) >= 0))) + // A minus sign is converted to the ASCII minus sign + // only when it is followed by a number or multiplier character + newStr[count] = MinusChar[NumberChar_HalfWidth]; + else +
newStr[count] = str[i]; + if (ppOffset) + ppOffset[count] = i; + count++; + } + } + + if (pOffset) { + pOffset->realloc(count); + } + aRet = OUString(newStr.get(), count); + } + return aRet; +} + +const Number natnum4[4] = { + { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Modern_ja, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67, + ExponentCount_7_CJK, MultiplierExponent_7_CJK }, + { NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ZERO, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, +}; + +const Number natnum5[4] = { + { NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Traditional_ja, MultiplierChar_7_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE_67, + ExponentCount_7_CJK, MultiplierExponent_7_CJK }, + { NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, +}; + +const Number natnum6[4] = { + { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_FullWidth, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67, + ExponentCount_7_CJK, MultiplierExponent_7_CJK }, + { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, +}; + +const Number natnum7[4] = { + { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], 
NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Modern_ja, MultiplierChar_2_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE, + ExponentCount_2_CJK, MultiplierExponent_2_CJK }, + { NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, +}; + +const Number natnum8[4] = { + { NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, + { NumberChar_Traditional_ja, MultiplierChar_2_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE, + ExponentCount_2_CJK, MultiplierExponent_2_CJK }, + { NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }, +}; + +const Number natnum10 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }; +const Number natnum11 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ALL, + ExponentCount_6_CJK, MultiplierExponent_6_CJK }; + +//! ATTENTION: Do not change order of elements! +//! Append new languages to the end of the list! +const char *natnum1Locales[] = { + "zh_CN", + "zh_TW", + "ja", + "ko", + "he", + "ar", + "th", + "hi", + "or", + "mr", + "bn", + "pa", + "gu", + "ta", + "te", + "kn", + "ml", + "lo", + "bo", + "my", + "km", + "mn", + "ne", + "dz", + "fa", + "cu" +}; +const sal_Int16 nbOfLocale = SAL_N_ELEMENTS(natnum1Locales); + +//! ATTENTION: Do not change order of elements! +//! Number and order must match elements of natnum1Locales! 
+const sal_Int16 natnum1[] = { + NumberChar_Lower_zh, + NumberChar_Lower_zh, + NumberChar_Modern_ja, + NumberChar_Lower_ko, + NumberChar_he, + NumberChar_Indic_ar, + NumberChar_th, + NumberChar_hi, + NumberChar_or, + NumberChar_mr, + NumberChar_bn, + NumberChar_pa, + NumberChar_gu, + NumberChar_ta, + NumberChar_te, + NumberChar_kn, + NumberChar_ml, + NumberChar_lo, + NumberChar_bo, + NumberChar_my, + NumberChar_km, + NumberChar_mn, + NumberChar_ne, + NumberChar_dz, + NumberChar_EastIndic_ar, + NumberChar_cu +}; +const sal_Int16 sizeof_natnum1 = SAL_N_ELEMENTS(natnum1); + +//! ATTENTION: Do not change order of elements! +//! Order must match first elements of natnum1Locales! +const sal_Int16 natnum2[] = { + NumberChar_Upper_zh, + NumberChar_Upper_zh_TW, + NumberChar_Traditional_ja, + NumberChar_Upper_ko, + NumberChar_he +}; +const sal_Int16 sizeof_natnum2 = SAL_N_ELEMENTS(natnum2); + +sal_Int16 getLanguageNumber( const Locale& rLocale) +{ + // return zh_TW for TW, HK and MO, return zh_CN for other zh locales. + if (rLocale.Language == "zh") return MsLangId::isTraditionalChinese(rLocale) ? 1 : 0; + + for (sal_Int16 i = 2; i < nbOfLocale; i++) + if (rLocale.Language.equalsAsciiL(natnum1Locales[i], 2)) + return i; + + return -1; +} + +struct Separators +{ + sal_Unicode DecimalSeparator; + sal_Unicode ThousandSeparator; + Separators(const Locale& rLocale) + { + LocaleDataItem aLocaleItem = LocaleDataImpl::get()->getLocaleItem(rLocale); + DecimalSeparator = aLocaleItem.decimalSeparator.toChar(); + ThousandSeparator = aLocaleItem.thousandSeparator.toChar(); + } +}; + +Separators getLocaleSeparators(const Locale& rLocale, const OUString& rLocStr) +{ + // Guard the static variable below. 
+ std::scoped_lock aGuard(theNatNumMutex); + // At most a couple of hundred entries holding 4-byte structs, so the cache is never pruned + static std::unordered_map<OUString, Separators> aLocaleSeparatorsBuf; + auto it = aLocaleSeparatorsBuf.find(rLocStr); + if (it == aLocaleSeparatorsBuf.end()) + { + it = aLocaleSeparatorsBuf.emplace(rLocStr, Separators(rLocale)).first; + } + return it->second; +} + +/// Converts the number at the start of rNumberString to text using the NumberText service. +/// ASCII digits, the locale's decimal separator and a leading '-' are collected; the locale's +/// thousand separator is skipped once a digit or decimal separator has been seen; scanning +/// stops at the first other character. If no digit or decimal separator was found, the whole +/// input string is handed to the service instead. +/// sNumberTextParams is sent as a prefix followed by a space, unless it is empty or "cardinal". +OUString getNumberText(const Locale& rLocale, const OUString& rNumberString, + std::u16string_view sNumberTextParams) +{ + sal_Int32 i, count = 0; + const sal_Int32 len = rNumberString.getLength(); + const sal_Unicode* src = rNumberString.getStr(); + + OUString aLoc = LanguageTag::convertToBcp47(rLocale); + Separators aSeparators = getLocaleSeparators(rLocale, aLoc); + + OUStringBuffer sBuf(len); + for (i = 0; i < len; i++) + { + sal_Unicode ch = src[i]; + if (isNumber(ch) || ch == aSeparators.DecimalSeparator) + { + ++count; + sBuf.append(ch); + } + else if (ch == aSeparators.ThousandSeparator && count > 0) + continue; + else if (isMinus(ch) && count == 0) + sBuf.append(ch); + else + break; + } + + // Also handle month and day names for NatNum12 date formatting: + // with no number found, the unmodified input string is converted + const OUString& rNumberStr = (count == 0) ? rNumberString : sBuf.makeStringAndClear(); + + static auto xNumberText + = css::linguistic2::NumberText::create(comphelper::getProcessComponentContext()); + + // Guard the static variables below. + std::scoped_lock aGuard( theNatNumMutex ); + + OUString numbertext_prefix; + // default "cardinal" gets empty prefix + if (!sNumberTextParams.empty() && sNumberTextParams != u"cardinal") + numbertext_prefix = OUString::Concat(sNumberTextParams) + " "; + // Several hundred headings could cause typing lags because + // the number names are updated continuously during typing. + // We avoid this by buffering the result of the conversion.
+ static std::unordered_map<OUString, std::map<OUString, OUString>> aBuff; + auto& rItems = aBuff[rNumberStr]; + auto& rItem = rItems[numbertext_prefix + aLoc]; + if (rItem.isEmpty()) + { + rItem = xNumberText->getNumberText(numbertext_prefix + rNumberStr, rLocale); + // use number at missing number to text conversion + if (rItem.isEmpty()) + rItem = rNumberStr; + } + OUString sResult = rItem; + if (i != 0 && i < len) + sResult += rNumberString.subView(i); + return sResult; +} +} + +OUString NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale, + sal_Int16 nNativeNumberMode, + Sequence<sal_Int32>* pOffset, + std::u16string_view rNativeNumberParams) +{ + if (!isValidNatNumImpl(rLocale, nNativeNumberMode)) + return aNumberString; + + if (nNativeNumberMode == NativeNumberMode::NATNUM12) + { + // handle capitalization prefixes "capitalize", "upper", "lower" and "title" + + enum WhichCasing + { + CAPITALIZE, + UPPER, + LOWER, + TITLE + }; + + struct CasingEntry + { + std::u16string_view aLiteral; + WhichCasing eCasing; + }; + + static const CasingEntry Casings[] = + { + { std::u16string_view(u"capitalize"), CAPITALIZE }, + { std::u16string_view(u"upper"), UPPER }, + { std::u16string_view(u"lower"), LOWER }, + { std::u16string_view(u"title"), TITLE } + }; + + std::size_t nStripCase = 0; + size_t nCasing; + for (nCasing = 0; nCasing < SAL_N_ELEMENTS(Casings); ++nCasing) + { + if (o3tl::starts_with(rNativeNumberParams, Casings[nCasing].aLiteral)) + { + nStripCase = Casings[nCasing].aLiteral.size(); + break; + } + } + + if (nStripCase > 0 && (rNativeNumberParams.size() == nStripCase || + rNativeNumberParams[nStripCase++] == ' ')) + { + OUString aStr = getNumberText(rLocale, aNumberString, rNativeNumberParams.substr(nStripCase)); + + if (!xCharClass.is()) + xCharClass = CharacterClassification::create(comphelper::getProcessComponentContext()); + + switch (Casings[nCasing].eCasing) + { + case CAPITALIZE: + return 
xCharClass->toTitle(aStr, 0, 1, aLocale) + + (aStr.getLength() > 1 ? aStr.subView(1) : u""); + case UPPER: + return xCharClass->toUpper(aStr, 0, aStr.getLength(), aLocale); + case LOWER: + return xCharClass->toLower(aStr, 0, aStr.getLength(), aLocale); + case TITLE: + { + if ( rLocale.Language == "en" ) + { + // title case is common in English, so fix bugs of toTitle(): + // not "One Dollar *And* *Twenty-two* Cents", but + // "One Dollar *and* *Twenty-Two* Cents". + + // Add spaces after hyphens to separate the elements of the + // hyphenated compound words temporarily, allowing their + // capitalization by toTitle() + aStr = aStr.replaceAll("-", "- "); + aStr = xCharClass->toTitle(aStr, 0, aStr.getLength(), aLocale); + return aStr.replaceAll("- ", "-").replaceAll(" And ", " and "); + } + else + return xCharClass->toTitle(aStr, 0, aStr.getLength(), aLocale); + } + } + } + else + { + return getNumberText(rLocale, aNumberString, rNativeNumberParams); + } + } + + sal_Int16 langnum = getLanguageNumber(rLocale); + if (langnum == -1) + return aNumberString; + + const Number *number = nullptr; + sal_Int16 num = -1; + + switch (nNativeNumberMode) + { + case NativeNumberMode::NATNUM0: // Ascii + return NativeToAscii(aNumberString, aNumberString.getLength(), pOffset); + case NativeNumberMode::NATNUM1: // Char, Lower + num = natnum1[langnum]; + break; + case NativeNumberMode::NATNUM2: // Char, Upper + num = natnum2[langnum]; + break; + case NativeNumberMode::NATNUM3: // Char, FullWidth + num = NumberChar_FullWidth; + break; + case NativeNumberMode::NATNUM4: // Text, Lower, Long + number = &natnum4[langnum]; + break; + case NativeNumberMode::NATNUM5: // Text, Upper, Long + number = &natnum5[langnum]; + break; + case NativeNumberMode::NATNUM6: // Text, FullWidth + number = &natnum6[langnum]; + break; + case NativeNumberMode::NATNUM7: // Text. 
Lower, Short + number = &natnum7[langnum]; + break; + case NativeNumberMode::NATNUM8: // Text, Upper, Short + number = &natnum8[langnum]; + break; + case NativeNumberMode::NATNUM9: // Char, Hangul + num = NumberChar_Hangul_ko; + break; + case NativeNumberMode::NATNUM10: // Text, Hangul, Long + number = &natnum10; + break; + case NativeNumberMode::NATNUM11: // Text, Hangul, Short + number = &natnum11; + break; + default: + break; + } + + if (number || num >= 0) { + if (aLocale.Language != rLocale.Language || + aLocale.Country != rLocale.Country || + aLocale.Variant != rLocale.Variant) { + LocaleDataItem item = LocaleDataImpl::get()->getLocaleItem( rLocale ); + aLocale = rLocale; + DecimalChar[NumberChar_HalfWidth]=item.decimalSeparator.toChar(); + if (DecimalChar[NumberChar_HalfWidth] > 0x7E || DecimalChar[NumberChar_HalfWidth] < 0x21) + DecimalChar[NumberChar_FullWidth]=0xFF0E; + else + DecimalChar[NumberChar_FullWidth]=DecimalChar[NumberChar_HalfWidth]+0xFEE0; + SeparatorChar[NumberChar_HalfWidth]=item.thousandSeparator.toChar(); + if (SeparatorChar[NumberChar_HalfWidth] > 0x7E || SeparatorChar[NumberChar_HalfWidth] < 0x21) + SeparatorChar[NumberChar_FullWidth]=0xFF0C; + else + SeparatorChar[NumberChar_FullWidth]=SeparatorChar[NumberChar_HalfWidth]+0xFEE0; + } + if (number) + return AsciiToNative( aNumberString, aNumberString.getLength(), pOffset, number ); + else if (num == NumberChar_he) + return getHebrewNativeNumberString(aNumberString, + nNativeNumberMode == NativeNumberMode::NATNUM2); + else if (num == NumberChar_cu) + return getCyrillicNativeNumberString(aNumberString); + else + return AsciiToNativeChar(aNumberString, aNumberString.getLength(), pOffset, num); + } + else + return aNumberString; +} + +OUString SAL_CALL NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale, + sal_Int16 nNativeNumberMode) +{ + return getNativeNumberString(aNumberString, rLocale, nNativeNumberMode, nullptr); +} + +OUString 
SAL_CALL NativeNumberSupplierService::getNativeNumberStringParams( + const OUString& rNumberString, const css::lang::Locale& rLocale, sal_Int16 nNativeNumberMode, + const OUString& rNativeNumberParams) +{ + return getNativeNumberString(rNumberString, rLocale, nNativeNumberMode, nullptr, rNativeNumberParams); +} + +sal_Unicode NativeNumberSupplierService::getNativeNumberChar( const sal_Unicode inChar, const Locale& rLocale, sal_Int16 nNativeNumberMode ) +{ + if (nNativeNumberMode == NativeNumberMode::NATNUM0) { // Ascii + for (const auto & i : NumberChar) + for (sal_Int16 j = 0; j < 10; j++) + if (inChar == i[j]) + return j; + return inChar; + } + + if (!isNumber(inChar)) + return inChar; + + if (!isValidNatNumImpl(rLocale, nNativeNumberMode)) + return inChar; + + sal_Int16 langnum = getLanguageNumber(rLocale); + if (langnum == -1) + return inChar; + + switch (nNativeNumberMode) + { + case NativeNumberMode::NATNUM1: // Char, Lower + case NativeNumberMode::NATNUM4: // Text, Lower, Long + case NativeNumberMode::NATNUM7: // Text. 
Lower, Short + return NumberChar[natnum1[langnum]][inChar - NUMBER_ZERO]; + case NativeNumberMode::NATNUM2: // Char, Upper + case NativeNumberMode::NATNUM5: // Text, Upper, Long + case NativeNumberMode::NATNUM8: // Text, Upper, Short + return NumberChar[natnum2[langnum]][inChar - NUMBER_ZERO]; + case NativeNumberMode::NATNUM3: // Char, FullWidth + case NativeNumberMode::NATNUM6: // Text, FullWidth + return NumberChar[NumberChar_FullWidth][inChar - NUMBER_ZERO]; + case NativeNumberMode::NATNUM9: // Char, Hangul + case NativeNumberMode::NATNUM10: // Text, Hangul, Long + case NativeNumberMode::NATNUM11: // Text, Hangul, Short + return NumberChar[NumberChar_Hangul_ko][inChar - NUMBER_ZERO]; + default: + break; + } + + return inChar; +} + +bool NativeNumberSupplierService::isValidNatNumImpl( const Locale& rLocale, sal_Int16 nNativeNumberMode ) +{ + sal_Int16 langnum = getLanguageNumber(rLocale); + + switch (nNativeNumberMode) { + case NativeNumberMode::NATNUM0: // Ascii + case NativeNumberMode::NATNUM3: // Char, FullWidth + case NativeNumberMode::NATNUM12: // spell out numbers, dates and money amounts + return true; + case NativeNumberMode::NATNUM1: // Char, Lower + return (langnum >= 0); + case NativeNumberMode::NATNUM2: // Char, Upper + if (langnum == 4) // Hebrew numbering + return true; + [[fallthrough]]; + case NativeNumberMode::NATNUM4: // Text, Lower, Long + case NativeNumberMode::NATNUM5: // Text, Upper, Long + case NativeNumberMode::NATNUM6: // Text, FullWidth + case NativeNumberMode::NATNUM7: // Text. 
Lower, Short + case NativeNumberMode::NATNUM8: // Text, Upper, Short + return (langnum >= 0 && langnum < 4); // CJK numbering + case NativeNumberMode::NATNUM9: // Char, Hangul + case NativeNumberMode::NATNUM10: // Text, Hangul, Long + case NativeNumberMode::NATNUM11: // Text, Hangul, Short + return (langnum == 3); // Korean numbering + } + return false; +} + +NativeNumberXmlAttributes SAL_CALL NativeNumberSupplierService::convertToXmlAttributes( const Locale& rLocale, sal_Int16 nNativeNumberMode ) +{ + static const sal_Int16 attShort = 0; + static const sal_Int16 attMedium = 1; + static const sal_Int16 attLong = 2; + static const char *attType[] = { "short", "medium", "long" }; + + sal_Int16 number = NumberChar_HalfWidth, type = attShort; + + sal_Int16 langnum = -1; + if (isValidNatNum(rLocale, nNativeNumberMode)) { + langnum = getLanguageNumber(rLocale); + } + if (langnum != -1) { + switch (nNativeNumberMode) { + case NativeNumberMode::NATNUM0: // Ascii + number = NumberChar_HalfWidth; + type = attShort; + break; + case NativeNumberMode::NATNUM1: // Char, Lower + number = natnum1[langnum]; + type = attShort; + break; + case NativeNumberMode::NATNUM2: // Char, Upper + number = natnum2[langnum]; + type = number == NumberChar_he ? attMedium : attShort; + break; + case NativeNumberMode::NATNUM3: // Char, FullWidth + number = NumberChar_FullWidth; + type = attShort; + break; + case NativeNumberMode::NATNUM4: // Text, Lower, Long + number = natnum1[langnum]; + type = attLong; + break; + case NativeNumberMode::NATNUM5: // Text, Upper, Long + number = natnum2[langnum]; + type = attLong; + break; + case NativeNumberMode::NATNUM6: // Text, FullWidth + number = NumberChar_FullWidth; + type = attLong; + break; + case NativeNumberMode::NATNUM7: // Text. 
Lower, Short + number = natnum1[langnum]; + type = attMedium; + break; + case NativeNumberMode::NATNUM8: // Text, Upper, Short + number = natnum2[langnum]; + type = attMedium; + break; + case NativeNumberMode::NATNUM9: // Char, Hangul + number = NumberChar_Hangul_ko; + type = attShort; + break; + case NativeNumberMode::NATNUM10: // Text, Hangul, Long + number = NumberChar_Hangul_ko; + type = attLong; + break; + case NativeNumberMode::NATNUM11: // Text, Hangul, Short + number = NumberChar_Hangul_ko; + type = attMedium; + break; + default: + break; + } + } + return NativeNumberXmlAttributes(rLocale, OUString(&NumberChar[number][1], 1), + OUString::createFromAscii(attType[type])); +} + +static bool natNumIn(sal_Int16 num, const sal_Int16 natnum[], sal_Int16 len) +{ + for (sal_Int16 i = 0; i < len; i++) + if (natnum[i] == num) + return true; + return false; +} + +sal_Int16 SAL_CALL NativeNumberSupplierService::convertFromXmlAttributes( const NativeNumberXmlAttributes& aAttr ) +{ + sal_Unicode numberChar[NumberChar_Count]; + for (sal_Int16 i = 0; i < NumberChar_Count; i++) + numberChar[i] = NumberChar[i][1]; + OUString number(numberChar, NumberChar_Count); + + sal_Int16 num = sal::static_int_cast<sal_Int16>( number.indexOf(aAttr.Format) ); + + if ( aAttr.Style == "short" ) { + if (num == NumberChar_FullWidth) + return NativeNumberMode::NATNUM3; + else if (num == NumberChar_Hangul_ko) + return NativeNumberMode::NATNUM9; + else if (natNumIn(num, natnum1, sizeof_natnum1)) + return NativeNumberMode::NATNUM1; + else if (natNumIn(num, natnum2, sizeof_natnum2)) + return NativeNumberMode::NATNUM2; + } else if ( aAttr.Style == "medium" ) { + if (num == NumberChar_Hangul_ko) + return NativeNumberMode::NATNUM11; + else if (num == NumberChar_he) + return NativeNumberMode::NATNUM2; + else if (natNumIn(num, natnum1, sizeof_natnum1)) + return NativeNumberMode::NATNUM7; + else if (natNumIn(num, natnum2, sizeof_natnum2)) + return NativeNumberMode::NATNUM8; + } else if ( aAttr.Style == 
"long" ) { + if (num == NumberChar_FullWidth) + return NativeNumberMode::NATNUM6; + else if (num == NumberChar_Hangul_ko) + return NativeNumberMode::NATNUM10; + else if (natNumIn(num, natnum1, sizeof_natnum1)) + return NativeNumberMode::NATNUM4; + else if (natNumIn(num, natnum2, sizeof_natnum2)) + return NativeNumberMode::NATNUM5; + } else { + throw RuntimeException(); + } + return NativeNumberMode::NATNUM0; +} + + +// Following code generates Hebrew Number, +// see numerical system in the Hebrew Numbering System in following link for details, +// http://smontagu.org/writings/HebrewNumbers.html + +namespace { + +struct HebrewNumberChar { + sal_Unicode code; + sal_Int16 value; +}; + +} + +HebrewNumberChar const HebrewNumberCharArray[] = { + { 0x05ea, 400 }, + { 0x05ea, 400 }, + { 0x05e9, 300 }, + { 0x05e8, 200 }, + { 0x05e7, 100 }, + { 0x05e6, 90 }, + { 0x05e4, 80 }, + { 0x05e2, 70 }, + { 0x05e1, 60 }, + { 0x05e0, 50 }, + { 0x05de, 40 }, + { 0x05dc, 30 }, + { 0x05db, 20 }, + { 0x05d9, 10 }, + { 0x05d8, 9 }, + { 0x05d7, 8 }, + { 0x05d6, 7 }, + { 0x05d5, 6 }, + { 0x05d4, 5 }, + { 0x05d3, 4 }, + { 0x05d2, 3 }, + { 0x05d1, 2 }, + { 0x05d0, 1 } +}; + +const sal_Unicode thousand[] = {0x05d0, 0x05dc, 0x05e3, 0x0}; +const sal_Unicode thousands[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x0}; +const sal_Unicode thousands_last[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x05dd, 0x0}; +const sal_Unicode geresh = 0x05f3; +const sal_Unicode gershayim = 0x05f4; + +static void makeHebrewNumber(sal_Int64 value, OUStringBuffer& output, bool isLast, bool useGeresh) +{ + sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000); + + if (value > 1000) { + makeHebrewNumber(value / 1000, output, num != 0, useGeresh); + output.append(" "); + } + if (num == 0) { + output.append(value == 1000 ? thousand : isLast ? 
thousands_last : thousands); + } else { + sal_Int16 nbOfChar = 0; + for (sal_Int32 j = 0; num > 0 && j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray)); j++) { + if (num - HebrewNumberCharArray[j].value >= 0) { + nbOfChar++; + // https://en.wikipedia.org/wiki/Hebrew_numerals#Key_exceptions + // By convention, the numbers 15 and 16 are represented as 9 + 6 and 9 + 7 + if (num == 15 || num == 16) // substitution for 15 and 16 + j++; + assert(j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray))); + num = sal::static_int_cast<sal_Int16>( num - HebrewNumberCharArray[j].value ); + output.append(HebrewNumberCharArray[j].code); + } + } + if (useGeresh) { + if (nbOfChar > 1) // a number is written as more than one character + output.insert(output.getLength() - 1, gershayim); + else if (nbOfChar == 1) // a number is written as a single character + output.append(geresh); + } + } +} + +OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh) +{ + sal_Int64 value = 0; + sal_Int32 i, count = 0, len = aNumberString.getLength(); + const sal_Unicode *src = aNumberString.getStr(); + + for (i = 0; i < len; i++) { + sal_Unicode ch = src[i]; + if (isNumber(ch)) { + if (++count >= 20) // Number is too long, could not be handled. 
+ return aNumberString; + value = value * 10 + (ch - NUMBER_ZERO); + } + else if (isSeparator(ch) && count > 0) continue; + else if (isMinus(ch) && count == 0) continue; + else break; + } + + if (value > 0) { + OUStringBuffer output(count*2 + 2 + len - i); + + makeHebrewNumber(value, output, true, useGeresh); + + if (i < len) + output.append(aNumberString.subView(i)); + + return output.makeStringAndClear(); + } + else + return aNumberString; +} + +// Support for Cyrillic Numerals +// See UTN 41 for implementation information +// http://www.unicode.org/notes/tn41/ + +const sal_Unicode cyrillicThousandsMark = 0x0482; +const sal_Unicode cyrillicTitlo = 0x0483; +const sal_Unicode cyrillicTen = 0x0456; + +namespace { + +struct CyrillicNumberChar { + sal_Unicode code; + sal_Int16 value; +}; + +} + +CyrillicNumberChar const CyrillicNumberCharArray[] = { + { 0x0446, 900 }, + { 0x047f, 800 }, + { 0x0471, 700 }, + { 0x0445, 600 }, + { 0x0444, 500 }, + { 0x0443, 400 }, + { 0x0442, 300 }, + { 0x0441, 200 }, + { 0x0440, 100 }, + { 0x0447, 90 }, + { 0x043f, 80 }, + { 0x047b, 70 }, + { 0x046f, 60 }, + { 0x043d, 50 }, + { 0x043c, 40 }, + { 0x043b, 30 }, + { 0x043a, 20 }, + { 0x0456, 10 }, + { 0x0473, 9 }, + { 0x0438, 8 }, + { 0x0437, 7 }, + { 0x0455, 6 }, + { 0x0454, 5 }, + { 0x0434, 4 }, + { 0x0433, 3 }, + { 0x0432, 2 }, + { 0x0430, 1 } +}; + +static void makeCyrillicNumber(sal_Int64 value, OUStringBuffer& output, bool addTitlo) +{ + sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000); + if (value >= 1000) { + output.append(cyrillicThousandsMark); + makeCyrillicNumber(value / 1000, output, false); + if (value >= 10000 && (value - 10000) % 1000 != 0) { + output.append(" "); + } + if (value % 1000 == 0) + addTitlo = false; + } + + for (sal_Int32 j = 0; num > 0 && j < sal_Int32(SAL_N_ELEMENTS(CyrillicNumberCharArray)); j++) { + if (num < 20 && num > 10) { + num -= 10; + makeCyrillicNumber(num, output, false); + output.append(cyrillicTen); + break; + } + + if 
(CyrillicNumberCharArray[j].value <= num) { + output.append(CyrillicNumberCharArray[j].code); + num = sal::static_int_cast<sal_Int16>( num - CyrillicNumberCharArray[j].value ); + } + } + + if (!addTitlo) + return; + + if (output.getLength() == 1) { + output.append(cyrillicTitlo); + } else if (output.getLength() == 2) { + if (value > 800 && value < 900) { + output.append(cyrillicTitlo); + } else { + output.insert(1, cyrillicTitlo); + } + } else if (output.getLength() > 2) { + if (output.indexOf(" ") == output.getLength() - 2) { + output.append(cyrillicTitlo); + } else { + output.insert(output.getLength() - 1, cyrillicTitlo); + } + } +} + +OUString getCyrillicNativeNumberString(const OUString& aNumberString) +{ + sal_Int64 value = 0; + sal_Int32 i, count = 0, len = aNumberString.getLength(); + const sal_Unicode *src = aNumberString.getStr(); + + for (i = 0; i < len; i++) { + sal_Unicode ch = src[i]; + if (isNumber(ch)) { + if (++count >= 8) // Number is too long, could not be handled. + return aNumberString; + value = value * 10 + (ch - NUMBER_ZERO); + } + else if (isSeparator(ch) && count > 0) continue; + else if (isMinus(ch) && count == 0) continue; + else break; + } + + if (value > 0) { + OUStringBuffer output(count*2 + 2 + len - i); + + makeCyrillicNumber(value, output, true); + + if (i < len) + output.append(aNumberString.subView(i)); + + return output.makeStringAndClear(); + } + else + return aNumberString; +} + +constexpr OUString implementationName = u"com.sun.star.i18n.NativeNumberSupplier"_ustr; + +OUString SAL_CALL NativeNumberSupplierService::getImplementationName() +{ + return implementationName; +} + +sal_Bool SAL_CALL +NativeNumberSupplierService::supportsService(const OUString& rServiceName) +{ + return cppu::supportsService(this, rServiceName); +} + +Sequence< OUString > SAL_CALL +NativeNumberSupplierService::getSupportedServiceNames() +{ + return {implementationName, "com.sun.star.i18n.NativeNumberSupplier2"}; +} + +} + +extern "C" 
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_i18n_NativeNumberSupplier_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new i18npool::NativeNumberSupplierService()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |