summaryrefslogtreecommitdiffstats
path: root/i18npool/source/nativenumber
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--i18npool/source/nativenumber/data/numberchar.h277
-rw-r--r--i18npool/source/nativenumber/nativenumbersupplier.cxx1275
2 files changed, 1552 insertions, 0 deletions
diff --git a/i18npool/source/nativenumber/data/numberchar.h b/i18npool/source/nativenumber/data/numberchar.h
new file mode 100644
index 0000000000..a711e96072
--- /dev/null
+++ b/i18npool/source/nativenumber/data/numberchar.h
@@ -0,0 +1,277 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H
+#define INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H
+
+#include <sal/types.h>
+
+namespace i18npool {
+
+// Row indices into the NumberChar digit table and into the parallel
+// DecimalChar, MinusChar and SeparatorChar arrays (one entry per script).
+// NumberChar_Count is the number of rows; it is used as an element count
+// when those tables are wrapped in strings for lookup.
+const sal_Int16 NumberChar_HalfWidth = 0;
+const sal_Int16 NumberChar_FullWidth = 1;
+const sal_Int16 NumberChar_Lower_zh = 2;
+const sal_Int16 NumberChar_Upper_zh = 3;
+const sal_Int16 NumberChar_Upper_zh_TW = 4;
+const sal_Int16 NumberChar_Modern_ja = 5;
+const sal_Int16 NumberChar_Traditional_ja= 6;
+const sal_Int16 NumberChar_Lower_ko = 7;
+const sal_Int16 NumberChar_Upper_ko = 8;
+const sal_Int16 NumberChar_Hangul_ko = 9;
+const sal_Int16 NumberChar_Indic_ar = 10;
+const sal_Int16 NumberChar_EastIndic_ar = 11;
+const sal_Int16 NumberChar_hi = 12;
+const sal_Int16 NumberChar_th = 13;
+const sal_Int16 NumberChar_or = 14;
+const sal_Int16 NumberChar_mr = 15;
+const sal_Int16 NumberChar_bn = 16;
+const sal_Int16 NumberChar_pa = 17;
+const sal_Int16 NumberChar_gu = 18;
+const sal_Int16 NumberChar_ta = 19;
+const sal_Int16 NumberChar_te = 20;
+const sal_Int16 NumberChar_kn = 21;
+const sal_Int16 NumberChar_ml = 22;
+const sal_Int16 NumberChar_lo = 23;
+const sal_Int16 NumberChar_bo = 24;
+const sal_Int16 NumberChar_my = 25;
+const sal_Int16 NumberChar_km = 26;
+const sal_Int16 NumberChar_mn = 27;
+const sal_Int16 NumberChar_he = 28;
+const sal_Int16 NumberChar_ne = 29;
+const sal_Int16 NumberChar_dz = 30;
+const sal_Int16 NumberChar_cu = 31;
+const sal_Int16 NumberChar_Count = 32;
+
+// Digit table: one row per NumberChar_* index, one column per decimal digit
+// 0..9. The row order must match the NumberChar_* constants.
+// Notes on rows that are not a contiguous Unicode digit range:
+//  - Tamil keeps the ASCII zero (0x0030) in column 0.
+//  - Hebrew and Church Slavic hold letters used as numerals; column 0 is a
+//    placeholder (space resp. ASCII zero).
+const sal_Unicode NumberChar[][10] = {
+// 0 1 2 3 4 5 6 7 8 9
+ { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Half Width (Ascii)
+ { 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19 }, // Full Width
+ { 0x3007, 0x4E00, 0x4E8c, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Chinese Lower
+ { 0x96F6, 0x58F9, 0x8D30, 0x53C1, 0x8086, 0x4F0D, 0x9646, 0x67D2, 0x634C, 0x7396 }, // S. Chinese Upper
+ { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x8086, 0x4F0D, 0x9678, 0x67D2, 0x634C, 0x7396 }, // T. Chinese Upper
+ { 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Modern
+ { 0x96F6, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad.
+ { 0x96F6, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Lower
+ { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper
+ { 0xC601, 0xC77C, 0xC774, 0xC0BC, 0xC0AC, 0xC624, 0xC721, 0xCE60, 0xD314, 0xAD6C }, // Korean Hangul
+ { 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669 }, // Arabic Indic
+ { 0x06F0, 0x06F1, 0x06F2, 0x06F3, 0x06F4, 0x06F5, 0x06F6, 0x06F7, 0x06F8, 0x06F9 }, // Est. Arabic Indic
+ { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Indic (Devanagari)
+ { 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59 }, // Thai
+ // Odia (Oriya) digits are U+0B66..U+0B6F; 0x0866.. is not in the Oriya block.
+ { 0x0B66, 0x0B67, 0x0B68, 0x0B69, 0x0B6A, 0x0B6B, 0x0B6C, 0x0B6D, 0x0B6E, 0x0B6F }, // Odia
+ { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Marathi
+ { 0x09E6, 0x09E7, 0x09E8, 0x09E9, 0x09EA, 0x09EB, 0x09EC, 0x09ED, 0x09EE, 0x09EF }, // Bengali
+ { 0x0A66, 0x0A67, 0x0A68, 0x0A69, 0x0A6A, 0x0A6B, 0x0A6C, 0x0A6D, 0x0A6E, 0x0A6F }, // Punjabi (Gurmukhi)
+ { 0x0AE6, 0x0AE7, 0x0AE8, 0x0AE9, 0x0AEA, 0x0AEB, 0x0AEC, 0x0AED, 0x0AEE, 0x0AEF }, // Gujarati
+ { 0x0030, 0x0BE7, 0x0BE8, 0x0BE9, 0x0BEA, 0x0BEB, 0x0BEC, 0x0BED, 0x0BEE, 0x0BEF }, // Tamil
+ { 0x0C66, 0x0C67, 0x0C68, 0x0C69, 0x0C6A, 0x0C6B, 0x0C6C, 0x0C6D, 0x0C6E, 0x0C6F }, // Telugu
+ { 0x0CE6, 0x0CE7, 0x0CE8, 0x0CE9, 0x0CEA, 0x0CEB, 0x0CEC, 0x0CED, 0x0CEE, 0x0CEF }, // Kannada
+ // Malayalam digits are U+0D66..U+0D6F; U+0DE6..U+0DEF are Sinhala Lith digits.
+ { 0x0D66, 0x0D67, 0x0D68, 0x0D69, 0x0D6A, 0x0D6B, 0x0D6C, 0x0D6D, 0x0D6E, 0x0D6F }, // Malayalam
+ { 0x0ED0, 0x0ED1, 0x0ED2, 0x0ED3, 0x0ED4, 0x0ED5, 0x0ED6, 0x0ED7, 0x0ED8, 0x0ED9 }, // Lao
+ { 0x0F20, 0x0F21, 0x0F22, 0x0F23, 0x0F24, 0x0F25, 0x0F26, 0x0F27, 0x0F28, 0x0F29 }, // Tibetan
+ { 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049 }, // Myanmar
+ { 0x17E0, 0x17E1, 0x17E2, 0x17E3, 0x17E4, 0x17E5, 0x17E6, 0x17E7, 0x17E8, 0x17E9 }, // Cambodian (Khmer)
+ { 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, 0x1817, 0x1818, 0x1819 }, // Mongolian
+ { 0x0020, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8 }, // Hebrew
+ { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Nepali
+ { 0x0F20, 0x0F21, 0x0F22, 0x0F23, 0x0F24, 0x0F25, 0x0F26, 0x0F27, 0x0F28, 0x0F29 }, // Dzongkha
+ { 0x0030, 0x0430, 0x0432, 0x0433, 0x0434, 0x0454, 0x0455, 0x0437, 0x0438, 0x0473 }, // Church Slavic
+};
+
+// Decimal separator character for each NumberChar_* row (same row order as
+// NumberChar). An entry of 0x0000 means no native replacement is defined;
+// the conversion code in this change then keeps the original character.
+static sal_Unicode DecimalChar[] = {
+ 0x002E, // Half Width (Ascii)
+ 0xFF0E, // Full Width
+ 0xFF0E, // Chinese Lower
+ 0x70B9, // S. Chinese Upper
+ 0x9EDE, // T. Chinese Upper
+ 0x30FB, // Japanese Modern
+ 0x30FB, // Japanese Trad.
+ 0xFF0E, // Korean Lower
+ 0x9EDE, // Korean Upper
+ 0xC810, // Korean Hangul
+ 0x066B, // Arabic Indic
+ 0x066B, // Est. Arabic Indic
+ 0x0000, // Indic (Devanagari)
+ 0x0000, // Thai
+ 0x0000, // Odia
+ 0x0000, // Marathi
+ 0x0000, // Bengali
+ 0x0000, // Punjabi (Gurmukhi)
+ 0x0000, // Gujarati
+ 0x0000, // Tamil
+ 0x0000, // Telugu
+ 0x0000, // Kannada
+ 0x0000, // Malayalam
+ 0x0000, // Lao
+ 0x0000, // Tibetan
+ 0x0000, // Myanmar
+ 0x0000, // Cambodian (Khmer)
+ 0x0000, // Mongolian
+ 0x0000, // Hebrew
+ 0x0000, // Nepali
+ 0x0000, // Dzongkha
+ 0x0000, // Church Slavic
+};
+
+// Minus sign character for each NumberChar_* row (same row order as
+// NumberChar). An entry of 0x0000 means no native replacement is defined;
+// the conversion code in this change then keeps the original character.
+const sal_Unicode MinusChar[] = {
+ 0x002D, // Half Width (Ascii)
+ 0xFF0D, // Full Width
+ 0xFF0D, // Chinese Lower
+ 0x8D1F, // S. Chinese Upper
+ 0x5069, // T. Chinese Upper
+ 0x2212, // Japanese Modern
+ 0x2212, // Japanese Trad.
+ 0xFF0D, // Korean Lower
+ 0xFF0D, // Korean Upper
+ 0xFF0D, // Korean Hangul
+ 0x0000, // Arabic Indic
+ 0x2212, // Est. Arabic Indic
+ 0x0000, // Indic
+ 0x0000, // Thai
+ 0x0000, // Odia
+ 0x0000, // Marathi
+ 0x0000, // Bengali
+ 0x0000, // Punjabi
+ 0x0000, // Gujarati
+ 0x0000, // Tamil
+ 0x0000, // Telugu
+ 0x0000, // Kannada
+ 0x0000, // Malayalam
+ 0x0000, // Lao
+ 0x0000, // Tibetan
+ 0x0000, // Myanmar
+ 0x0000, // Cambodian (Khmer)
+ 0x0000, // Mongolian
+ 0x0000, // Hebrew
+ 0x0000, // Nepali
+ 0x0000, // Dzongkha
+ 0x0000, // Church Slavic
+};
+
+// Thousands (group) separator character for each NumberChar_* row (same row
+// order as NumberChar). An entry of 0x0000 means no native replacement is
+// defined; the conversion code in this change then keeps the original
+// character. Note the element type is sal_uInt16, not sal_Unicode: the
+// NativeToAscii code in this change reinterpret_casts it to sal_Unicode*.
+static sal_uInt16 SeparatorChar[] = {
+ 0x002C, // Half Width (Ascii)
+ 0xFF0C, // Full Width
+ 0x3001, // Chinese Lower
+ 0x3001, // S. Chinese Upper
+ 0x3001, // T. Chinese Upper
+ 0x3001, // Japanese Modern
+ 0x3001, // Japanese Trad.
+ 0x002C, // Korean Lower
+ 0x002C, // Korean Upper
+ 0x002C, // Korean Hangul
+ 0x0000, // Arabic Indic
+ 0x066C, // Est. Arabic Indic
+ 0x0000, // Indic
+ 0x0000, // Thai
+ 0x0000, // Odia
+ 0x0000, // Marathi
+ 0x0000, // Bengali
+ 0x0000, // Punjabi
+ 0x0000, // Gujarati
+ 0x0000, // Tamil
+ 0x0000, // Telugu
+ 0x0000, // Kannada
+ 0x0000, // Malayalam
+ 0x0000, // Lao
+ 0x0000, // Tibetan
+ 0x0000, // Myanmar
+ 0x0000, // Cambodian (Khmer)
+ 0x0000, // Mongolian
+ 0x0000, // Hebrew
+ 0x0000, // Nepali
+ 0x0000, // Dzongkha
+ 0x0000, // Church Slavic
+};
+
+// ASCII digit anchors and character-class tests, all based on the half-width
+// (ASCII) rows of the tables above. The macro argument is parenthesized so
+// that compound expressions (e.g. a conditional or a comparison) passed as
+// the argument are evaluated as a unit. isNumber still evaluates its
+// argument twice, so do not pass expressions with side effects.
+#define NUMBER_ZERO NumberChar[NumberChar_HalfWidth][0] // 0x0030
+#define NUMBER_ONE NumberChar[NumberChar_HalfWidth][1] // 0x0031
+#define NUMBER_NINE NumberChar[NumberChar_HalfWidth][9] // 0x0039
+#define isNumber(n) ( NUMBER_ZERO <= (n) && (n) <= NUMBER_NINE )
+#define isDecimal(n) ( (n) == DecimalChar[NumberChar_HalfWidth] )
+#define isMinus(n) ( (n) == MinusChar[NumberChar_HalfWidth] )
+#define isSeparator(n) ( (n) == SeparatorChar[NumberChar_HalfWidth] )
+
+// Row indices into the MultiplierChar_*_CJK tables (one row per CJK
+// language/style). Multiplier_Count is the number of rows.
+const sal_Int16 Multiplier_Lower_zh = 0;
+const sal_Int16 Multiplier_Upper_zh = 1;
+const sal_Int16 Multiplier_Lower_zh_TW = 2;
+const sal_Int16 Multiplier_Upper_zh_TW = 3;
+const sal_Int16 Multiplier_Lower_ko = 4;
+const sal_Int16 Multiplier_Upper_ko = 5;
+const sal_Int16 Multiplier_Hangul_ko = 6;
+const sal_Int16 Multiplier_Modern_ja = 7;
+const sal_Int16 Multiplier_Traditional_ja = 8;
+const sal_Int16 Multiplier_Count = 9;
+
+// Six-multiplier scheme: MultiplierExponent_6_CJK lists the decimal exponent
+// of each column (10^12, 10^8, 10^4, 10^3, 10^2, 10^1, largest first) and
+// MultiplierChar_6_CJK gives the character for that exponent, one row per
+// Multiplier_* index.
+const sal_Int16 ExponentCount_6_CJK = 6;
+
+const sal_Int16 MultiplierExponent_6_CJK[ExponentCount_6_CJK] = {
+ 12, 8, 4, 3, 2, 1
+};
+const sal_Unicode MultiplierChar_6_CJK[][ExponentCount_6_CJK] = {
+ {0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341}, // S. Chinese Lower
+ {0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE}, // S. Chinese Upper
+ {0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341}, // T. Chinese Lower
+ {0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE}, // T. Chinese Upper
+ {0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Korean Lower
+ {0x5146, 0x5104, 0x842C, 0x9621, 0x4F70, 0x62FE}, // Korean Upper
+ {0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED}, // Korean Hangul
+ {0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Japanese Modern
+ {0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE} // Japanese Traditional
+};
+
+// Two-multiplier scheme: only the 10^8 and 10^4 multipliers are defined.
+// Columns of MultiplierChar_2_CJK correspond to MultiplierExponent_2_CJK,
+// rows to the Multiplier_* indices.
+const sal_Int16 ExponentCount_2_CJK = 2;
+
+const sal_Int16 MultiplierExponent_2_CJK[ExponentCount_2_CJK] = {
+ 8, 4,
+};
+
+const sal_Unicode MultiplierChar_2_CJK[][ExponentCount_2_CJK] = {
+ {0x4EBF, 0x4E07}, // S. Chinese Lower
+ {0x4EBF, 0x4E07}, // S. Chinese Upper
+ {0x5104, 0x842C}, // T. Chinese Lower
+ {0x5104, 0x842C}, // T. Chinese Upper
+ {0x5104, 0x4E07}, // Korean Lower
+ {0x5104, 0x842C}, // Korean Upper
+ {0xC5B5, 0xB9CC}, // Korean Hangul
+ {0x5104, 0x4E07}, // Japanese Modern
+ {0x5104, 0x842C} // Japanese Traditional
+};
+
+// Seven-multiplier scheme: like the six-multiplier one with an additional
+// leading 10^16 column. Columns of MultiplierChar_7_CJK correspond to
+// MultiplierExponent_7_CJK, rows to the Multiplier_* indices. The whole
+// table is also used by the native-to-ASCII conversion in this change to
+// recognise multiplier characters of any row.
+const sal_Int16 ExponentCount_7_CJK = 7;
+
+const sal_Int16 MultiplierExponent_7_CJK[ExponentCount_7_CJK] = {
+ 16, 12, 8, 4, 3, 2, 1
+};
+const sal_Unicode MultiplierChar_7_CJK[][ExponentCount_7_CJK] = {
+ {0x4EAC, 0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341}, // S. Chinese Lower
+ {0x4EAC, 0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE}, // S. Chinese Upper
+ {0x4EAC, 0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341}, // T. Chinese Lower
+ {0x4EAC, 0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE}, // T. Chinese Upper
+ {0x4EAC, 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Korean Lower
+ {0x4EAC, 0x5146, 0x5104, 0x842C, 0x9621, 0x4F70, 0x62FE}, // Korean Upper
+ {0x4EAC, 0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED}, // Korean Hangul
+ {0x4EAC, 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341}, // Japanese Modern
+ {0x4EAC, 0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE} // Japanese Traditional
+};
+
+}
+
+#endif // INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/nativenumber/nativenumbersupplier.cxx b/i18npool/source/nativenumber/nativenumbersupplier.cxx
new file mode 100644
index 0000000000..0618f30059
--- /dev/null
+++ b/i18npool/source/nativenumber/nativenumbersupplier.cxx
@@ -0,0 +1,1275 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <i18nlangtag/languagetag.hxx>
+#include <i18nlangtag/mslangid.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <sal/macros.h>
+#include <nativenumbersupplier.hxx>
+#include <localedata.hxx>
+#include "data/numberchar.h"
+#include <comphelper/processfactory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <o3tl/string_view.hxx>
+#include <cstddef>
+#include <map>
+#include <mutex>
+#include <memory>
+#include <string_view>
+#include <unordered_map>
+#include <com/sun/star/i18n/CharacterClassification.hpp>
+#include <com/sun/star/i18n/NativeNumberMode.hpp>
+#include <com/sun/star/linguistic2/NumberText.hpp>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+namespace {
+
+// Describes one ASCII-to-native conversion scheme: which digit row to use,
+// which multiplier characters/exponents apply, and which digits may be
+// omitted from the output.
+struct Number {
+ sal_Int16 number; // row index into NumberChar / DecimalChar / MinusChar / SeparatorChar
+ const sal_Unicode *multiplierChar; // multiplier characters, indexed like multiplierExponent
+ sal_Int16 numberFlag; // bitmask of NUMBER_OMIT_* flags
+ sal_Int16 exponentCount; // number of entries in multiplierExponent
+ const sal_Int16 *multiplierExponent; // decimal exponents of the multipliers
+};
+
+}
+
+// Bit flags stored in Number::numberFlag.
+//  NUMBER_OMIT_ZERO:      when clear, a zero digit inside a number is written
+//                         (once) as the native zero; when set it is dropped.
+//  NUMBER_OMIT_ONLY_ZERO: when clear, a number consisting only of zeros is
+//                         written as a single native zero; when set nothing
+//                         is written for it.
+//  NUMBER_OMIT_ONE_n:     when set, a digit "1" in front of the multiplier
+//                         with index n-1 is not written.
+// NUMBER_OMIT_ONE_CHECK(bit) yields the NUMBER_OMIT_ONE_n flag for the
+// zero-based multiplier index `bit`.
+#define NUMBER_OMIT_ZERO (1 << 0)
+#define NUMBER_OMIT_ONLY_ZERO (1 << 1)
+#define NUMBER_OMIT_ONE_1 (1 << 2)
+#define NUMBER_OMIT_ONE_2 (1 << 3)
+#define NUMBER_OMIT_ONE_3 (1 << 4)
+#define NUMBER_OMIT_ONE_4 (1 << 5)
+#define NUMBER_OMIT_ONE_5 (1 << 6)
+#define NUMBER_OMIT_ONE_6 (1 << 7)
+#define NUMBER_OMIT_ONE_7 (1 << 8)
+#define NUMBER_OMIT_ONE (NUMBER_OMIT_ONE_1|NUMBER_OMIT_ONE_2|NUMBER_OMIT_ONE_3|NUMBER_OMIT_ONE_4|NUMBER_OMIT_ONE_5|NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
+#define NUMBER_OMIT_ONE_CHECK(bit) (1 << (2 + bit))
+#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO )
+#define NUMBER_OMIT_ZERO_ONE ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE )
+#define NUMBER_OMIT_ONE_67 (NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
+#define NUMBER_OMIT_ZERO_ONE_67 ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE_67 )
+
+namespace i18npool {
+
+namespace {
+
+// Serializes access to the function-local static caches used by
+// getLocaleSeparators() and getNumberText().
+std::mutex theNatNumMutex;
+
+}
+
+static OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh);
+
+static OUString getCyrillicNativeNumberString(const OUString& aNumberString);
+
+/// Character-by-character transliteration of ASCII digits to the digits of
+/// NumberChar row `number`. The output has exactly nCount characters.
+/// A decimal point or minus sign directly in front of a digit, and a group
+/// separator between two digits, are replaced by the native character of the
+/// same row when that row defines one (non-zero table entry); everything
+/// else is copied unchanged. If pOffset is non-null it is resized to nCount
+/// and filled with the identity mapping.
+/// @throws RuntimeException
+static OUString AsciiToNativeChar( const OUString& inStr, sal_Int32 nCount,
+        Sequence< sal_Int32 >* pOffset, sal_Int16 number )
+{
+    const sal_Unicode* const pSrc = inStr.getStr();
+    rtl_uString* pResult = rtl_uString_alloc(nCount);
+    if (pOffset)
+        pOffset->realloc(nCount);
+    auto pOffsets = pOffset ? pOffset->getArray() : nullptr;
+
+    for (sal_Int32 nPos = 0; nPos < nCount; ++nPos)
+    {
+        const sal_Unicode cIn = pSrc[nPos];
+        sal_Unicode cOut = cIn; // default: copy the character unchanged
+
+        if (isNumber(cIn))
+        {
+            cOut = NumberChar[number][ cIn - NUMBER_ZERO ];
+        }
+        else if (nPos + 1 < nCount && isNumber(pSrc[nPos + 1]))
+        {
+            // Punctuation directly in front of a digit: pick the native
+            // candidate, 0 meaning "no replacement".
+            sal_Unicode cNative = 0;
+            if (nPos > 0 && isNumber(pSrc[nPos - 1]) && isSeparator(cIn))
+                cNative = SeparatorChar[number];
+            else if (isDecimal(cIn))
+                cNative = DecimalChar[number];
+            else if (isMinus(cIn))
+                cNative = MinusChar[number];
+
+            if (cNative)
+                cOut = cNative;
+        }
+
+        pResult->buffer[nPos] = cOut;
+        if (pOffsets)
+            pOffsets[nPos] = nPos;
+    }
+    return OUString(pResult, SAL_NO_ACQUIRE); // take ownership
+}
+
+// Recursive helper for AsciiToNative: converts the `len` ASCII digits at
+// str[begin..] into native digit and multiplier characters appended at
+// dst[count..], advancing count.
+//  multiChar_index  index into number->multiplierChar of the multiplier to
+//                   append after this group, or -1 for none.
+//  pOffset/startPos when pOffset is non-null, each written character records
+//                   (source index + startPos) as its offset.
+//  numberChar       digit row to take native digits from.
+// Returns true if the converted group contained a non-zero digit.
+static bool AsciiToNative_numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len,
+ sal_Unicode *dst, sal_Int32& count, sal_Int16 multiChar_index, Sequence< sal_Int32 >* pOffset, sal_Int32 startPos,
+ const Number *number, const sal_Unicode* numberChar)
+{
+ sal_Unicode multiChar = (multiChar_index == -1 ? 0 : number->multiplierChar[multiChar_index]);
+ auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
+ // Base case: the group is not longer than the smallest multiplier exponent.
+ if ( len <= number->multiplierExponent[number->exponentCount-1] ) {
+ if (number->multiplierExponent[number->exponentCount-1] > 1) {
+ // Smallest exponent covers several digits: copy them digit by digit,
+ // skipping leading zeros.
+ bool bNotZero = false;
+ for (const sal_Int32 end = begin+len; begin < end; begin++) {
+ if (bNotZero || str[begin] != NUMBER_ZERO) {
+ dst[count] = numberChar[str[begin] - NUMBER_ZERO];
+ if (ppOffset)
+ ppOffset[count] = begin + startPos;
+ count++;
+ bNotZero = true;
+ }
+ }
+ if (bNotZero && multiChar > 0) {
+ dst[count] = multiChar;
+ if (ppOffset)
+ ppOffset[count] = begin + startPos;
+ count++;
+ }
+ return bNotZero;
+ } else if (str[begin] != NUMBER_ZERO) {
+ // Single non-zero digit: the digit itself is dropped only when it is
+ // "1" and the omit-one flag for this multiplier is set.
+ if (!(number->numberFlag & (multiChar_index < 0 ? 0 : NUMBER_OMIT_ONE_CHECK(multiChar_index))) || str[begin] != NUMBER_ONE) {
+ dst[count] = numberChar[str[begin] - NUMBER_ZERO];
+ if (ppOffset)
+ ppOffset[count] = begin + startPos;
+ count++;
+ }
+ if (multiChar > 0) {
+ dst[count] = multiChar;
+ if (ppOffset)
+ ppOffset[count] = begin + startPos;
+ count++;
+ }
+ } else if (!(number->numberFlag & NUMBER_OMIT_ZERO) && count > 0 && dst[count-1] != numberChar[0]) {
+ // Single zero digit: write one native zero unless zeros are omitted
+ // or the previous output character already is a zero.
+ dst[count] = numberChar[0];
+ if (ppOffset)
+ ppOffset[count] = begin + startPos;
+ count++;
+ }
+ return str[begin] != NUMBER_ZERO;
+ } else {
+ // Recursive case: split the group at each multiplier exponent and
+ // convert the pieces from the most significant one down.
+ bool bPrintPower = false;
+ // sal_Int16 last = 0;
+ for (sal_Int16 i = 1; i <= number->exponentCount; i++) {
+ sal_Int32 tmp = len - (i == number->exponentCount ? 0 : number->multiplierExponent[i]);
+ if (tmp > 0) {
+ bPrintPower |= AsciiToNative_numberMaker(str, begin, tmp, dst, count,
+ (i == number->exponentCount ? -1 : i), pOffset, startPos, number, numberChar);
+ begin += tmp;
+ len -= tmp;
+ }
+ }
+ if (bPrintPower) {
+ // Drop a trailing native zero before appending this group's multiplier.
+ if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 &&
+ dst[count-1] == numberChar[0])
+ count--;
+ if (multiChar > 0) {
+ dst[count] = multiChar;
+ if (ppOffset)
+ ppOffset[count] = begin + startPos;
+ count++;
+ }
+ }
+ return bPrintPower;
+ }
+}
+
+/// Converts the runs of ASCII digits in the first nCount characters of inStr
+/// (clamped to the string length) to native number text, using the digit
+/// row, multipliers and omit flags described by `number`.
+/// Group separators between digits are skipped, digits following a decimal
+/// point are transliterated one by one, and a decimal point, minus sign or
+/// separator directly in front of a digit is replaced by its native
+/// character when the tables define one. All other characters are copied.
+/// If pOffset is non-null it receives one source index per output character.
+/// Returns an empty string when nCount is not positive.
+/// @throws RuntimeException
+static OUString AsciiToNative( const OUString& inStr, sal_Int32 nCount,
+ Sequence< sal_Int32 >* pOffset, const Number* number )
+{
+ OUString aRet;
+
+ sal_Int32 strLen = inStr.getLength();
+ const sal_Unicode *numberChar = NumberChar[number->number];
+
+ if (nCount > strLen)
+ nCount = strLen;
+
+ if (nCount > 0)
+ {
+ const sal_Unicode *str = inStr.getStr();
+ std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * 2 + 1]);
+ std::unique_ptr<sal_Unicode[]> srcStr(new sal_Unicode[nCount + 1]); // for keeping number without comma
+ sal_Int32 i, len = 0, count = 0;
+
+ if (pOffset)
+ pOffset->realloc( nCount * 2 );
+ auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
+ bool bDoDecimal = false;
+
+ // The loop runs one step past the end (i == nCount) so that a number
+ // at the very end of the input is flushed by the else branch.
+ for (i = 0; i <= nCount; i++)
+ {
+ if (i < nCount && isNumber(str[i])) {
+ if (bDoDecimal) {
+ newStr[count] = numberChar[str[i] - NUMBER_ZERO];
+ if (ppOffset)
+ ppOffset[count] = i;
+ count++;
+ }
+ else
+ srcStr[len++] = str[i];
+ } else {
+ if (len > 0) {
+ if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1]))
+ continue; // skip comma inside number string
+ bool bNotZero = false;
+ // Convert the collected digits in chunks of multiplierExponent[0]
+ // digits; the first chunk takes the remainder.
+ for (sal_Int32 begin = 0, end = len % number->multiplierExponent[0];
+ end <= len; begin = end, end += number->multiplierExponent[0]) {
+ if (end == 0) continue;
+ sal_Int32 _count = count;
+ bNotZero |= AsciiToNative_numberMaker(srcStr.get(), begin, end - begin, newStr.get(), count,
+ end == len ? -1 : 0, pOffset, i - len, number, numberChar);
+ if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 &&
+ newStr[count-1] == numberChar[0])
+ count--;
+ if (bNotZero && _count == count && end != len) {
+ newStr[count] = number->multiplierChar[0];
+ if (ppOffset)
+ ppOffset[count] = i - len;
+ count++;
+ }
+ }
+ if (! bNotZero && ! (number->numberFlag & NUMBER_OMIT_ONLY_ZERO)) {
+ newStr[count] = numberChar[0];
+ if (ppOffset)
+ ppOffset[count] = i - len;
+ count++;
+ }
+ len = 0;
+ }
+ if (i < nCount) {
+ bDoDecimal = (!bDoDecimal && i < nCount-1 && isDecimal(str[i]) && isNumber(str[i+1]));
+ if (bDoDecimal)
+ newStr[count] = (DecimalChar[number->number] ? DecimalChar[number->number] : str[i]);
+ else if (i < nCount-1 && isMinus(str[i]) && isNumber(str[i+1]))
+ newStr[count] = (MinusChar[number->number] ? MinusChar[number->number] : str[i]);
+ else if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1]))
+ newStr[count] = (SeparatorChar[number->number] ? SeparatorChar[number->number] : str[i]);
+ else
+ newStr[count] = str[i];
+ if (ppOffset)
+ ppOffset[count] = i;
+ count++;
+ }
+ }
+ }
+
+ if (pOffset)
+ pOffset->realloc(count);
+ aRet = OUString(newStr.get(), count);
+ }
+ return aRet;
+}
+
+namespace
+{
+// Recursive helper for NativeToAscii, called after a multiplier character
+// at str[i] has been seen. Reads at most one digit and then the next
+// multiplier, writes ASCII digits (zero-padded according to the exponents)
+// to dst[count..] and recurses for each further multiplier.
+//  max   largest multiplier exponent seen so far in this number
+//  prev  exponent of the multiplier that was just consumed
+//  i     in/out read position; pre-incremented before each character is read
+//  numberChar / multiplierChar  the digit and multiplier tables flattened into
+//        strings, so indexOf() % 10 resp. % ExponentCount_7_CJK gives the column
+// NOTE(review): when the loop stops at a character that is neither digit nor
+// multiplier, i is left pointing at that character and the caller's for-loop
+// increments i again -- confirm that skipping this character is intended.
+void NativeToAscii_numberMaker(sal_Int16 max, sal_Int16 prev, const sal_Unicode *str,
+ sal_Int32& i, sal_Int32 nCount, sal_Unicode *dst, sal_Int32& count, Sequence< sal_Int32 >* pOffset,
+ OUString& numberChar, OUString& multiplierChar)
+{
+ auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
+ sal_Int16 curr = 0, num = 0, end = 0, shift = 0;
+ while (++i < nCount) {
+ if ((curr = sal::static_int_cast<sal_Int16>( numberChar.indexOf(str[i]) )) >= 0) {
+ // Only one digit is accepted between two multipliers.
+ if (num > 0)
+ break;
+ num = curr % 10;
+ } else if ((curr = sal::static_int_cast<sal_Int16>( multiplierChar.indexOf(str[i]) )) >= 0) {
+ curr = MultiplierExponent_7_CJK[curr % ExponentCount_7_CJK];
+ if (prev > curr && num == 0) num = 1; // One may be omitted in informal format
+ shift = end = 0;
+ if (curr >= max)
+ max = curr;
+ else if (curr > prev)
+ shift = max - curr;
+ else
+ end = curr;
+ // Emit zeros down to the previous exponent, with the digit last.
+ while (end++ < prev) {
+ dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
+ if (ppOffset)
+ ppOffset[count] = i;
+ count++;
+ }
+ if (shift) {
+ // Move already written digits (and their offsets) within dst.
+ count -= max;
+ for (const sal_Int32 countEnd = count+shift; count < countEnd; count++) {
+ dst[count] = dst[count + curr];
+ if (ppOffset)
+ ppOffset[count] = ppOffset[count + curr];
+ }
+ max = curr;
+ }
+ NativeToAscii_numberMaker(max, curr, str, i, nCount, dst,
+ count, pOffset, numberChar, multiplierChar);
+ return;
+ } else
+ break;
+ }
+ // No further multiplier: pad the remaining positions below prev.
+ while (end++ < prev) {
+ dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
+ if (ppOffset)
+ ppOffset[count] = i - 1;
+ count++;
+ }
+}
+
+/// Converts native digits, multiplier characters, decimal points, minus
+/// signs and group separators in the first nCount characters of inStr
+/// (clamped to the string length) to ASCII. Characters are looked up in the
+/// complete NumberChar / MultiplierChar_7_CJK / DecimalChar / MinusChar /
+/// SeparatorChar tables, i.e. across all rows rather than for one locale.
+/// Decimal, minus and separator characters are only converted when the next
+/// character is a digit or multiplier. Other characters are copied.
+/// If pOffset is non-null it receives one source index per output character.
+/// Returns an empty string when nCount is not positive.
+/// @throws RuntimeException
+OUString NativeToAscii(const OUString& inStr,
+ sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ OUString aRet;
+
+ sal_Int32 strLen = inStr.getLength();
+
+ if (nCount > strLen)
+ nCount = strLen;
+
+ if (nCount > 0) {
+ const sal_Unicode *str = inStr.getStr();
+ // Output buffers are sized for MultiplierExponent_7_CJK[0] output
+ // characters per input character.
+ std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * MultiplierExponent_7_CJK[0] + 2]);
+ if (pOffset)
+ pOffset->realloc( nCount * MultiplierExponent_7_CJK[0] + 1 );
+ auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
+ sal_Int32 count = 0, index;
+ sal_Int32 i;
+
+ // Flatten the lookup tables into strings so indexOf()/find() can be used.
+ OUString numberChar, multiplierChar, decimalChar, separatorChar;
+ numberChar = OUString(NumberChar[0], 10*NumberChar_Count);
+ multiplierChar = OUString(MultiplierChar_7_CJK[0], ExponentCount_7_CJK*Multiplier_Count);
+ decimalChar = OUString(DecimalChar, NumberChar_Count);
+ std::u16string_view const minusChar(MinusChar, NumberChar_Count);
+ separatorChar = OUString(
+ reinterpret_cast<sal_Unicode *>(SeparatorChar), NumberChar_Count);
+
+ for ( i = 0; i < nCount; i++) {
+ if ((index = multiplierChar.indexOf(str[i])) >= 0) {
+ if (count == 0 || !isNumber(newStr[count-1])) { // add 1 in front of multiplier
+ newStr[count] = NUMBER_ONE;
+ if (ppOffset)
+ ppOffset[count] = i;
+ count++;
+ }
+ index = MultiplierExponent_7_CJK[index % ExponentCount_7_CJK];
+ NativeToAscii_numberMaker(
+ sal::static_int_cast<sal_Int16>( index ), sal::static_int_cast<sal_Int16>( index ),
+ str, i, nCount, newStr.get(), count, pOffset,
+ numberChar, multiplierChar);
+ } else {
+ if ((index = numberChar.indexOf(str[i])) >= 0)
+ newStr[count] = sal::static_int_cast<sal_Unicode>( (index % 10) + NUMBER_ZERO );
+ else if (separatorChar.indexOf(str[i]) >= 0 &&
+ (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
+ multiplierChar.indexOf(str[i+1]) >= 0)))
+ newStr[count] = SeparatorChar[NumberChar_HalfWidth];
+ else if (decimalChar.indexOf(str[i]) >= 0 &&
+ (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
+ multiplierChar.indexOf(str[i+1]) >= 0)))
+ // Only when decimal point is followed by numbers,
+ // it will be convert to ASCII decimal point
+ newStr[count] = DecimalChar[NumberChar_HalfWidth];
+ else if (minusChar.find(str[i]) != std::u16string_view::npos &&
+ (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
+ multiplierChar.indexOf(str[i+1]) >= 0)))
+ // Only when minus is followed by numbers,
+ // it will be convert to ASCII minus sign
+ newStr[count] = MinusChar[NumberChar_HalfWidth];
+ else
+ newStr[count] = str[i];
+ if (ppOffset)
+ ppOffset[count] = i;
+ count++;
+ }
+ }
+
+ if (pOffset) {
+ pOffset->realloc(count);
+ }
+ aRet = OUString(newStr.get(), count);
+ }
+ return aRet;
+}
+
+// Conversion descriptors (see struct Number) for the text-style native
+// number modes. Each natnumN array has four entries; judging by the rows
+// they reference the order is zh_CN, zh_TW, ja, ko -- the code indexing
+// these arrays is not part of this section, TODO confirm.
+// natnum10 / natnum11 are single descriptors using the Korean Hangul rows.
+const Number natnum4[4] = {
+ { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Modern_ja, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67,
+ ExponentCount_7_CJK, MultiplierExponent_7_CJK },
+ { NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ZERO,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+};
+
+const Number natnum5[4] = {
+ { NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Traditional_ja, MultiplierChar_7_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE_67,
+ ExponentCount_7_CJK, MultiplierExponent_7_CJK },
+ { NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+};
+
+const Number natnum6[4] = {
+ { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_FullWidth, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67,
+ ExponentCount_7_CJK, MultiplierExponent_7_CJK },
+ { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+};
+
+const Number natnum7[4] = {
+ { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Modern_ja, MultiplierChar_2_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE,
+ ExponentCount_2_CJK, MultiplierExponent_2_CJK },
+ { NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+};
+
+const Number natnum8[4] = {
+ { NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+ { NumberChar_Traditional_ja, MultiplierChar_2_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE,
+ ExponentCount_2_CJK, MultiplierExponent_2_CJK },
+ { NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK },
+};
+
+const Number natnum10 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK };
+const Number natnum11 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ALL,
+ ExponentCount_6_CJK, MultiplierExponent_6_CJK };
+
+// Locale table for the character-transliteration modes. The index of a
+// locale in natnum1Locales is the "language number" returned by
+// getLanguageNumber(); natnum1 and natnum2 map that number to a NumberChar
+// row. Entries 0 and 1 (zh_CN / zh_TW) are selected specially by
+// getLanguageNumber(); the remaining entries are matched on their two-letter
+// language code.
+//! ATTENTION: Do not change order of elements!
+//! Append new languages to the end of the list!
+const char *natnum1Locales[] = {
+ "zh_CN",
+ "zh_TW",
+ "ja",
+ "ko",
+ "he",
+ "ar",
+ "th",
+ "hi",
+ "or",
+ "mr",
+ "bn",
+ "pa",
+ "gu",
+ "ta",
+ "te",
+ "kn",
+ "ml",
+ "lo",
+ "bo",
+ "my",
+ "km",
+ "mn",
+ "ne",
+ "dz",
+ "fa",
+ "cu"
+};
+const sal_Int16 nbOfLocale = SAL_N_ELEMENTS(natnum1Locales);
+
+// NumberChar row per language number (parallel to natnum1Locales).
+//! ATTENTION: Do not change order of elements!
+//! Number and order must match elements of natnum1Locales!
+const sal_Int16 natnum1[] = {
+ NumberChar_Lower_zh,
+ NumberChar_Lower_zh,
+ NumberChar_Modern_ja,
+ NumberChar_Lower_ko,
+ NumberChar_he,
+ NumberChar_Indic_ar,
+ NumberChar_th,
+ NumberChar_hi,
+ NumberChar_or,
+ NumberChar_mr,
+ NumberChar_bn,
+ NumberChar_pa,
+ NumberChar_gu,
+ NumberChar_ta,
+ NumberChar_te,
+ NumberChar_kn,
+ NumberChar_ml,
+ NumberChar_lo,
+ NumberChar_bo,
+ NumberChar_my,
+ NumberChar_km,
+ NumberChar_mn,
+ NumberChar_ne,
+ NumberChar_dz,
+ NumberChar_EastIndic_ar,
+ NumberChar_cu
+};
+const sal_Int16 sizeof_natnum1 = SAL_N_ELEMENTS(natnum1);
+
+// NumberChar row for the second (upper-case / traditional) digit style;
+// only defined for the first five language numbers.
+//! ATTENTION: Do not change order of elements!
+//! Order must match first elements of natnum1Locales!
+const sal_Int16 natnum2[] = {
+ NumberChar_Upper_zh,
+ NumberChar_Upper_zh_TW,
+ NumberChar_Traditional_ja,
+ NumberChar_Upper_ko,
+ NumberChar_he
+};
+const sal_Int16 sizeof_natnum2 = SAL_N_ELEMENTS(natnum2);
+
+// Maps a locale to its index in natnum1Locales, or -1 if the language has no
+// entry. Chinese is resolved by script variant; every other language is
+// matched on the first two characters of its language code.
+sal_Int16 getLanguageNumber( const Locale& rLocale)
+{
+    const auto& rLanguage = rLocale.Language;
+
+    // return zh_TW for TW, HK and MO, return zh_CN for other zh locales.
+    if (rLanguage == "zh")
+    {
+        if (MsLangId::isTraditionalChinese(rLocale))
+            return 1;
+        return 0;
+    }
+
+    // Entries 0 and 1 are the two Chinese variants handled above.
+    sal_Int16 nIndex = 2;
+    while (nIndex < nbOfLocale)
+    {
+        if (rLanguage.equalsAsciiL(natnum1Locales[nIndex], 2))
+            return nIndex;
+        ++nIndex;
+    }
+
+    return -1;
+}
+
+// Decimal and thousands separator of a locale, each reduced to a single
+// UTF-16 code unit taken from the locale data.
+struct Separators
+{
+    sal_Unicode DecimalSeparator;
+    sal_Unicode ThousandSeparator;
+
+    // Looks the separators up in the locale data of rLocale.
+    Separators(const Locale& rLocale)
+    {
+        const LocaleDataItem aItem = LocaleDataImpl::get()->getLocaleItem(rLocale);
+        ThousandSeparator = aItem.thousandSeparator.toChar();
+        DecimalSeparator = aItem.decimalSeparator.toChar();
+    }
+};
+
+// Returns the separators for rLocale, cached per locale string rLocStr so
+// the locale data is queried only once per locale. Holds theNatNumMutex
+// while the cache is accessed.
+Separators getLocaleSeparators(const Locale& rLocale, const OUString& rLocStr)
+{
+    // Guard the static variable below.
+    std::scoped_lock aGuard(theNatNumMutex);
+    // Maximum a couple hundred of pairs with 4-byte structs - so no need for smart managing
+    static std::unordered_map<OUString, Separators> aLocaleSeparatorsBuf;
+
+    const auto aCached = aLocaleSeparatorsBuf.find(rLocStr);
+    if (aCached != aLocaleSeparatorsBuf.end())
+        return aCached->second;
+
+    // First request for this locale: look it up and remember the result.
+    return aLocaleSeparatorsBuf.emplace(rLocStr, Separators(rLocale)).first->second;
+}
+
+// Spells out the number at the start of rNumberString via the NumberText
+// service for rLocale (NatNum12 formatting).
+//  - The leading numeric part is normalized before conversion: digits are
+//    kept, the locale's decimal separator becomes '.', thousands separators
+//    between digits are dropped and a minus sign is accepted only before the
+//    first digit. Scanning stops at the first other character.
+//  - If no digit was found the whole string is passed to the service
+//    unchanged (used for month and day names).
+//  - sNumberTextParams, unless empty or "cardinal", is prepended to the text
+//    sent to the service, followed by a space.
+//  - Text after the numeric part is appended to the result unchanged.
+// Results are cached per (number string, prefix + locale); if the service
+// returns nothing the normalized number string itself is used.
+OUString getNumberText(const Locale& rLocale, const OUString& rNumberString,
+                       std::u16string_view sNumberTextParams)
+{
+    sal_Int32 i, count = 0;
+    const sal_Int32 len = rNumberString.getLength();
+    const sal_Unicode* src = rNumberString.getStr();
+
+    OUString aLoc = LanguageTag::convertToBcp47(rLocale);
+    Separators aSeparators = getLocaleSeparators(rLocale, aLoc);
+
+    OUStringBuffer sBuf(len);
+    for (i = 0; i < len; i++)
+    {
+        sal_Unicode ch = src[i];
+        if (isNumber(ch))
+        {
+            // Only real digits count: `count` decides below whether the
+            // input is a number at all and where separators/minus are valid.
+            ++count;
+            sBuf.append(ch);
+        }
+        else if (ch == aSeparators.DecimalSeparator)
+            // Normalize the locale's decimal separator to a point so the
+            // number text conversion does not depend on the locale's
+            // separator character.
+            sBuf.append('.');
+        else if (ch == aSeparators.ThousandSeparator && count > 0)
+            // Drop any thousand separator inside digits
+            continue;
+        else if (isMinus(ch) && count == 0)
+            sBuf.append(ch);
+        else
+            break;
+    }
+
+    // Handle also month and day names for NatNum12 date formatting
+    const OUString& rNumberStr = (count == 0) ? rNumberString : sBuf.makeStringAndClear();
+
+    static auto xNumberText
+        = css::linguistic2::NumberText::create(comphelper::getProcessComponentContext());
+
+    // Guard the static variables below.
+    std::scoped_lock aGuard( theNatNumMutex );
+
+    OUString numbertext_prefix;
+    // default "cardinal" gets empty prefix
+    if (!sNumberTextParams.empty() && sNumberTextParams != u"cardinal")
+        numbertext_prefix = OUString::Concat(sNumberTextParams) + " ";
+    // Several hundreds of headings could result typing lags because
+    // of the continuous update of the multiple number names during typing.
+    // We fix this by buffering the result of the conversion.
+    static std::unordered_map<OUString, std::map<OUString, OUString>> aBuff;
+    auto& rItems = aBuff[rNumberStr];
+    auto& rItem = rItems[numbertext_prefix + aLoc];
+    if (rItem.isEmpty())
+    {
+        rItem = xNumberText->getNumberText(numbertext_prefix + rNumberStr, rLocale);
+        // use number at missing number to text conversion
+        if (rItem.isEmpty())
+            rItem = rNumberStr;
+    }
+    OUString sResult = rItem;
+    // Re-attach whatever followed the numeric part.
+    if (i != 0 && i < len)
+        sResult += rNumberString.subView(i);
+    return sResult;
+}
+}
+
+// Converts aNumberString to the representation selected by nNativeNumberMode
+// for rLocale.
+//
+// aNumberString        text to convert
+// rLocale              locale selecting the native digit/text tables
+// nNativeNumberMode    one of the NativeNumberMode constants
+// pOffset              forwarded unchanged to NativeToAscii / AsciiToNative /
+//                      AsciiToNativeChar; the three-argument overload passes nullptr
+// rNativeNumberParams  only read for NATNUM12: an optional casing prefix
+//                      ("capitalize", "upper", "lower" or "title"), optionally
+//                      followed by a space and parameters for getNumberText()
+//
+// Returns the converted string, or aNumberString unchanged when the mode is
+// not valid for the locale or no conversion table applies.
+OUString NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale,
+                                                            sal_Int16 nNativeNumberMode,
+                                                            Sequence<sal_Int32>* pOffset,
+                                                            std::u16string_view rNativeNumberParams)
+{
+    if (!isValidNatNumImpl(rLocale, nNativeNumberMode))
+        return aNumberString;
+
+    if (nNativeNumberMode == NativeNumberMode::NATNUM12)
+    {
+        // handle capitalization prefixes "capitalize", "upper", "lower" and "title"
+
+        enum WhichCasing
+        {
+            CAPITALIZE,
+            UPPER,
+            LOWER,
+            TITLE
+        };
+
+        struct CasingEntry
+        {
+            std::u16string_view aLiteral;
+            WhichCasing eCasing;
+        };
+
+        static const CasingEntry Casings[] =
+        {
+            { std::u16string_view(u"capitalize"), CAPITALIZE },
+            { std::u16string_view(u"upper"), UPPER },
+            { std::u16string_view(u"lower"), LOWER },
+            { std::u16string_view(u"title"), TITLE }
+        };
+
+        // nStripCase stays 0 when no casing keyword starts the parameter string.
+        std::size_t nStripCase = 0;
+        size_t nCasing;
+        for (nCasing = 0; nCasing < SAL_N_ELEMENTS(Casings); ++nCasing)
+        {
+            if (o3tl::starts_with(rNativeNumberParams, Casings[nCasing].aLiteral))
+            {
+                nStripCase = Casings[nCasing].aLiteral.size();
+                break;
+            }
+        }
+
+        // The keyword only counts when it is the whole string or is followed by
+        // a space; the post-increment also skips that space for substr() below.
+        if (nStripCase > 0 && (rNativeNumberParams.size() == nStripCase ||
+                    rNativeNumberParams[nStripCase++] == ' '))
+        {
+            OUString aStr = getNumberText(rLocale, aNumberString, rNativeNumberParams.substr(nStripCase));
+
+            if (!xCharClass.is())
+                xCharClass = CharacterClassification::create(comphelper::getProcessComponentContext());
+
+            // Case mapping uses the requested rLocale.  The member aLocale is
+            // only refreshed further down for the other NatNum modes, so it may
+            // still describe a previous (or no) locale at this point.
+            switch (Casings[nCasing].eCasing)
+            {
+                case CAPITALIZE:
+                    return xCharClass->toTitle(aStr, 0, 1, rLocale) +
+                        (aStr.getLength() > 1 ? aStr.subView(1) : u"");
+                case UPPER:
+                    return xCharClass->toUpper(aStr, 0, aStr.getLength(), rLocale);
+                case LOWER:
+                    return xCharClass->toLower(aStr, 0, aStr.getLength(), rLocale);
+                case TITLE:
+                {
+                    if ( rLocale.Language == "en" )
+                    {
+                        // title case is common in English, so fix bugs of toTitle():
+                        // not "One Dollar *And* *Twenty-two* Cents", but
+                        // "One Dollar *and* *Twenty-Two* Cents".
+
+                        // Add spaces after hyphens to separate the elements of the
+                        // hyphenated compound words temporarily, allowing their
+                        // capitalization by toTitle()
+                        aStr = aStr.replaceAll("-", "- ");
+                        aStr = xCharClass->toTitle(aStr, 0, aStr.getLength(), rLocale);
+                        return aStr.replaceAll("- ", "-").replaceAll(" And ", " and ");
+                    }
+                    else
+                        return xCharClass->toTitle(aStr, 0, aStr.getLength(), rLocale);
+                }
+            }
+        }
+        else
+        {
+            return getNumberText(rLocale, aNumberString, rNativeNumberParams);
+        }
+    }
+
+    sal_Int16 langnum = getLanguageNumber(rLocale);
+    if (langnum == -1)
+        return aNumberString;
+
+    // Exactly one of these is set by the switch: `number` for the text modes,
+    // `num` (a NumberChar row index) for the character modes.
+    const Number *number = nullptr;
+    sal_Int16 num = -1;
+
+    switch (nNativeNumberMode)
+    {
+        case NativeNumberMode::NATNUM0: // Ascii
+            return NativeToAscii(aNumberString, aNumberString.getLength(), pOffset);
+        case NativeNumberMode::NATNUM1: // Char, Lower
+            num = natnum1[langnum];
+            break;
+        case NativeNumberMode::NATNUM2: // Char, Upper
+            num = natnum2[langnum];
+            break;
+        case NativeNumberMode::NATNUM3: // Char, FullWidth
+            num = NumberChar_FullWidth;
+            break;
+        case NativeNumberMode::NATNUM4: // Text, Lower, Long
+            number = &natnum4[langnum];
+            break;
+        case NativeNumberMode::NATNUM5: // Text, Upper, Long
+            number = &natnum5[langnum];
+            break;
+        case NativeNumberMode::NATNUM6: // Text, FullWidth
+            number = &natnum6[langnum];
+            break;
+        case NativeNumberMode::NATNUM7: // Text. Lower, Short
+            number = &natnum7[langnum];
+            break;
+        case NativeNumberMode::NATNUM8: // Text, Upper, Short
+            number = &natnum8[langnum];
+            break;
+        case NativeNumberMode::NATNUM9: // Char, Hangul
+            num = NumberChar_Hangul_ko;
+            break;
+        case NativeNumberMode::NATNUM10: // Text, Hangul, Long
+            number = &natnum10;
+            break;
+        case NativeNumberMode::NATNUM11: // Text, Hangul, Short
+            number = &natnum11;
+            break;
+        default:
+            break;
+    }
+
+    if (number || num >= 0) {
+        // Refresh the cached separator characters when the locale differs from
+        // the one remembered in aLocale.
+        if (aLocale.Language != rLocale.Language ||
+                aLocale.Country != rLocale.Country ||
+                aLocale.Variant != rLocale.Variant) {
+            LocaleDataItem item = LocaleDataImpl::get()->getLocaleItem( rLocale );
+            aLocale = rLocale;
+            DecimalChar[NumberChar_HalfWidth]=item.decimalSeparator.toChar();
+            // Characters in 0x21..0x7E get their full-width form by adding 0xFEE0;
+            // anything else falls back to a fixed full-width character.
+            if (DecimalChar[NumberChar_HalfWidth] > 0x7E || DecimalChar[NumberChar_HalfWidth] < 0x21)
+                DecimalChar[NumberChar_FullWidth]=0xFF0E;
+            else
+                DecimalChar[NumberChar_FullWidth]=DecimalChar[NumberChar_HalfWidth]+0xFEE0;
+            SeparatorChar[NumberChar_HalfWidth]=item.thousandSeparator.toChar();
+            if (SeparatorChar[NumberChar_HalfWidth] > 0x7E || SeparatorChar[NumberChar_HalfWidth] < 0x21)
+                SeparatorChar[NumberChar_FullWidth]=0xFF0C;
+            else
+                SeparatorChar[NumberChar_FullWidth]=SeparatorChar[NumberChar_HalfWidth]+0xFEE0;
+        }
+        if (number)
+            return AsciiToNative( aNumberString, aNumberString.getLength(), pOffset, number );
+        else if (num == NumberChar_he)
+            return getHebrewNativeNumberString(aNumberString,
+                    nNativeNumberMode == NativeNumberMode::NATNUM2);
+        else if (num == NumberChar_cu)
+            return getCyrillicNativeNumberString(aNumberString);
+        else
+            return AsciiToNativeChar(aNumberString, aNumberString.getLength(), pOffset, num);
+    }
+    else
+        return aNumberString;
+}
+
+// Three-argument overload: converts without an offset sequence and without
+// extra NatNum parameters.
+OUString SAL_CALL NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale,
+                                                                     sal_Int16 nNativeNumberMode)
+{
+    Sequence<sal_Int32>* const pNoOffset = nullptr;
+    return getNativeNumberString(aNumberString, rLocale, nNativeNumberMode, pNoOffset);
+}
+
+// Same conversion as getNativeNumberString(), additionally passing
+// rNativeNumberParams through; no offset sequence is requested.
+OUString SAL_CALL NativeNumberSupplierService::getNativeNumberStringParams(
+    const OUString& rNumberString, const css::lang::Locale& rLocale, sal_Int16 nNativeNumberMode,
+    const OUString& rNativeNumberParams)
+{
+    Sequence<sal_Int32>* const pNoOffset = nullptr;
+    return getNativeNumberString(rNumberString, rLocale, nNativeNumberMode, pNoOffset,
+                                 rNativeNumberParams);
+}
+
+// Converts a single digit character between ASCII and a native digit set.
+//
+// NATNUM0: inChar is searched in the first ten entries of every NumberChar
+//          row; on a match the corresponding ASCII digit is returned.
+// other modes: an ASCII digit is replaced by the digit of the row selected by
+//          mode and locale.
+// inChar is returned unchanged when it is not a digit, the mode is not valid
+// for rLocale, or the mode has no digit row.
+sal_Unicode NativeNumberSupplierService::getNativeNumberChar( const sal_Unicode inChar, const Locale& rLocale, sal_Int16 nNativeNumberMode )
+{
+    if (nNativeNumberMode == NativeNumberMode::NATNUM0) { // Ascii
+        for (const auto & i : NumberChar)
+            for (sal_Int16 j = 0; j < 10; j++)
+                if (inChar == i[j])
+                    // j is the digit's value; return the ASCII digit character,
+                    // not the control character U+0000..U+0009.
+                    return static_cast<sal_Unicode>(NUMBER_ZERO + j);
+        return inChar;
+    }
+
+    if (!isNumber(inChar))
+        return inChar;
+
+    if (!isValidNatNumImpl(rLocale, nNativeNumberMode))
+        return inChar;
+
+    sal_Int16 langnum = getLanguageNumber(rLocale);
+    if (langnum == -1)
+        return inChar;
+
+    // The text modes share the digit row of the matching character mode.
+    switch (nNativeNumberMode)
+    {
+        case NativeNumberMode::NATNUM1: // Char, Lower
+        case NativeNumberMode::NATNUM4: // Text, Lower, Long
+        case NativeNumberMode::NATNUM7: // Text. Lower, Short
+            return NumberChar[natnum1[langnum]][inChar - NUMBER_ZERO];
+        case NativeNumberMode::NATNUM2: // Char, Upper
+        case NativeNumberMode::NATNUM5: // Text, Upper, Long
+        case NativeNumberMode::NATNUM8: // Text, Upper, Short
+            return NumberChar[natnum2[langnum]][inChar - NUMBER_ZERO];
+        case NativeNumberMode::NATNUM3: // Char, FullWidth
+        case NativeNumberMode::NATNUM6: // Text, FullWidth
+            return NumberChar[NumberChar_FullWidth][inChar - NUMBER_ZERO];
+        case NativeNumberMode::NATNUM9: // Char, Hangul
+        case NativeNumberMode::NATNUM10: // Text, Hangul, Long
+        case NativeNumberMode::NATNUM11: // Text, Hangul, Short
+            return NumberChar[NumberChar_Hangul_ko][inChar - NUMBER_ZERO];
+        default:
+            break;
+    }
+
+    return inChar;
+}
+
+// Tells whether nNativeNumberMode can be applied to rLocale, based on the
+// locale's index from getLanguageNumber().
+bool NativeNumberSupplierService::isValidNatNumImpl( const Locale& rLocale, sal_Int16 nNativeNumberMode )
+{
+    const sal_Int16 nLang = getLanguageNumber(rLocale);
+    const bool bKnownLanguage = nLang >= 0;
+    const bool bCJK = bKnownLanguage && nLang < 4; // CJK numbering
+    const bool bKorean = nLang == 3;               // Korean numbering
+    const bool bHebrew = nLang == 4;               // Hebrew numbering
+
+    switch (nNativeNumberMode) {
+        case NativeNumberMode::NATNUM0: // Ascii
+        case NativeNumberMode::NATNUM3: // Char, FullWidth
+        case NativeNumberMode::NATNUM12: // spell out numbers, dates and money amounts
+            return true;
+        case NativeNumberMode::NATNUM1: // Char, Lower
+            return bKnownLanguage;
+        case NativeNumberMode::NATNUM2: // Char, Upper
+            return bHebrew || bCJK;
+        case NativeNumberMode::NATNUM4: // Text, Lower, Long
+        case NativeNumberMode::NATNUM5: // Text, Upper, Long
+        case NativeNumberMode::NATNUM6: // Text, FullWidth
+        case NativeNumberMode::NATNUM7: // Text. Lower, Short
+        case NativeNumberMode::NATNUM8: // Text, Upper, Short
+            return bCJK;
+        case NativeNumberMode::NATNUM9: // Char, Hangul
+        case NativeNumberMode::NATNUM10: // Text, Hangul, Long
+        case NativeNumberMode::NATNUM11: // Text, Hangul, Short
+            return bKorean;
+    }
+    return false;
+}
+
+// Describes a NatNum mode for rLocale as XML attributes: the Format is the
+// character at index 1 of the selected NumberChar row, the Style is one of
+// "short", "medium" or "long".  Modes that are not valid for the locale (or
+// have no row of their own) yield the half-width row with style "short".
+NativeNumberXmlAttributes SAL_CALL NativeNumberSupplierService::convertToXmlAttributes( const Locale& rLocale, sal_Int16 nNativeNumberMode )
+{
+    static const sal_Int16 attShort = 0;
+    static const sal_Int16 attMedium = 1;
+    static const sal_Int16 attLong = 2;
+    static const char *attType[] = { "short", "medium", "long" };
+
+    sal_Int16 nCharSet = NumberChar_HalfWidth;
+    sal_Int16 nStyle = attShort;
+
+    // The language is only looked up when the mode is valid for the locale.
+    sal_Int16 nLang = -1;
+    if (isValidNatNum(rLocale, nNativeNumberMode))
+        nLang = getLanguageNumber(rLocale);
+
+    if (nLang != -1)
+    {
+        // First pick the digit row ...
+        switch (nNativeNumberMode)
+        {
+            case NativeNumberMode::NATNUM1: // Char, Lower
+            case NativeNumberMode::NATNUM4: // Text, Lower, Long
+            case NativeNumberMode::NATNUM7: // Text. Lower, Short
+                nCharSet = natnum1[nLang];
+                break;
+            case NativeNumberMode::NATNUM2: // Char, Upper
+            case NativeNumberMode::NATNUM5: // Text, Upper, Long
+            case NativeNumberMode::NATNUM8: // Text, Upper, Short
+                nCharSet = natnum2[nLang];
+                break;
+            case NativeNumberMode::NATNUM3: // Char, FullWidth
+            case NativeNumberMode::NATNUM6: // Text, FullWidth
+                nCharSet = NumberChar_FullWidth;
+                break;
+            case NativeNumberMode::NATNUM9: // Char, Hangul
+            case NativeNumberMode::NATNUM10: // Text, Hangul, Long
+            case NativeNumberMode::NATNUM11: // Text, Hangul, Short
+                nCharSet = NumberChar_Hangul_ko;
+                break;
+            default: // NATNUM0 (Ascii) and anything else keep the half-width row
+                break;
+        }
+
+        // ... then the style name.
+        switch (nNativeNumberMode)
+        {
+            case NativeNumberMode::NATNUM2: // Char, Upper
+                if (nCharSet == NumberChar_he)
+                    nStyle = attMedium;
+                else
+                    nStyle = attShort;
+                break;
+            case NativeNumberMode::NATNUM4: // Text, Lower, Long
+            case NativeNumberMode::NATNUM5: // Text, Upper, Long
+            case NativeNumberMode::NATNUM6: // Text, FullWidth
+            case NativeNumberMode::NATNUM10: // Text, Hangul, Long
+                nStyle = attLong;
+                break;
+            case NativeNumberMode::NATNUM7: // Text. Lower, Short
+            case NativeNumberMode::NATNUM8: // Text, Upper, Short
+            case NativeNumberMode::NATNUM11: // Text, Hangul, Short
+                nStyle = attMedium;
+                break;
+            default: // NATNUM0, NATNUM1, NATNUM3, NATNUM9 and anything else
+                break;
+        }
+    }
+
+    return NativeNumberXmlAttributes(rLocale, OUString(&NumberChar[nCharSet][1], 1),
+                                     OUString::createFromAscii(attType[nStyle]));
+}
+
+// Returns true when num occurs among the first len entries of natnum.
+static bool natNumIn(sal_Int16 num, const sal_Int16 natnum[], sal_Int16 len)
+{
+    const sal_Int16* pEntry = natnum;
+    for (sal_Int16 nLeft = len; nLeft > 0; --nLeft, ++pEntry)
+    {
+        if (*pEntry == num)
+            return true;
+    }
+    return false;
+}
+
+// Inverse of convertToXmlAttributes(): derives the NatNum mode from the
+// Format character and the Style name.  Returns NATNUM0 when the format does
+// not match any rule of the given style; throws RuntimeException when Style
+// is none of "short", "medium" or "long".
+sal_Int16 SAL_CALL NativeNumberSupplierService::convertFromXmlAttributes( const NativeNumberXmlAttributes& aAttr )
+{
+    // Build a string from the index-1 character of every NumberChar row; the
+    // position of aAttr.Format in it is the row index.
+    sal_Unicode aOnes[NumberChar_Count];
+    for (sal_Int16 nRow = 0; nRow < NumberChar_Count; ++nRow)
+        aOnes[nRow] = NumberChar[nRow][1];
+    const OUString aOnesStr(aOnes, NumberChar_Count);
+
+    const sal_Int16 nRowFound = sal::static_int_cast<sal_Int16>( aOnesStr.indexOf(aAttr.Format) );
+    const bool bInNatnum1 = natNumIn(nRowFound, natnum1, sizeof_natnum1);
+    const bool bInNatnum2 = natNumIn(nRowFound, natnum2, sizeof_natnum2);
+
+    if ( aAttr.Style == "short" )
+    {
+        if (nRowFound == NumberChar_FullWidth)
+            return NativeNumberMode::NATNUM3;
+        if (nRowFound == NumberChar_Hangul_ko)
+            return NativeNumberMode::NATNUM9;
+        if (bInNatnum1)
+            return NativeNumberMode::NATNUM1;
+        if (bInNatnum2)
+            return NativeNumberMode::NATNUM2;
+        return NativeNumberMode::NATNUM0;
+    }
+
+    if ( aAttr.Style == "medium" )
+    {
+        if (nRowFound == NumberChar_Hangul_ko)
+            return NativeNumberMode::NATNUM11;
+        if (nRowFound == NumberChar_he)
+            return NativeNumberMode::NATNUM2;
+        if (bInNatnum1)
+            return NativeNumberMode::NATNUM7;
+        if (bInNatnum2)
+            return NativeNumberMode::NATNUM8;
+        return NativeNumberMode::NATNUM0;
+    }
+
+    if ( aAttr.Style == "long" )
+    {
+        if (nRowFound == NumberChar_FullWidth)
+            return NativeNumberMode::NATNUM6;
+        if (nRowFound == NumberChar_Hangul_ko)
+            return NativeNumberMode::NATNUM10;
+        if (bInNatnum1)
+            return NativeNumberMode::NATNUM4;
+        if (bInNatnum2)
+            return NativeNumberMode::NATNUM5;
+        return NativeNumberMode::NATNUM0;
+    }
+
+    throw RuntimeException();
+}
+
+
+// Following code generates Hebrew Number,
+// see numerical system in the Hebrew Numbering System in following link for details,
+// http://smontagu.org/writings/HebrewNumbers.html
+
+namespace {
+
+// One entry of the Hebrew numeral table: a letter and the value it stands for.
+struct HebrewNumberChar {
+ sal_Unicode code; // UTF-16 code unit of the letter
+ sal_Int16 value; // numeric value contributed by the letter
+};
+
+}
+
+// Letter/value pairs ordered from the largest value to the smallest, as
+// required by the single top-down pass in makeHebrewNumber().
+// The 400 entry is listed twice; since that pass visits each entry at most
+// once, this presumably allows two 400 letters for values of 800 and above.
+HebrewNumberChar const HebrewNumberCharArray[] = {
+ { 0x05ea, 400 },
+ { 0x05ea, 400 },
+ { 0x05e9, 300 },
+ { 0x05e8, 200 },
+ { 0x05e7, 100 },
+ { 0x05e6, 90 },
+ { 0x05e4, 80 },
+ { 0x05e2, 70 },
+ { 0x05e1, 60 },
+ { 0x05e0, 50 },
+ { 0x05de, 40 },
+ { 0x05dc, 30 },
+ { 0x05db, 20 },
+ { 0x05d9, 10 },
+ { 0x05d8, 9 },
+ { 0x05d7, 8 },
+ { 0x05d6, 7 },
+ { 0x05d5, 6 },
+ { 0x05d4, 5 },
+ { 0x05d3, 4 },
+ { 0x05d2, 3 },
+ { 0x05d1, 2 },
+ { 0x05d0, 1 }
+};
+
+// NUL-terminated words appended by makeHebrewNumber() when the lowest
+// three-digit group of a value is zero: `thousand` for exactly 1000,
+// otherwise `thousands_last` or `thousands` depending on its isLast argument.
+const sal_Unicode thousand[] = {0x05d0, 0x05dc, 0x05e3, 0x0};
+const sal_Unicode thousands[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x0};
+const sal_Unicode thousands_last[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x05dd, 0x0};
+// U+05F3 HEBREW PUNCTUATION GERESH: appended after a single-letter number.
+const sal_Unicode geresh = 0x05f3;
+// U+05F4 HEBREW PUNCTUATION GERSHAYIM: inserted before the last letter of a multi-letter number.
+const sal_Unicode gershayim = 0x05f4;
+
+// Appends the Hebrew-letter form of value to output.
+// The value is processed in groups of three decimal digits: for values above
+// 1000 the higher groups are written first by recursion, followed by a space.
+// isLast is only consulted when the lowest group is zero (it selects
+// thousands_last over thousands).  useGeresh adds geresh/gershayim
+// punctuation to the letters of the lowest group.
+static void makeHebrewNumber(sal_Int64 value, OUStringBuffer& output, bool isLast, bool useGeresh)
+{
+ // Lowest three decimal digits of value.
+ sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000);
+
+ if (value > 1000) {
+ // The recursive call receives isLast == true when this group is non-zero.
+ makeHebrewNumber(value / 1000, output, num != 0, useGeresh);
+ output.append(" ");
+ }
+ if (num == 0) {
+ // A zero group is written as a word instead of letters.
+ output.append(value == 1000 ? thousand : isLast ? thousands_last : thousands);
+ } else {
+ sal_Int16 nbOfChar = 0;
+ // Single pass over the table from the largest value down; an entry is
+ // taken whenever its value still fits into the remainder.
+ for (sal_Int32 j = 0; num > 0 && j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray)); j++) {
+ if (num - HebrewNumberCharArray[j].value >= 0) {
+ nbOfChar++;
+ // https://en.wikipedia.org/wiki/Hebrew_numerals#Key_exceptions
+ // By convention, the numbers 15 and 16 are represented as 9 + 6 and 9 + 7
+ if (num == 15 || num == 16) // substitution for 15 and 16
+ j++; // skip the 10 entry so that the 9 entry is used instead
+ assert(j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray)));
+ num = sal::static_int_cast<sal_Int16>( num - HebrewNumberCharArray[j].value );
+ output.append(HebrewNumberCharArray[j].code);
+ }
+ }
+ if (useGeresh) {
+ if (nbOfChar > 1) // a number is written as more than one character
+ output.insert(output.getLength() - 1, gershayim);
+ else if (nbOfChar == 1) // a number is written as a single character
+ output.append(geresh);
+ }
+ }
+}
+
+// Converts the leading decimal number of aNumberString to Hebrew numerals.
+//
+// Leading minus signs and separators between digits are skipped; parsing
+// stops at the first other character and the rest of the string is appended
+// unchanged after the converted number.  useGeresh is passed on to
+// makeHebrewNumber().
+//
+// Returns aNumberString unchanged when the parsed value is zero or when the
+// number has more than 18 digits.
+OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh)
+{
+    sal_Int64 value = 0;
+    sal_Int32 i, count = 0, len = aNumberString.getLength();
+    const sal_Unicode *src = aNumberString.getStr();
+
+    for (i = 0; i < len; i++) {
+        sal_Unicode ch = src[i];
+        if (isNumber(ch)) {
+            // Accept at most 18 digits: a 19-digit number can exceed the range
+            // of sal_Int64, and overflowing the accumulation below would be
+            // undefined behaviour.
+            if (++count >= 19) // Number is too long, could not be handled.
+                return aNumberString;
+            value = value * 10 + (ch - NUMBER_ZERO);
+        }
+        else if (isSeparator(ch) && count > 0) continue;
+        else if (isMinus(ch) && count == 0) continue;
+        else break;
+    }
+
+    if (value > 0) {
+        OUStringBuffer output(count*2 + 2 + len - i);
+
+        makeHebrewNumber(value, output, true, useGeresh);
+
+        // Keep whatever followed the number.
+        if (i < len)
+            output.append(aNumberString.subView(i));
+
+        return output.makeStringAndClear();
+    }
+    else
+        return aNumberString;
+}
+
+// Support for Cyrillic Numerals
+// See UTN 41 for implementation information
+// http://www.unicode.org/notes/tn41/
+
+// U+0482 CYRILLIC THOUSANDS SIGN: written in front of the thousands part.
+const sal_Unicode cyrillicThousandsMark = 0x0482;
+// U+0483 COMBINING CYRILLIC TITLO: added once to a completed number.
+const sal_Unicode cyrillicTitlo = 0x0483;
+// U+0456, the letter with value 10; written after the unit letter for 11..19.
+const sal_Unicode cyrillicTen = 0x0456;
+
+namespace {
+
+// One entry of the Cyrillic numeral table: a letter and the value it stands for.
+struct CyrillicNumberChar {
+ sal_Unicode code; // UTF-16 code unit of the letter
+ sal_Int16 value; // numeric value contributed by the letter
+};
+
+}
+
+// Letter/value pairs for the hundreds, tens and units, ordered from the
+// largest value to the smallest as required by the top-down pass in
+// makeCyrillicNumber().
+CyrillicNumberChar const CyrillicNumberCharArray[] = {
+ { 0x0446, 900 },
+ { 0x047f, 800 },
+ { 0x0471, 700 },
+ { 0x0445, 600 },
+ { 0x0444, 500 },
+ { 0x0443, 400 },
+ { 0x0442, 300 },
+ { 0x0441, 200 },
+ { 0x0440, 100 },
+ { 0x0447, 90 },
+ { 0x043f, 80 },
+ { 0x047b, 70 },
+ { 0x046f, 60 },
+ { 0x043d, 50 },
+ { 0x043c, 40 },
+ { 0x043b, 30 },
+ { 0x043a, 20 },
+ { 0x0456, 10 },
+ { 0x0473, 9 },
+ { 0x0438, 8 },
+ { 0x0437, 7 },
+ { 0x0455, 6 },
+ { 0x0454, 5 },
+ { 0x0434, 4 },
+ { 0x0433, 3 },
+ { 0x0432, 2 },
+ { 0x0430, 1 }
+};
+
+// Appends the Cyrillic-numeral form of value to output.
+// Values of 1000 and more are written as the thousands sign followed by the
+// (recursively converted) thousands count; the lowest three digits follow.
+// When addTitlo is true a titlo is finally placed into the buffer; its
+// position depends on the buffer length, see below.
+static void makeCyrillicNumber(sal_Int64 value, OUStringBuffer& output, bool addTitlo)
+{
+ // Lowest three decimal digits of value.
+ sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000);
+ if (value >= 1000) {
+ output.append(cyrillicThousandsMark);
+ // The thousands count never carries a titlo of its own.
+ makeCyrillicNumber(value / 1000, output, false);
+ if (value >= 10000 && (value - 10000) % 1000 != 0) {
+ output.append(" ");
+ }
+ // No titlo at all when nothing follows the thousands part.
+ if (value % 1000 == 0)
+ addTitlo = false;
+ }
+
+ for (sal_Int32 j = 0; num > 0 && j < sal_Int32(SAL_N_ELEMENTS(CyrillicNumberCharArray)); j++) {
+ // 11..19: the unit letter is written first, then the letter for ten.
+ if (num < 20 && num > 10) {
+ num -= 10;
+ makeCyrillicNumber(num, output, false);
+ output.append(cyrillicTen);
+ break;
+ }
+
+ // Otherwise take every table entry that still fits into the remainder.
+ if (CyrillicNumberCharArray[j].value <= num) {
+ output.append(CyrillicNumberCharArray[j].code);
+ num = sal::static_int_cast<sal_Int16>( num - CyrillicNumberCharArray[j].value );
+ }
+ }
+
+ if (!addTitlo)
+ return;
+
+ // Titlo placement, decided by the current length of the whole buffer.
+ if (output.getLength() == 1) {
+ output.append(cyrillicTitlo);
+ } else if (output.getLength() == 2) {
+ // Appended at the end for 801..899, otherwise inserted after the first character.
+ if (value > 800 && value < 900) {
+ output.append(cyrillicTitlo);
+ } else {
+ output.insert(1, cyrillicTitlo);
+ }
+ } else if (output.getLength() > 2) {
+ // Appended when the first space is the second-to-last character,
+ // otherwise inserted before the last character.
+ if (output.indexOf(" ") == output.getLength() - 2) {
+ output.append(cyrillicTitlo);
+ } else {
+ output.insert(output.getLength() - 1, cyrillicTitlo);
+ }
+ }
+}
+
+// Converts the leading decimal number of aNumberString to Cyrillic numerals.
+// Leading minus signs and separators between digits are skipped; the text
+// after the number is appended unchanged.  The input is returned as is when
+// the parsed value is zero or the number has 8 or more digits.
+OUString getCyrillicNativeNumberString(const OUString& aNumberString)
+{
+    const sal_Int32 nLen = aNumberString.getLength();
+    const sal_Unicode* const pChars = aNumberString.getStr();
+
+    sal_Int64 nValue = 0;
+    sal_Int32 nDigits = 0;
+    sal_Int32 nPos = 0;
+
+    while (nPos < nLen)
+    {
+        const sal_Unicode c = pChars[nPos];
+        if (isNumber(c))
+        {
+            ++nDigits;
+            if (nDigits >= 8) // Number is too long, could not be handled.
+                return aNumberString;
+            nValue = nValue * 10 + (c - NUMBER_ZERO);
+        }
+        else
+        {
+            // Before the first digit only minus signs are skipped, afterwards
+            // only separators; anything else ends the number.
+            const bool bSkip = (nDigits > 0) ? isSeparator(c) : isMinus(c);
+            if (!bSkip)
+                break;
+        }
+        ++nPos;
+    }
+
+    if (nValue <= 0)
+        return aNumberString;
+
+    OUStringBuffer aOut(nDigits*2 + 2 + nLen - nPos);
+    makeCyrillicNumber(nValue, aOut, true);
+    if (nPos < nLen)
+        aOut.append(aNumberString.subView(nPos));
+    return aOut.makeStringAndClear();
+}
+
+// Name returned by getImplementationName() and listed first by getSupportedServiceNames().
+constexpr OUString implementationName = u"com.sun.star.i18n.NativeNumberSupplier"_ustr;
+
+// Returns the implementation name of this service.
+OUString SAL_CALL NativeNumberSupplierService::getImplementationName()
+{
+ return implementationName;
+}
+
+// Delegates the check for rServiceName to the generic cppu::supportsService() helper.
+sal_Bool SAL_CALL
+NativeNumberSupplierService::supportsService(const OUString& rServiceName)
+{
+ return cppu::supportsService(this, rServiceName);
+}
+
+// Returns the two service names of this implementation: the implementation
+// name itself and "com.sun.star.i18n.NativeNumberSupplier2".
+Sequence< OUString > SAL_CALL
+NativeNumberSupplierService::getSupportedServiceNames()
+{
+ return {implementationName, "com.sun.star.i18n.NativeNumberSupplier2"};
+}
+
+}
+
+// Exported C entry point that creates a new NativeNumberSupplierService and
+// returns it acquired; both arguments are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_NativeNumberSupplier_get_implementation(
+ css::uno::XComponentContext *,
+ css::uno::Sequence<css::uno::Any> const &)
+{
+ return cppu::acquire(new i18npool::NativeNumberSupplierService());
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */