From 267c6f2ac71f92999e969232431ba04678e7437e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 07:54:39 +0200 Subject: Adding upstream version 4:24.2.0. Signed-off-by: Daniel Baumann --- include/i18nutil/calendar.hxx | 23 +++++ include/i18nutil/casefolding.hxx | 91 ++++++++++++++++++ include/i18nutil/i18nutildllapi.h | 34 +++++++ include/i18nutil/oneToOneMapping.hxx | 90 ++++++++++++++++++ include/i18nutil/paper.hxx | 158 ++++++++++++++++++++++++++++++++ include/i18nutil/scripttypedetector.hxx | 36 ++++++++ include/i18nutil/searchopt.hxx | 139 ++++++++++++++++++++++++++++ include/i18nutil/transliteration.hxx | 133 +++++++++++++++++++++++++++ include/i18nutil/unicode.hxx | 128 ++++++++++++++++++++++++++ include/i18nutil/widthfolding.hxx | 57 ++++++++++++ 10 files changed, 889 insertions(+) create mode 100644 include/i18nutil/calendar.hxx create mode 100644 include/i18nutil/casefolding.hxx create mode 100644 include/i18nutil/i18nutildllapi.h create mode 100644 include/i18nutil/oneToOneMapping.hxx create mode 100644 include/i18nutil/paper.hxx create mode 100644 include/i18nutil/scripttypedetector.hxx create mode 100644 include/i18nutil/searchopt.hxx create mode 100644 include/i18nutil/transliteration.hxx create mode 100644 include/i18nutil/unicode.hxx create mode 100644 include/i18nutil/widthfolding.hxx (limited to 'include/i18nutil') diff --git a/include/i18nutil/calendar.hxx b/include/i18nutil/calendar.hxx new file mode 100644 index 0000000000..86a5beacd9 --- /dev/null +++ b/include/i18nutil/calendar.hxx @@ -0,0 +1,23 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#pragma once + +namespace i18nutil +{ +/** Average number of days per month over the ten months excluding January and February. + * According to the article, it is calculated as (365-31-28)/10 = 30.6, but because + * of a floating point bug, the value 30.6001 was historically used instead as a workaround. + * + * "30.6001, 25 year old hack?" + * https://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/archv011.cgi?read=31650 */ +constexpr double monthDaysWithoutJanFeb = (365 - 31 - 28) / 10.0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/include/i18nutil/casefolding.hxx b/include/i18nutil/casefolding.hxx new file mode 100644 index 0000000000..b15a51ae8d --- /dev/null +++ b/include/i18nutil/casefolding.hxx @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#ifndef INCLUDED_I18NUTIL_CASEFOLDING_HXX +#define INCLUDED_I18NUTIL_CASEFOLDING_HXX + +#include +#include +#include +#include + +namespace com::sun::star::lang { struct Locale; } + +enum class TransliterationFlags; + +enum class MappingType { + NONE = 0x00, + LowerToUpper = 0x01, // Upper to Lower mapping + UpperToLower = 0x02, // Lower to Upper mapping + ToUpper = 0x04, // to Upper mapping + ToLower = 0x08, // to Lower mapping + ToTitle = 0x10, // to Title mapping + SimpleFolding = 0x20, // Simple Case Folding + FullFolding = 0x40, // Full Case Folding + // for final sigma (where the case-mapping is different for the last letter of a word) + CasedLetterMask = LowerToUpper | UpperToLower | ToUpper | ToLower | ToTitle | SimpleFolding | FullFolding, + NotValue = 0x80, // Value field is an address +}; +namespace o3tl { + template<> struct typed_flags : is_typed_flags {}; +} + +namespace i18nutil { + +struct Value +{ + sal_uInt8 type; + sal_uInt16 value; // value or address, depend on the type +}; + +struct Mapping +{ + sal_uInt8 type; + sal_Int8 nmap; +#define NMAPPINGMAX 3 + sal_Unicode map[NMAPPINGMAX]; +}; // for Unconditional mapping + +struct MappingElement +{ + MappingElement() + : current(0) + { + element.type = element.nmap = 0; + } + Mapping element; + sal_Int8 current; +}; + +class UNLESS_MERGELIBS(I18NUTIL_DLLPUBLIC) casefolding +{ +public: + /// @throws css::uno::RuntimeException + static Mapping getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, css::lang::Locale const & aLocale, MappingType nMappingType); + /// @throws css::uno::RuntimeException + static const Mapping& getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, css::lang::Locale const & aLocale, MappingType nMappingType); + /// @throws css::uno::RuntimeException + static sal_Unicode getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& e, css::lang::Locale const & aLocale, MappingType nMappingtype, TransliterationFlags 
moduleLoaded); + +}; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/i18nutildllapi.h b/include/i18nutil/i18nutildllapi.h new file mode 100644 index 0000000000..9797e87a36 --- /dev/null +++ b/include/i18nutil/i18nutildllapi.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_I18NUTIL_I18NUTILDLLAPI_H +#define INCLUDED_I18NUTIL_I18NUTILDLLAPI_H + +#include + +#if defined(I18NUTIL_DLLIMPLEMENTATION) +#define I18NUTIL_DLLPUBLIC SAL_DLLPUBLIC_EXPORT +#else +#define I18NUTIL_DLLPUBLIC SAL_DLLPUBLIC_IMPORT +#endif +#define I18NUTIL_DLLPRIVATE SAL_DLLPRIVATE + +#endif // INCLUDED_I18NUTIL_I18NUTILDLLAPI_H + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/oneToOneMapping.hxx b/include/i18nutil/oneToOneMapping.hxx new file mode 100644 index 0000000000..bc659b30a7 --- /dev/null +++ b/include/i18nutil/oneToOneMapping.hxx @@ -0,0 +1,90 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#ifndef INCLUDED_I18NUTIL_ONETOONEMAPPING_HXX +#define INCLUDED_I18NUTIL_ONETOONEMAPPING_HXX + +#include +#include +#include + +namespace i18nutil { + +struct OneToOneMappingTable_t +{ + sal_Unicode first; + sal_Unicode second; +}; + +typedef sal_Int8 UnicodePairFlag; +struct UnicodePairWithFlag +{ + sal_Unicode first; + sal_Unicode second; + UnicodePairFlag flag; +}; + +class UNLESS_MERGELIBS(I18NUTIL_DLLPUBLIC) oneToOneMapping +{ +private: + oneToOneMapping(const oneToOneMapping&) = delete; + oneToOneMapping& operator=(const oneToOneMapping&) = delete; +public: + oneToOneMapping( OneToOneMappingTable_t const *rpTable, const size_t rnSize, const size_t rnUnitSize = sizeof(OneToOneMappingTable_t) ); + virtual ~oneToOneMapping(); + + // binary search + virtual sal_Unicode find( const sal_Unicode nKey ) const; + + // translator + sal_Unicode operator[] ( const sal_Unicode nKey ) const { return find( nKey ); }; + +protected: + OneToOneMappingTable_t const *mpTable; + size_t mnSize; +}; + +class oneToOneMappingWithFlag final : public oneToOneMapping +{ +private: + oneToOneMappingWithFlag(const oneToOneMappingWithFlag&) = delete; + oneToOneMappingWithFlag& operator=(const oneToOneMappingWithFlag&) = delete; + + friend 
+ class widthfolding; + +public: + oneToOneMappingWithFlag( UnicodePairWithFlag const *rpTableWF, const size_t rnSize, const UnicodePairFlag rnFlag ); + virtual ~oneToOneMappingWithFlag() override; + + // make index for fast search + void makeIndex(); + + // index search + virtual sal_Unicode find( const sal_Unicode nKey ) const override; +private: + UnicodePairWithFlag const *mpTableWF; + UnicodePairFlag mnFlag; + std::unique_ptr mpIndex[256]; + bool mbHasIndex; +}; + +} + +#endif // INCLUDED_I18NUTIL_ONETOONEMAPPING_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/paper.hxx b/include/i18nutil/paper.hxx new file mode 100644 index 0000000000..b84d7b3af0 --- /dev/null +++ b/include/i18nutil/paper.hxx @@ -0,0 +1,158 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_I18NUTIL_PAPER_HXX +#define INCLUDED_I18NUTIL_PAPER_HXX + +#include +#include +#include + +namespace com::sun::star::lang +{ +struct Locale; +} + +//!! The values of the following enumerators must correspond to the array position +//!! 
of the respective paper size in the file i18nutil/source/utility/paper.cxx +//!! Thus don't reorder the enum values here without changing the code there as well. + +//!! Also the RID_STR_PAPERNAMES array in vcl/inc/print.hrc and the +//!! PaperIndex array in Printer::GetPaperName() in +//!! vcl/source/gdi/print.cxx are parallel to this enum. +enum Paper : unsigned +{ + PAPER_A0, + PAPER_A1, + PAPER_A2, + PAPER_A3, + PAPER_A4, + PAPER_A5, + PAPER_B4_ISO, + PAPER_B5_ISO, + PAPER_LETTER, + PAPER_LEGAL, + PAPER_TABLOID, + PAPER_USER, + PAPER_B6_ISO, + PAPER_ENV_C4, + PAPER_ENV_C5, + PAPER_ENV_C6, + PAPER_ENV_C65, + PAPER_ENV_DL, + PAPER_SLIDE_DIA, + PAPER_SCREEN_4_3, + PAPER_C, + PAPER_D, + PAPER_E, + PAPER_EXECUTIVE, + PAPER_FANFOLD_LEGAL_DE, + PAPER_ENV_MONARCH, + PAPER_ENV_PERSONAL, + PAPER_ENV_9, + PAPER_ENV_10, + PAPER_ENV_11, + PAPER_ENV_12, + PAPER_KAI16, + PAPER_KAI32, + PAPER_KAI32BIG, + PAPER_B4_JIS, + PAPER_B5_JIS, + PAPER_B6_JIS, + PAPER_LEDGER, + PAPER_STATEMENT, + PAPER_QUARTO, + PAPER_10x14, + PAPER_ENV_14, + PAPER_ENV_C3, + PAPER_ENV_ITALY, + PAPER_FANFOLD_US, + PAPER_FANFOLD_DE, + PAPER_POSTCARD_JP, + PAPER_9x11, + PAPER_10x11, + PAPER_15x11, + PAPER_ENV_INVITE, + PAPER_A_PLUS, + PAPER_B_PLUS, + PAPER_LETTER_PLUS, + PAPER_A4_PLUS, + PAPER_DOUBLEPOSTCARD_JP, + PAPER_A6, + PAPER_12x11, + PAPER_A7, + PAPER_A8, + PAPER_A9, + PAPER_A10, + PAPER_B0_ISO, + PAPER_B1_ISO, + PAPER_B2_ISO, + PAPER_B3_ISO, + PAPER_B7_ISO, + PAPER_B8_ISO, + PAPER_B9_ISO, + PAPER_B10_ISO, + PAPER_ENV_C2, + PAPER_ENV_C7, + PAPER_ENV_C8, + PAPER_ARCHA, + PAPER_ARCHB, + PAPER_ARCHC, + PAPER_ARCHD, + PAPER_ARCHE, + PAPER_SCREEN_16_9, + PAPER_SCREEN_16_10, + PAPER_16K_195x270, + PAPER_16K_197x273, + PAPER_WIDESCREEN, //PowerPoint Widescreen + PAPER_ONSCREENSHOW_4_3, //PowerPoint On-screen Show (4:3) + PAPER_ONSCREENSHOW_16_9, //PowerPoint On-screen Show (16:9) + PAPER_ONSCREENSHOW_16_10 //PowerPoint On-screen Show (16:10) +}; + +// defined for 'equal size' test with the implementation 
array +#define NUM_PAPER_ENTRIES (PAPER_ONSCREENSHOW_16_10 - PAPER_A0 + 1) + +class I18NUTIL_DLLPUBLIC PaperInfo +{ + Paper m_eType; + tools::Long m_nPaperWidth; // width in 100thMM + tools::Long m_nPaperHeight; // height in 100thMM +public: + PaperInfo(Paper eType); + PaperInfo(tools::Long nPaperWidth, tools::Long nPaperHeight); + + Paper getPaper() const { return m_eType; } + tools::Long getWidth() const { return m_nPaperWidth; } + tools::Long getHeight() const { return m_nPaperHeight; } + bool sloppyEqual(const PaperInfo& rOther) const; + void doSloppyFit(bool bAlsoTryRotated = false); + + static PaperInfo getSystemDefaultPaper(); + static PaperInfo getDefaultPaperForLocale(const css::lang::Locale& rLocale); + + static Paper fromPSName(const OString& rName); + static OString toPSName(Paper eType); + + static tools::Long sloppyFitPageDimension(tools::Long nDimension); +}; + +#endif // INCLUDED_I18NUTIL_PAPER_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/scripttypedetector.hxx b/include/i18nutil/scripttypedetector.hxx new file mode 100644 index 0000000000..8e92f46b5f --- /dev/null +++ b/include/i18nutil/scripttypedetector.hxx @@ -0,0 +1,36 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#ifndef INCLUDED_I18NUTIL_SCRIPTTYPEDETECTOR_HXX +#define INCLUDED_I18NUTIL_SCRIPTTYPEDETECTOR_HXX + +#include + +#include + +class I18NUTIL_DLLPUBLIC ScriptTypeDetector +{ +public: + static sal_Int32 beginOfScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 scriptDirection ); + static sal_Int32 endOfScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 scriptDirection ); + static sal_Int16 getScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection ); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/searchopt.hxx b/include/i18nutil/searchopt.hxx new file mode 100644 index 0000000000..0fd5f98676 --- /dev/null +++ b/include/i18nutil/searchopt.hxx @@ -0,0 +1,139 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#ifndef INCLUDED_I18NUTIL_SEARCHOPT_HXX +#define INCLUDED_I18NUTIL_SEARCHOPT_HXX + +#include +#include +#include +#include +#include +#include +#include + +namespace i18nutil +{ + +inline constexpr css::util::SearchAlgorithms downgradeSearchAlgorithms2(sal_Int16 searchAlgorithms2) +{ + switch (searchAlgorithms2) + { + case css::util::SearchAlgorithms2::ABSOLUTE: + return css::util::SearchAlgorithms_ABSOLUTE; + case css::util::SearchAlgorithms2::REGEXP: + return css::util::SearchAlgorithms_REGEXP; + case css::util::SearchAlgorithms2::APPROXIMATE: + return css::util::SearchAlgorithms_APPROXIMATE; + default: // default what? + case css::util::SearchAlgorithms2::WILDCARD: // something valid + return css::util::SearchAlgorithms_ABSOLUTE; + } +} + +inline constexpr sal_Int16 upgradeSearchAlgorithms(css::util::SearchAlgorithms searchAlgorithms) +{ + switch (searchAlgorithms) + { + default: // default what? + case css::util::SearchAlgorithms_ABSOLUTE: + return css::util::SearchAlgorithms2::ABSOLUTE; + case css::util::SearchAlgorithms_REGEXP: + return css::util::SearchAlgorithms2::REGEXP; + case css::util::SearchAlgorithms_APPROXIMATE: + return css::util::SearchAlgorithms2::APPROXIMATE; + } +} + +/** + * This is a wrapper around com::sun::star::util::SearchOptions and SearchOptions2, + * but using the more type-safe TransliterationFlags enum, and without obsolete + * algorithmType, which is superseded by AlgorithmType2. 
+ */ +struct SAL_WARN_UNUSED SearchOptions2 { + sal_Int32 searchFlag; + OUString searchString; + OUString replaceString; + css::lang::Locale Locale; + sal_Int32 changedChars; + sal_Int32 deletedChars; + sal_Int32 insertedChars; + TransliterationFlags transliterateFlags; + + sal_Int16 AlgorithmType2; + sal_Int32 WildcardEscapeCharacter; + + SearchOptions2& operator=(css::util::SearchOptions2 const & other) + { + searchFlag = other.searchFlag; + searchString = other.searchString; + replaceString = other.replaceString; + Locale = other.Locale; + changedChars = other.changedChars; + deletedChars = other.deletedChars; + insertedChars = other.insertedChars; + transliterateFlags = static_cast(other.transliterateFlags); + AlgorithmType2 = other.AlgorithmType2; + WildcardEscapeCharacter = other.WildcardEscapeCharacter; + return *this; + } + + css::util::SearchOptions2 toUnoSearchOptions2() const + { + return css::util::SearchOptions2(downgradeSearchAlgorithms2(AlgorithmType2), searchFlag, + searchString, replaceString, + Locale, + changedChars, deletedChars, insertedChars, + static_cast(transliterateFlags), + AlgorithmType2, WildcardEscapeCharacter); + } + + SearchOptions2() + : searchFlag(0) + , changedChars(0) + , deletedChars(0) + , insertedChars(0) + , transliterateFlags(TransliterationFlags::NONE) + , AlgorithmType2(0) + , WildcardEscapeCharacter(0) + {} + + SearchOptions2(const sal_Int32 searchFlag_, + OUString searchString_, OUString replaceString_, + css::lang::Locale Locale_, + const sal_Int32 changedChars_, const sal_Int32 deletedChars_, const sal_Int32 insertedChars_, + const TransliterationFlags& transliterateFlags_, + const sal_Int16 AlgorithmType2_, const sal_Int32 WildcardEscapeCharacter_) + : searchFlag(searchFlag_) + , searchString(std::move(searchString_)) + , replaceString(std::move(replaceString_)) + , Locale(std::move(Locale_)) + , changedChars(changedChars_) + , deletedChars(deletedChars_) + , insertedChars(insertedChars_) + , 
transliterateFlags(transliterateFlags_) + , AlgorithmType2(AlgorithmType2_) + , WildcardEscapeCharacter(WildcardEscapeCharacter_) + {} +}; + +}; // namespace + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/transliteration.hxx b/include/i18nutil/transliteration.hxx new file mode 100644 index 0000000000..7de5759995 --- /dev/null +++ b/include/i18nutil/transliteration.hxx @@ -0,0 +1,133 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#ifndef INCLUDED_I18NUTIL_TRANSLITERATION_HXX +#define INCLUDED_I18NUTIL_TRANSLITERATION_HXX + +#include +#include +#include + +/** + * This is a superset type of the com::sun::star::i18n::TransliterationModules and TransliterationModulesExtra, + * with some extra type checking + */ +enum class TransliterationFlags { + NONE = 0, + /// Transliterate a string from upper case to lower case + UPPERCASE_LOWERCASE = int(css::i18n::TransliterationModules_UPPERCASE_LOWERCASE), + /// Transliterate a string from lower case to upper case + LOWERCASE_UPPERCASE = int(css::i18n::TransliterationModules_LOWERCASE_UPPERCASE), + /// Transliterate a string from half width character to full width character + HALFWIDTH_FULLWIDTH = int(css::i18n::TransliterationModules_HALFWIDTH_FULLWIDTH), + /// Transliterate a string from full width character to half width character + FULLWIDTH_HALFWIDTH = int(css::i18n::TransliterationModules_FULLWIDTH_HALFWIDTH), + /// Transliterate a Japanese string from Katakana to Hiragana + KATAKANA_HIRAGANA = int(css::i18n::TransliterationModules_KATAKANA_HIRAGANA), + /// Transliterate a Japanese string from Hiragana to Katakana + HIRAGANA_KATAKANA = int(css::i18n::TransliterationModules_HIRAGANA_KATAKANA), + /// Transliterate an ASCII number string to Simplified Chinese lower case number string in spellout format + NumToTextLower_zh_CN = int(css::i18n::TransliterationModules_NumToTextLower_zh_CN), + /// Transliterate an ASCII number string to Simplified Chinese upper case number string in spellout format + NumToTextUpper_zh_CN = int(css::i18n::TransliterationModules_NumToTextUpper_zh_CN), + /// Transliterate an ASCII number string to Traditional Chinese lower case number string in spellout format + NumToTextLower_zh_TW = int(css::i18n::TransliterationModules_NumToTextLower_zh_TW), + /// Transliterate an ASCII number string to Traditional Chinese upper case number string in spellout format + NumToTextUpper_zh_TW = 
int(css::i18n::TransliterationModules_NumToTextUpper_zh_TW), + /// Transliterate an ASCII number string to formal Korean Hangul number string in spellout format + NumToTextFormalHangul_ko = int(css::i18n::TransliterationModules_NumToTextFormalHangul_ko), + /// Transliterate an ASCII number string to formal Korean Hanja lower case number string in spellout format + NumToTextFormalLower_ko = int(css::i18n::TransliterationModules_NumToTextFormalLower_ko), + /// Transliterate an ASCII number string to formal Korean Hanja upper case number string in spellout format + NumToTextFormalUpper_ko = int(css::i18n::TransliterationModules_NumToTextFormalUpper_ko), + + /** The first character of the sentence is put in upper case + */ + SENTENCE_CASE = int(css::i18n::TransliterationModulesExtra::SENTENCE_CASE), + + + /** The first character of the word is put in upper case. + * This one is part + */ + TITLE_CASE = int(css::i18n::TransliterationModulesExtra::TITLE_CASE), + + + /** All characters of the word are to change their case from small letters + * to capital letters and vice versa. 
+ */ + TOGGLE_CASE = int(css::i18n::TransliterationModulesExtra::TOGGLE_CASE), + + NON_IGNORE_MASK = int(css::i18n::TransliterationModules_NON_IGNORE_MASK), + IGNORE_MASK = 0x7fffff00, + + /// Ignore case when comparing strings by transliteration service + IGNORE_CASE = int(css::i18n::TransliterationModules_IGNORE_CASE), + /// Ignore Hiragana and Katakana when comparing strings by transliteration service + IGNORE_KANA = int(css::i18n::TransliterationModules_IGNORE_KANA), // ja_JP + /// Ignore full width and half width character when comparing strings by transliteration service + IGNORE_WIDTH = int(css::i18n::TransliterationModules_IGNORE_WIDTH), // ja_JP + /// Ignore Japanese traditional Kanji character in Japanese fuzzy search + ignoreTraditionalKanji_ja_JP = int(css::i18n::TransliterationModules_IgnoreTraditionalKanji_ja_JP), + /// Ignore Japanese traditional Katakana and Hiragana character in Japanese fuzzy search + ignoreTraditionalKana_ja_JP = int(css::i18n::TransliterationModules_IgnoreTraditionalKana_ja_JP), + /// Ignore dash or minus sign in Japanese fuzzy search + ignoreMinusSign_ja_JP = int(css::i18n::TransliterationModules_IgnoreMinusSign_ja_JP), + /// Ignore Hiragana and Katakana iteration mark in Japanese fuzzy search + ignoreIterationMark_ja_JP = int(css::i18n::TransliterationModules_IgnoreIterationMark_ja_JP), + /// Ignore separator punctuations in Japanese fuzzy search + ignoreSeparator_ja_JP = int(css::i18n::TransliterationModules_IgnoreSeparator_ja_JP), + /// Ignore Katakana and Hiragana Zi/Zi and Zu/Zu in Japanese fuzzy search + ignoreZiZu_ja_JP = int(css::i18n::TransliterationModules_IgnoreZiZu_ja_JP), + /// Ignore Katakana and Hiragana Ba/Gua and Ha/Fa in Japanese fuzzy search + ignoreBaFa_ja_JP = int(css::i18n::TransliterationModules_IgnoreBaFa_ja_JP), + /// Ignore Katakana and Hiragana Tsui/Tea/Ti and Dyi/Ji in Japanese fuzzy search + ignoreTiJi_ja_JP = int(css::i18n::TransliterationModules_IgnoreTiJi_ja_JP), + /// Ignore Katakana and 
Hiragana Hyu/Fyu and Byu/Gyu in Japanese fuzzy search + ignoreHyuByu_ja_JP = int(css::i18n::TransliterationModules_IgnoreHyuByu_ja_JP), + /// Ignore Katakana and Hiragana Se/Sye and Ze/Je in Japanese fuzzy search + ignoreSeZe_ja_JP = int(css::i18n::TransliterationModules_IgnoreSeZe_ja_JP), + /// Ignore Katakana YA/A which follows the character in either I or E row in Japanese fuzzy search + ignoreIandEfollowedByYa_ja_JP = int(css::i18n::TransliterationModules_IgnoreIandEfollowedByYa_ja_JP), + /// Ignore Katakana KI/KU which follows the character in SA column in Japanese fuzzy search + ignoreKiKuFollowedBySa_ja_JP = int(css::i18n::TransliterationModules_IgnoreKiKuFollowedBySa_ja_JP), + /// Ignore Japanese normal and small sized character in Japanese fuzzy search + ignoreSize_ja_JP = int(css::i18n::TransliterationModules_IgnoreSize_ja_JP), + /// Ignore Japanese prolonged sound mark in Japanese fuzzy search + ignoreProlongedSoundMark_ja_JP = int(css::i18n::TransliterationModules_IgnoreProlongedSoundMark_ja_JP), + /// Ignore middle dot in Japanese fuzzy search + ignoreMiddleDot_ja_JP = int(css::i18n::TransliterationModules_IgnoreMiddleDot_ja_JP), + /// Ignore white space characters, include space, TAB, return, etc. in Japanese fuzzy search + ignoreSpace_ja_JP = int(css::i18n::TransliterationModules_IgnoreSpace_ja_JP), + /// transliterate Japanese small sized character to normal sized character + smallToLarge_ja_JP = int(css::i18n::TransliterationModules_SmallToLarge_ja_JP), + /// transliterate Japanese normal sized character to small sized character + largeToSmall_ja_JP = int(css::i18n::TransliterationModules_LargeToSmall_ja_JP), + + /// Transliterate decomposing and removing diacritics, not only CTL, despite its name. + IGNORE_DIACRITICS_CTL = int(css::i18n::TransliterationModulesExtra::IGNORE_DIACRITICS_CTL), + /// Ignore Kashida mark. 
+ IGNORE_KASHIDA_CTL = int(css::i18n::TransliterationModulesExtra::IGNORE_KASHIDA_CTL) +}; +namespace o3tl { + template<> struct typed_flags : is_typed_flags {}; +} + + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx new file mode 100644 index 0000000000..be08595e0b --- /dev/null +++ b/include/i18nutil/unicode.hxx @@ -0,0 +1,128 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#ifndef INCLUDED_I18NUTIL_UNICODE_HXX +#define INCLUDED_I18NUTIL_UNICODE_HXX + +#include +#include +#include +#include +#include +#include + +class LanguageTag; + +struct ScriptTypeList /* One table entry of the kind passed to unicode::getUnicodeScriptType() through its typeList parameter. NOTE(review): presumably maps the script range [from, to] to `value` - confirm against the implementation. */ +{ + css::i18n::UnicodeScript from; + css::i18n::UnicodeScript to; + sal_Int16 value; +}; + +class I18NUTIL_DLLPUBLIC unicode +{ +public: + static sal_Int16 getUnicodeType(const sal_uInt32 ch); + static sal_Int16 getUnicodeScriptType(const sal_Unicode ch, const ScriptTypeList* typeList, + sal_Int16 unknownType = 0); + static sal_Unicode getUnicodeScriptStart(css::i18n::UnicodeScript type); + static sal_Unicode getUnicodeScriptEnd(css::i18n::UnicodeScript type); + static sal_uInt8 getUnicodeDirection(const sal_Unicode ch); + static sal_uInt32 GetMirroredChar(sal_uInt32); + static bool isControl(const sal_uInt32 ch); + static bool isAlpha(const sal_uInt32 ch); + static bool isSpace(const sal_uInt32 ch); + static bool isWhiteSpace(const sal_uInt32 ch); + + /** Check for Unicode variation sequence selectors (tests the UCHAR_VARIATION_SELECTOR property via u_getIntPropertyValue()) + + @param nCode A Unicode code point. + + @return True if nCode is a Unicode variation sequence selector. + */ + static bool isVariationSelector(sal_uInt32 nCode) + { + return u_getIntPropertyValue(nCode, UCHAR_VARIATION_SELECTOR) != 0; + } + + //Map an ISO 15924 script code to Latin/Asian/Complex/Weak + static sal_Int16 getScriptClassFromUScriptCode(UScriptCode eScript); + + //Return a language that can be written in a given ISO 15924 script code + static OString getExemplarLanguageForUScriptCode(UScriptCode eScript); + + //Format a number as a percentage according to the rules of the given + //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE + static OUString formatPercent(double dNumber, const LanguageTag& rLangTag); + + /** Map a LanguageTag's language ISO 639 code or script ISO 15924 code or + language-script or locale to Latin/Asian/Complex/Weak. 
If more than one + script is used with a language(-country) tag then the first (default) + script is mapped for that language. + + @return a css::i18n::ScriptType value. + */ + static sal_Int16 getScriptClassFromLanguageTag(const LanguageTag& rLanguageTag); +}; + +/* + Toggle between a character and its Unicode Notation. + -implements the concept found in Microsoft Word's Alt-X + -accepts sequences of up to 8 hex characters and converts them into the corresponding Unicode character + -example: 0000A78c or 2bc + -accepts sequences of up to 256 characters in Unicode notation + -example: U+00000065u+0331u+308 + -handles complex characters (with combining elements) and all of the Unicode planes. +*/ +class I18NUTIL_DLLPUBLIC ToggleUnicodeCodepoint +{ +private: + OUStringBuffer maInput; + OUStringBuffer maUtf16; + OUStringBuffer maCombining; + bool mbAllowMoreChars = true; + bool mbRequiresU = false; + bool mbIsHexString = false; + +public: + /** + Build an input string of valid UTF16 units to toggle. + -do not call the other functions until the input process is complete + -build the string from right to left. (Start from the character to the left of the cursor: move left.) + */ + bool AllowMoreInput(sal_Unicode uChar); + + /** + Validates (and potentially modifies) the input string. 
+ -all non-input functions must call this function first to validate the input string + -additional input may be prevented after this function is called + */ + OUString StringToReplace(); + OUString ReplacementString(); + + /** + While sInput.getLength() returns the number of utf16 units to delete, + this function returns the number of "characters" to delete - potentially a smaller number. NOTE(review): this class declares no `sInput`; presumably the string returned by StringToReplace() is meant - confirm. + */ + sal_uInt32 CharsToDelete(); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/i18nutil/widthfolding.hxx b/include/i18nutil/widthfolding.hxx new file mode 100644 index 0000000000..87b909b916 --- /dev/null +++ b/include/i18nutil/widthfolding.hxx @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#ifndef INCLUDED_I18NUTIL_WIDTHFOLDING_HXX +#define INCLUDED_I18NUTIL_WIDTHFOLDING_HXX + +#include +#include +#include +#include + +namespace com::sun::star::uno { template class Sequence; } +namespace i18nutil { class oneToOneMapping; } + +namespace i18nutil { + +#define WIDTHFOLDING_DONT_USE_COMBINED_VU 0x01 /* NOTE(review): presumably a bit for the nFlags argument of compose_ja_voiced_sound_marks() - confirm against the implementation. */ + +class UNLESS_MERGELIBS(I18NUTIL_DLLPUBLIC) widthfolding /* Holds only static members: six accessors returning oneToOneMapping references and four string/character conversion routines. NOTE(review): the conversion semantics are implied by the names only - see the implementation. */ +{ +public: + static oneToOneMapping& getfull2halfTable(); + static oneToOneMapping& gethalf2fullTable(); + + static oneToOneMapping& getfull2halfTableForASC(); + static oneToOneMapping& gethalf2fullTableForJIS(); + + static oneToOneMapping& getfullKana2halfKanaTable(); + static oneToOneMapping& gethalfKana2fullKanaTable(); + + static OUString decompose_ja_voiced_sound_marks(const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 >* pOffset); + static sal_Unicode decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar); + static OUString compose_ja_voiced_sound_marks(const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 >* pOffset, sal_Int32 nFlags = 0 ); /* nFlags defaults to 0, i.e. no flag bits set */ + static sal_Unicode getCompositionChar(sal_Unicode c1, sal_Unicode c2); +}; + + +} /* namespace i18nutil */ + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3