diff options
Diffstat (limited to 'i18npool/source/characterclassification/cclass_unicode.cxx')
-rw-r--r-- | i18npool/source/characterclassification/cclass_unicode.cxx | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/i18npool/source/characterclassification/cclass_unicode.cxx b/i18npool/source/characterclassification/cclass_unicode.cxx new file mode 100644 index 0000000000..e3c27e9bc0 --- /dev/null +++ b/i18npool/source/characterclassification/cclass_unicode.cxx @@ -0,0 +1,308 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <cclass_unicode.hxx> +#include <com/sun/star/i18n/KCharacterType.hpp> +#include <com/sun/star/i18n/WordType.hpp> +#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp> +#include <unicode/uchar.h> +#include <cppuhelper/exc_hlp.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <breakiteratorImpl.hxx> +#include <transliteration_body.hxx> +#include <rtl/ref.hxx> +#include <o3tl/string_view.hxx> +#include <utility> + +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::lang; + +namespace i18npool { + +// class cclass_Unicode +// ----------------------------------------------------; + +cclass_Unicode::cclass_Unicode( uno::Reference < XComponentContext > xContext ) : + transToUpper( new Transliteration_casemapping() ), + transToLower( new Transliteration_casemapping() ), + transToTitle( new Transliteration_casemapping() ), + m_xContext(std::move( xContext )), + nStartTypes( 0 ), + nContTypes( 0 ), + cGroupSep( ',' ), + cDecimalSep( '.' ), + cDecimalSepAlt( 0 ) +{ + transToUpper->setMappingType(MappingType::ToUpper); + transToLower->setMappingType(MappingType::ToLower); + transToTitle->setMappingType(MappingType::ToTitle); +} + +cclass_Unicode::~cclass_Unicode() { + destroyParserTable(); +} + + +OUString SAL_CALL +cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) { + sal_Int32 len = Text.getLength(); + if (nPos >= len) + return OUString(); + if (nCount + nPos > len) + nCount = len - nPos; + + transToUpper->setLocale(rLocale); + return transToUpper->transliterateString2String(Text, nPos, nCount); +} + +OUString SAL_CALL +cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) { + sal_Int32 len = Text.getLength(); + if (nPos >= len) + return OUString(); + if (nCount + nPos > len) + nCount = len - nPos; + + transToLower->setLocale(rLocale); + return transToLower->transliterateString2String(Text, nPos, nCount); +} + +OUString SAL_CALL +cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) { + try + { + sal_Int32 len = Text.getLength(); + if (nPos >= len) + return OUString(); + if (nCount + nPos > len) + nCount = len - nPos; + + transToTitle->setLocale(rLocale); + rtl_uString* pStr = rtl_uString_alloc(nCount); + sal_Unicode* out = pStr->buffer; + rtl::Reference< BreakIteratorImpl > xBrk(new BreakIteratorImpl(m_xContext)); + Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale, + WordType::ANYWORD_IGNOREWHITESPACES, true); + for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) { + if (i >= bdy.endPos) + bdy = xBrk->nextWord(Text, bdy.endPos, rLocale, + WordType::ANYWORD_IGNOREWHITESPACES); + *out = (i == bdy.startPos) ? + transToTitle->transliterateChar2Char(Text[i]) : Text[i]; + } + *out = 0; + return OUString( pStr, SAL_NO_ACQUIRE ); + } + catch (const RuntimeException&) + { + throw; + } + catch (const Exception& e) + { + uno::Any a(cppu::getCaughtException()); + throw lang::WrappedTargetRuntimeException( + "wrapped " + a.getValueTypeName() + ": " + e.Message, + uno::Reference<uno::XInterface>(), a); + } +} + +sal_Int16 SAL_CALL +cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) { + if ( nPos < 0 || Text.getLength() <= nPos ) return 0; + return static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0))); +} + +sal_Int16 SAL_CALL +cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) { + if ( nPos < 0 || Text.getLength() <= nPos ) return 0; + return static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0))); +} + + +sal_Int16 SAL_CALL +cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) { + if ( nPos < 0 || Text.getLength() <= nPos ) return 0; + // ICU Unicode script type UBlockCode starts from 1 for Basic Latin, + // while OO.o enum UnicideScript starts from 0. + // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1. + return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1; +} + + +sal_Int32 +cclass_Unicode::getCharType( std::u16string_view Text, sal_Int32* nPos, sal_Int32 increment) { + using namespace ::com::sun::star::i18n::KCharacterType; + + sal_uInt32 ch = o3tl::iterateCodePoints(Text, nPos, increment); + switch ( u_charType(ch) ) { + // Upper + case U_UPPERCASE_LETTER : + return UPPER|LETTER|PRINTABLE|BASE_FORM; + + // Lower + case U_LOWERCASE_LETTER : + return LOWER|LETTER|PRINTABLE|BASE_FORM; + + // Title + case U_TITLECASE_LETTER : + return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM; + + // Letter + case U_MODIFIER_LETTER : + case U_OTHER_LETTER : + return LETTER|PRINTABLE|BASE_FORM; + + // Digit + case U_DECIMAL_DIGIT_NUMBER: + case U_LETTER_NUMBER: + case U_OTHER_NUMBER: + return DIGIT|PRINTABLE|BASE_FORM; + + // Base + case U_NON_SPACING_MARK: + case U_ENCLOSING_MARK: + case U_COMBINING_SPACING_MARK: + return BASE_FORM|PRINTABLE; + + // Print + case U_SPACE_SEPARATOR: + + case U_DASH_PUNCTUATION: + case U_INITIAL_PUNCTUATION: + case U_FINAL_PUNCTUATION: + case U_CONNECTOR_PUNCTUATION: + case U_OTHER_PUNCTUATION: + + case U_MATH_SYMBOL: + case U_CURRENCY_SYMBOL: + case U_MODIFIER_SYMBOL: + case U_OTHER_SYMBOL: + return PRINTABLE; + + // Control + case U_CONTROL_CHAR: + case U_FORMAT_CHAR: + return CONTROL; + + case U_LINE_SEPARATOR: + case U_PARAGRAPH_SEPARATOR: + return CONTROL|PRINTABLE; + + // for all others + default: + return U_GENERAL_OTHER_TYPES; + } +} + +sal_Int32 SAL_CALL +cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) { + if ( nPos < 0 || Text.getLength() <= nPos ) return 0; + return getCharType(Text, &nPos, 0); + +} + +sal_Int32 SAL_CALL +cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) { + if ( nPos < 0 || Text.getLength() <= nPos ) return 0; + + sal_Int32 result = 0; + + while (nCount > 0 && nPos < Text.getLength()) + { + sal_Int32 nOrigPos = nPos; + result |= getCharType(Text, &nPos, 1); + sal_Int32 nUtf16Units = nPos - nOrigPos; + nCount -= nUtf16Units; + } + + return result; +} + +ParseResult SAL_CALL cclass_Unicode::parseAnyToken( + const OUString& Text, + sal_Int32 nPos, + const Locale& rLocale, + sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, + sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) +{ + ParseResult r; + if ( Text.getLength() <= nPos ) + return r; + + setupParserTable( rLocale, + startCharTokenType, userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont ); + parseText( r, Text, nPos ); + + return r; +} + + +ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken( + sal_Int32 nTokenType, + const OUString& Text, + sal_Int32 nPos, + const Locale& rLocale, + sal_Int32 startCharTokenType, + const OUString& userDefinedCharactersStart, + sal_Int32 contCharTokenType, + const OUString& userDefinedCharactersCont ) +{ + ParseResult r; + if ( Text.getLength() <= nPos ) + return r; + + setupParserTable( rLocale, + startCharTokenType, userDefinedCharactersStart, + contCharTokenType, userDefinedCharactersCont ); + parseText( r, Text, nPos, nTokenType ); + + return r; +} + +OUString SAL_CALL cclass_Unicode::getImplementationName() +{ + return "com.sun.star.i18n.CharacterClassification_Unicode"; +} + +sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) +{ + return cppu::supportsService(this, rServiceName); +} + +Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() +{ + return { "com.sun.star.i18n.CharacterClassification_Unicode" }; +} + +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_i18n_CharacterClassification_Unicode_get_implementation( + css::uno::XComponentContext *context, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new i18npool::cclass_Unicode(context)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |