diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
commit | 940b4d1848e8c70ab7642901a68594e8016caffc (patch) | |
tree | eb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /lingucomponent/source/hyphenator/hyphen | |
parent | Initial commit. (diff) | |
download | libreoffice-upstream.tar.xz libreoffice-upstream.zip |
Adding upstream version 1:7.0.4.upstream/1%7.0.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lingucomponent/source/hyphenator/hyphen')
3 files changed, 998 insertions, 0 deletions
diff --git a/lingucomponent/source/hyphenator/hyphen/hyphen.component b/lingucomponent/source/hyphenator/hyphen/hyphen.component new file mode 100644 index 000000000..ea9298e46 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphen.component @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="hyphen" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.LibHnjHyphenator"> + <service name="com.sun.star.linguistic2.Hyphenator"/> + </implementation> +</component> diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx new file mode 100644 index 000000000..d03aaf1df --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx @@ -0,0 +1,840 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> + +#include <comphelper/sequence.hxx> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <com/sun/star/registry/XRegistryKey.hpp> +#include <com/sun/star/lang/XSingleServiceFactory.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> + +#include <hyphen.h> +#include "hyphenimp.hxx" + +#include <linguistic/hyphdta.hxx> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/log.hxx> + +#include <linguistic/lngprops.hxx> +#include <linguistic/misc.hxx> +#include <svtools/strings.hrc> +#include <unotools/charclass.hxx> +#include <unotools/pathoptions.hxx> +#include <unotools/useroptions.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> +#include <osl/file.hxx> + +#include <stdio.h> +#include <string.h> + +#include <cassert> +#include <numeric> +#include <vector> +#include <set> +#include <memory> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +Hyphenator::Hyphenator() : + aEvtListeners ( GetLinguMutex() ) +{ + bDisposing = false; +} + +Hyphenator::~Hyphenator() +{ + for (auto & rInfo : mvDicts) + { + if (rInfo.aPtr) + hnj_hyphen_free(rInfo.aPtr); + } + + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper.reset( new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Hyphenator::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvDicts.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators", + "org.openoffice.lingu.LibHnjHyphenator", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... + std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "HYPH" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvDicts.resize(numdict); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvDicts[k].aPtr = nullptr; + mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; + mvDicts[k].aLoc = aLanguageTag.getLocale(); + mvDicts[k].apCC.reset( new CharClass( aLanguageTag ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvDicts[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numdict, "index mismatch?" ); + } + else + { + // no dictionary found so register no dictionaries + mvDicts.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +namespace { +bool LoadDictionary(HDInfo& rDict) +{ + OUString DictFN = rDict.aName + ".dic"; + OUString dictpath; + + osl::FileBase::getSystemPathFromFileURL(DictFN, dictpath); + +#if defined(_WIN32) + // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix. + OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8)); +#else + OString sTmp(OU2ENC(dictpath, osl_getThreadTextEncoding())); +#endif + HyphenDict *dict = nullptr; + if ((dict = hnj_hyphen_load(sTmp.getStr())) == nullptr) + { + SAL_WARN( + "lingucomponent", + "Couldn't find file " << dictpath); + return false; + } + rDict.aPtr = dict; + rDict.eEnc = getTextEncodingFromCharset(dict->cset); + return true; +} +} + +Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nMaxLeading, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps(); + + HyphenDict *dict = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + Reference< XHyphenatedWord > xRes; + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + int nHyphenationPos = -1; + int nHyphenationPosAlt = -1; + int nHyphenationPosAltHyph = -1; + + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + dict = mvDicts[k].aPtr; + eEnc = mvDicts[k].eEnc; + CharClass * pCC = mvDicts[k].apCC.get(); + + // Don't hyphenate uppercase words if requested + if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC)) + { + return nullptr; + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + CapType ct = capitalType(aWord, pCC); + + // first convert any smart quotes or apostrophes to normal ones + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + int wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen + 1]); + std::unique_ptr<char[]> hyphens(new char[wordlen + 5]); + + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // now strip off any ending periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + // whoops something did not work + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return nullptr; + } + } + + // now backfill hyphens[] for any removed trailing periods + for (int c = n; c < wordlen; c++) hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + + sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); + + for (sal_Int32 i = 0; i < n; i++) + { + int leftrep = 0; + bool hit = (n >= minLen); + if (!rep || !rep[i]) + { + hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (i >= (minLead-1) ); + hit = hit && ((n - i - 1) >= minTrail); + } + else + { + // calculate change character length before hyphenation point signed with '=' + for (char * c = rep[i]; *c && (*c != '='); c++) + { + if (eEnc == RTL_TEXTENCODING_UTF8) + { + if (static_cast<unsigned char>(*c) >> 6 != 2) + leftrep++; + } + else + leftrep++; + } + hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); + hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); + hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + } + if (hit) + { + nHyphenationPos = i; + if (rep && rep[i]) + { + nHyphenationPosAlt = i - pos[i]; + nHyphenationPosAltHyph = i + leftrep - pos[i]; + } + } + } + + if (nHyphenationPos == -1) + { + xRes = nullptr; + } + else + { + if (rep && rep[nHyphenationPos]) + { + // remove equal sign + char * s = rep[nHyphenationPos]; + int eq = 0; + for (; *s; s++) + { + if (*s == '=') eq = 1; + if (eq) *s = *(s + 1); + } + OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyph; + switch (ct) + { + case CapType::ALLCAP: + { + repHyph = makeUpperCase(repHyphlow, pCC); + break; + } + case CapType::INITCAP: + { + if (nHyphenationPosAlt == -1) + repHyph = makeInitCap(repHyphlow, pCC); + else + repHyph = repHyphlow; + break; + } + default: + { + repHyph = repHyphlow; + break; + } + } + + // handle shortening + sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ? + nHyphenationPosAltHyph : nHyphenationPos); + // discretionary hyphenation + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos, + aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + static_cast<sal_Int16>(nHyphenationPosAltHyph)); + } + else + { + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), + static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos)); + } + } + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return xRes; + } + return nullptr; +} + +Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( + const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nIndex, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: + for (int extrachar = 1; extrachar <= 2; extrachar++) + { + Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) + return xRes; + } + return nullptr; +} + +Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord, + const css::lang::Locale& aLocale, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + + // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as + // well as "hyphenate" + if (aWord.getLength() < minLen) + { + return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ), + aWord, Sequence< sal_Int16 >() ); + } + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + HyphenDict *dict = nullptr; + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + dict = mvDicts[k].aPtr; + rtl_TextEncoding eEnc = mvDicts[k].eEnc; + CharClass* pCC = mvDicts[k].apCC.get(); + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + sal_Int32 wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen+1]); + std::unique_ptr<char[]> hyphens(new char[wordlen+5]); + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // first remove any trailing periods + sal_Int32 n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return nullptr; + } + } + // now backfill hyphens[] for any removed periods + for (sal_Int32 c = n; c < wordlen; c++) + hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + + sal_Int32 nHyphCount = 0; + + for ( sal_Int32 i = 0; i < encWord.getLength(); i++) + { + if (hyphens[i]&1) + nHyphCount++; + } + + Sequence< sal_Int16 > aHyphPos(nHyphCount); + sal_Int16 *pPos = aHyphPos.getArray(); + OUStringBuffer hyphenatedWordBuffer; + nHyphCount = 0; + + for (sal_Int32 i = 0; i < nWord.getLength(); i++) + { + hyphenatedWordBuffer.append(aWord[i]); + // hyphenation position + if (hyphens[i]&1) + { + // linguistic::PossibleHyphens is stuck with + // css::uno::Sequence<sal_Int16> because of + // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so + // any further positions need to be ignored: + assert(i >= SAL_MIN_INT16); + if (i > SAL_MAX_INT16) + { + SAL_WARN( + "lingucomponent", + "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord + << "\""); + continue; + } + pPos[nHyphCount] = i; + hyphenatedWordBuffer.append('='); + nHyphCount++; + } + } + + OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); + + Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens( + aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos); + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return xRes; + } + + return nullptr; +} + +OUString Hyphenator::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) ); + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +/// @throws Exception +static Reference< XInterface > Hyphenator_CreateInstance( + const Reference< XMultiServiceFactory > & /*rSMgr*/ ) +{ + Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new Hyphenator); + return xService; +} + +sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_LIBHYPHEN, loc); +} + +void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL Hyphenator::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XHyphenator *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + pPropHelper.reset(); + } + } +} + +void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Hyphenator::getImplementationName() +{ + return getImplementationName_Static(); +} + +sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames() +{ + return getSupportedServiceNames_Static(); +} + +Sequence< OUString > Hyphenator::getSupportedServiceNames_Static() + throw() +{ + Sequence< OUString > aSNS { SN_HYPHENATOR }; + return aSNS; +} + +extern "C" +{ + +SAL_DLLPUBLIC_EXPORT void * hyphen_component_getFactory( + const char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ ) +{ + void * pRet = nullptr; + if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) ) + { + Reference< XSingleServiceFactory > xFactory = + cppu::createOneInstanceFactory( + static_cast< XMultiServiceFactory * >( pServiceManager ), + Hyphenator::getImplementationName_Static(), + Hyphenator_CreateInstance, + Hyphenator::getSupportedServiceNames_Static()); + // acquire, because we return an interface pointer instead of a reference + xFactory->acquire(); + pRet = xFactory.get(); + } + return pRet; +} + +} +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx new file mode 100644 index 000000000..d5103b8e7 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx @@ -0,0 +1,133 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX + +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XHyphenator.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <unotools/charclass.hxx> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> +#include <stdio.h> + +#include <hyphen.h> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +struct HDInfo { + HyphenDict * aPtr; + OUString aName; + Locale aLoc; + rtl_TextEncoding eEnc; + std::unique_ptr<CharClass> apCC; +}; + +class Hyphenator : + public cppu::WeakImplHelper + < + XHyphenator, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + std::vector< HDInfo > mvDicts; + + ::comphelper::OInterfaceContainerHelper2 aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Hyphenation> pPropHelper; + bool bDisposing; + + Hyphenator(const Hyphenator &) = delete; + Hyphenator & operator = (const Hyphenator &) = delete; + + linguistic::PropertyHelper_Hyphenation& GetPropHelper_Impl(); + linguistic::PropertyHelper_Hyphenation& GetPropHelper() + { + return pPropHelper ? *pPropHelper : GetPropHelper_Impl(); + } + +public: + Hyphenator(); + + virtual ~Hyphenator() override; + + // XSupportedLocales (for XHyphenator) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XHyphenator + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL hyphenate( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nMaxLeading, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL queryAlternativeSpelling( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nIndex, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XPossibleHyphens > SAL_CALL createPossibleHyphens( const OUString& aWord, const css::lang::Locale& aLocale, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + + static inline OUString getImplementationName_Static() throw(); + static Sequence< OUString > getSupportedServiceNames_Static() throw(); + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +inline OUString Hyphenator::getImplementationName_Static() throw() +{ + return "org.openoffice.lingu.LibHnjHyphenator"; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |