diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
commit | 940b4d1848e8c70ab7642901a68594e8016caffc (patch) | |
tree | eb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /lingucomponent/source | |
parent | Initial commit. (diff) | |
download | libreoffice-upstream.tar.xz libreoffice-upstream.zip |
Adding upstream version 1:7.0.4. (refs: upstream/1%7.0.4, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lingucomponent/source')
24 files changed, 5032 insertions, 0 deletions
diff --git a/lingucomponent/source/hyphenator/hyphen/hyphen.component b/lingucomponent/source/hyphenator/hyphen/hyphen.component new file mode 100644 index 000000000..ea9298e46 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphen.component @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="hyphen" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.LibHnjHyphenator"> + <service name="com.sun.star.linguistic2.Hyphenator"/> + </implementation> +</component> diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx new file mode 100644 index 000000000..d03aaf1df --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx @@ -0,0 +1,840 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> + +#include <comphelper/sequence.hxx> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <com/sun/star/registry/XRegistryKey.hpp> +#include <com/sun/star/lang/XSingleServiceFactory.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> + +#include <hyphen.h> +#include "hyphenimp.hxx" + +#include <linguistic/hyphdta.hxx> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/log.hxx> + +#include <linguistic/lngprops.hxx> +#include <linguistic/misc.hxx> +#include <svtools/strings.hrc> +#include <unotools/charclass.hxx> +#include <unotools/pathoptions.hxx> +#include <unotools/useroptions.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> +#include <osl/file.hxx> + +#include <stdio.h> +#include <string.h> + +#include <cassert> +#include <numeric> +#include <vector> +#include <set> +#include <memory> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using 
namespace linguistic; + +Hyphenator::Hyphenator() : + aEvtListeners ( GetLinguMutex() ) +{ + bDisposing = false; +} + +Hyphenator::~Hyphenator() +{ + for (auto & rInfo : mvDicts) + { + if (rInfo.aPtr) + hnj_hyphen_free(rInfo.aPtr); + } + + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper.reset( new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Hyphenator::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvDicts.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators", + "org.openoffice.lingu.LibHnjHyphenator", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... 
+ std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "HYPH" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvDicts.resize(numdict); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. 
+ for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvDicts[k].aPtr = nullptr; + mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; + mvDicts[k].aLoc = aLanguageTag.getLocale(); + mvDicts[k].apCC.reset( new CharClass( aLanguageTag ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvDicts[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numdict, "index mismatch?" ); + } + else + { + // no dictionary found so register no dictionaries + mvDicts.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +namespace { +bool LoadDictionary(HDInfo& rDict) +{ + OUString DictFN = rDict.aName + ".dic"; + OUString dictpath; + + osl::FileBase::getSystemPathFromFileURL(DictFN, dictpath); + +#if defined(_WIN32) + // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix. 
+ OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8)); +#else + OString sTmp(OU2ENC(dictpath, osl_getThreadTextEncoding())); +#endif + HyphenDict *dict = nullptr; + if ((dict = hnj_hyphen_load(sTmp.getStr())) == nullptr) + { + SAL_WARN( + "lingucomponent", + "Couldn't find file " << dictpath); + return false; + } + rDict.aPtr = dict; + rDict.eEnc = getTextEncodingFromCharset(dict->cset); + return true; +} +} + +Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nMaxLeading, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps(); + + HyphenDict *dict = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + Reference< XHyphenatedWord > xRes; + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + int nHyphenationPos = -1; + int nHyphenationPosAlt = -1; + int nHyphenationPosAltHyph = -1; + + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + dict = mvDicts[k].aPtr; + eEnc = mvDicts[k].eEnc; + CharClass * pCC = mvDicts[k].apCC.get(); + + // Don't hyphenate uppercase words if requested + if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC)) + { + return nullptr; + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. 
+ // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + CapType ct = capitalType(aWord, pCC); + + // first convert any smart quotes or apostrophes to normal ones + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + int wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen + 1]); + std::unique_ptr<char[]> hyphens(new char[wordlen + 5]); + + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // now strip off any ending periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + // 
whoops something did not work + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return nullptr; + } + } + + // now backfill hyphens[] for any removed trailing periods + for (int c = n; c < wordlen; c++) hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + + sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); + + for (sal_Int32 i = 0; i < n; i++) + { + int leftrep = 0; + bool hit = (n >= minLen); + if (!rep || !rep[i]) + { + hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (i >= (minLead-1) ); + hit = hit && ((n - i - 1) >= minTrail); + } + else + { + // calculate change character length before hyphenation point signed with '=' + for (char * c = rep[i]; *c && (*c != '='); c++) + { + if (eEnc == RTL_TEXTENCODING_UTF8) + { + if (static_cast<unsigned char>(*c) >> 6 != 2) + leftrep++; + } + else + leftrep++; + } + hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); + hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); + hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + } + if (hit) + { + nHyphenationPos = i; + if (rep && rep[i]) + { + nHyphenationPosAlt = i - pos[i]; + nHyphenationPosAltHyph = i + leftrep - pos[i]; + } + } + } + + if (nHyphenationPos == -1) + { + xRes = nullptr; + } + else + { + if (rep && rep[nHyphenationPos]) + { + // remove equal sign + char * s = rep[nHyphenationPos]; + int eq = 0; + for (; *s; s++) + { + if (*s == '=') eq = 1; + if (eq) *s = *(s + 1); + } + OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyph; + switch (ct) + { + case CapType::ALLCAP: + { + repHyph = makeUpperCase(repHyphlow, pCC); + break; + } + case CapType::INITCAP: + { + if (nHyphenationPosAlt == -1) + repHyph = makeInitCap(repHyphlow, pCC); + else + repHyph = repHyphlow; + break; + } + default: + { + repHyph = repHyphlow; + break; + } + } + + // 
handle shortening + sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ? + nHyphenationPosAltHyph : nHyphenationPos); + // discretionary hyphenation + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos, + aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + static_cast<sal_Int16>(nHyphenationPosAltHyph)); + } + else + { + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), + static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos)); + } + } + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return xRes; + } + return nullptr; +} + +Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( + const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nIndex, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: + for (int extrachar = 1; extrachar <= 2; extrachar++) + { + Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) + return xRes; + } + return nullptr; +} + +Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord, + const css::lang::Locale& aLocale, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + + // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as + // well as "hyphenate" + if (aWord.getLength() < 
minLen) + { + return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ), + aWord, Sequence< sal_Int16 >() ); + } + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + HyphenDict *dict = nullptr; + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + dict = mvDicts[k].aPtr; + rtl_TextEncoding eEnc = mvDicts[k].eEnc; + CharClass* pCC = mvDicts[k].apCC.get(); + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! 
(maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + sal_Int32 wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen+1]); + std::unique_ptr<char[]> hyphens(new char[wordlen+5]); + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // first remove any trailing periods + sal_Int32 n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return nullptr; + } + } + // now backfill hyphens[] for any removed periods + for (sal_Int32 c = n; c < wordlen; c++) + hyphens[c] = '0'; + hyphens[wordlen] = 
'\0'; + + sal_Int32 nHyphCount = 0; + + for ( sal_Int32 i = 0; i < encWord.getLength(); i++) + { + if (hyphens[i]&1) + nHyphCount++; + } + + Sequence< sal_Int16 > aHyphPos(nHyphCount); + sal_Int16 *pPos = aHyphPos.getArray(); + OUStringBuffer hyphenatedWordBuffer; + nHyphCount = 0; + + for (sal_Int32 i = 0; i < nWord.getLength(); i++) + { + hyphenatedWordBuffer.append(aWord[i]); + // hyphenation position + if (hyphens[i]&1) + { + // linguistic::PossibleHyphens is stuck with + // css::uno::Sequence<sal_Int16> because of + // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so + // any further positions need to be ignored: + assert(i >= SAL_MIN_INT16); + if (i > SAL_MAX_INT16) + { + SAL_WARN( + "lingucomponent", + "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord + << "\""); + continue; + } + pPos[nHyphCount] = i; + hyphenatedWordBuffer.append('='); + nHyphCount++; + } + } + + OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); + + Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens( + aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos); + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return xRes; + } + + return nullptr; +} + +OUString Hyphenator::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) ); + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +/// @throws Exception +static Reference< 
XInterface > Hyphenator_CreateInstance( + const Reference< XMultiServiceFactory > & /*rSMgr*/ ) +{ + Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new Hyphenator); + return xService; +} + +sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_LIBHYPHEN, loc); +} + +void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! 
after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL Hyphenator::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XHyphenator *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + pPropHelper.reset(); + } + } +} + +void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Hyphenator::getImplementationName() +{ + return getImplementationName_Static(); +} + +sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames() +{ + return getSupportedServiceNames_Static(); +} + +Sequence< OUString > Hyphenator::getSupportedServiceNames_Static() + throw() +{ + Sequence< OUString > aSNS { SN_HYPHENATOR }; + return aSNS; +} + +extern "C" +{ + +SAL_DLLPUBLIC_EXPORT void * hyphen_component_getFactory( + const char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ ) +{ + void * pRet = nullptr; + if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) ) + { + Reference< XSingleServiceFactory > xFactory = + cppu::createOneInstanceFactory( + static_cast< XMultiServiceFactory * >( pServiceManager ), + Hyphenator::getImplementationName_Static(), + Hyphenator_CreateInstance, + Hyphenator::getSupportedServiceNames_Static()); + // acquire, because we return an 
interface pointer instead of a reference + xFactory->acquire(); + pRet = xFactory.get(); + } + return pRet; +} + +} +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx new file mode 100644 index 000000000..d5103b8e7 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx @@ -0,0 +1,133 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX + +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XHyphenator.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <unotools/charclass.hxx> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> +#include <stdio.h> + +#include <hyphen.h> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +struct HDInfo { + HyphenDict * aPtr; + OUString aName; + Locale aLoc; + rtl_TextEncoding eEnc; + std::unique_ptr<CharClass> apCC; +}; + +class Hyphenator : + public cppu::WeakImplHelper + < + XHyphenator, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + std::vector< HDInfo > mvDicts; + + ::comphelper::OInterfaceContainerHelper2 aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Hyphenation> pPropHelper; + bool bDisposing; + + Hyphenator(const Hyphenator &) = delete; + Hyphenator & operator = (const Hyphenator &) = delete; + + linguistic::PropertyHelper_Hyphenation& GetPropHelper_Impl(); + linguistic::PropertyHelper_Hyphenation& GetPropHelper() + { + return pPropHelper ? 
*pPropHelper : GetPropHelper_Impl(); + } + +public: + Hyphenator(); + + virtual ~Hyphenator() override; + + // XSupportedLocales (for XHyphenator) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XHyphenator + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL hyphenate( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nMaxLeading, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL queryAlternativeSpelling( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nIndex, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XPossibleHyphens > SAL_CALL createPossibleHyphens( const OUString& aWord, const css::lang::Locale& aLocale, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< 
OUString > SAL_CALL getSupportedServiceNames() override; + + static inline OUString getImplementationName_Static() throw(); + static Sequence< OUString > getSupportedServiceNames_Static() throw(); + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +inline OUString Hyphenator::getImplementationName_Static() throw() +{ + return "org.openoffice.lingu.LibHnjHyphenator"; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.cxx b/lingucomponent/source/languageguessing/guess.cxx new file mode 100644 index 000000000..45700ff7d --- /dev/null +++ b/lingucomponent/source/languageguessing/guess.cxx @@ -0,0 +1,103 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <cassert> +#include <iostream> +#include <string.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#else +#include <textcat.h> +#endif + +#include "guess.hxx" + +/* Old textcat.h versions defined bad spelled constants. */ +#ifndef TEXTCAT_RESULT_UNKNOWN_STR +#define TEXTCAT_RESULT_UNKNOWN_STR _TEXTCAT_RESULT_UNKOWN +#endif + +#ifndef TEXTCAT_RESULT_SHORT_STR +#define TEXTCAT_RESULT_SHORT_STR _TEXTCAT_RESULT_SHORT +#endif + +using namespace std; + +Guess::Guess() + : language_str(DEFAULT_LANGUAGE) + , country_str(DEFAULT_COUNTRY) +{ +} + +/* +* this use a char * string to build the guess object +* a string like those is made as : [language-country-encoding]... +* +*/ +Guess::Guess(const char * guess_str) + : language_str(DEFAULT_LANGUAGE) + , country_str(DEFAULT_COUNTRY) +{ + //if the guess is not like "UNKNOWN" or "SHORT", go into the brackets + if(strcmp(guess_str + 1, TEXTCAT_RESULT_UNKNOWN_STR) == 0 + || strcmp(guess_str + 1, TEXTCAT_RESULT_SHORT_STR) == 0) + return; + + // From how this ctor is called from SimpleGuesser::GuessLanguage and + // SimpleGuesser::GetManagedLanguages in + // lingucomponent/source/languageguessing/simpleguesser.cxx, guess_str must start with "[": + assert(guess_str[0] == GUESS_SEPARATOR_OPEN); + auto const start = guess_str + 1; + // Only look at the prefix of guess_str, delimited by the next "]" or "[" or end-of-string; + // split it into at most three segments separated by "-" (where excess occurrences of "-" + // would become part of the third segment), like "en-US-utf8"; the first segment denotes the + // language; if there are three segments, the second denotes the country and the third the + // encoding; otherwise, the second segment, if any (e.g., in "haw-utf8"), denotes the + // encoding: + char const * dash1 = nullptr; + char const * dash2 = nullptr; + auto p = start; + for (;; ++p) { + auto const c = *p; + if (c == '\0' || c == GUESS_SEPARATOR_OPEN || c == 
GUESS_SEPARATOR_CLOSE) { + break; + } + if (c == GUESS_SEPARATOR_SEP) { + if (dash1 == nullptr) { + dash1 = p; + } else { + dash2 = p; + // The encoding is ignored, so we can stop as soon as we found the second "-": + break; + } + } + } + auto const langLen = (dash1 == nullptr ? p : dash1) - start; + if (langLen != 0) { // if not we use the default value + language_str.assign(start, langLen); + } + if (dash2 != nullptr) { + country_str.assign(dash1 + 1, dash2 - (dash1 + 1)); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.hxx b/lingucomponent/source/languageguessing/guess.hxx new file mode 100644 index 000000000..e68d852a5 --- /dev/null +++ b/lingucomponent/source/languageguessing/guess.hxx @@ -0,0 +1,56 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
 */
#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX
#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX

// Delimiters of a textcat result entry such as "[en-US-utf8]".
#define GUESS_SEPARATOR_OPEN     '['
#define GUESS_SEPARATOR_CLOSE    ']'
#define GUESS_SEPARATOR_SEP      '-'
// Values used when a result entry does not provide the component.
#define DEFAULT_LANGUAGE         ""
#define DEFAULT_COUNTRY          ""
#define DEFAULT_ENCODING         ""

#include <string>

// NOTE(review): other files in this directory use unqualified string/vector
// and appear to rely on this using-directive; confirm before removing it.
using namespace std;

/**
 * One language guess: a language code and an optional country code.
 */
class Guess final {
    public:

        /**
         * Default init: language and country are the (empty) defaults
         */
        Guess();

        /**
         * Init from a string like "[en-UK-utf8]"; the encoding part is
         * not stored
         */
        Guess(const char * guess_str);

        const string& GetLanguage() const { return language_str;}
        const string& GetCountry() const { return country_str;}

    private:
        string language_str;
        string country_str;
};

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guesslang.component b/lingucomponent/source/languageguessing/guesslang.component
new file mode 100644
index 000000000..8e2f2c001
--- /dev/null
+++ b/lingucomponent/source/languageguessing/guesslang.component
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="guesslang" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.lingu2.LanguageGuessing"> + <service name="com.sun.star.linguistic2.LanguageGuessing"/> + </implementation> +</component> diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx new file mode 100644 index 000000000..7e9d1999b --- /dev/null +++ b/lingucomponent/source/languageguessing/guesslang.cxx @@ -0,0 +1,360 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <iostream> + +#include <osl/file.hxx> +#include <tools/debug.hxx> + +#include <sal/config.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implementationentry.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include "simpleguesser.hxx" +#include "guess.hxx" + +#include <com/sun/star/lang/IllegalArgumentException.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XLanguageGuessing.hpp> +#include <unotools/pathoptions.hxx> +#include <osl/thread.h> + +#include <sal/macros.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#else +#include <textcat.h> +#endif + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +#define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing" +#define IMPLNAME "com.sun.star.lingu2.LanguageGuessing" + +static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl() +{ + Sequence<OUString> names { SERVICENAME }; + return names; +} + +static OUString getImplementationName_LangGuess_Impl() +{ + return IMPLNAME; +} + +static osl::Mutex & GetLangGuessMutex() +{ + static osl::Mutex aMutex; + return aMutex; +} + +namespace { + +class LangGuess_Impl : + public ::cppu::WeakImplHelper< + XLanguageGuessing, + XServiceInfo > +{ + SimpleGuesser m_aGuesser; + bool m_bInitialized; + + virtual ~LangGuess_Impl() override {} + void EnsureInitialized(); + +public: + LangGuess_Impl(); + LangGuess_Impl(const LangGuess_Impl&) = delete; + LangGuess_Impl& operator=(const LangGuess_Impl&) = delete; + + // XServiceInfo implementation + virtual OUString SAL_CALL getImplementationName( ) override; + virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override; + + // 
XLanguageGuessing implementation + virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override; + virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; + virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override; + + // implementation specific + /// @throws RuntimeException + void SetFingerPrintsDB( const OUString &fileName ); +}; + +} + +LangGuess_Impl::LangGuess_Impl() : + m_bInitialized( false ) +{ +} + +void LangGuess_Impl::EnsureInitialized() +{ + if (m_bInitialized) + return; + + // set this to true at the very start to prevent loops because of + // implicitly called functions below + m_bInitialized = true; + + // set default fingerprint path to where those get installed + OUString aPhysPath; + OUString aURL( SvtPathOptions().GetFingerprintPath() ); + osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath ); +#ifdef _WIN32 + aPhysPath += "\\"; +#else + aPhysPath += "/"; +#endif + + SetFingerPrintsDB( aPhysPath ); + +#if !defined(EXTTEXTCAT_VERSION_MAJOR) + + // disable currently not functional languages... 
+ struct LangCountry + { + const char *pLang; + const char *pCountry; + }; + LangCountry aDisable[] = + { + // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0 + // which is the first with EXTTEXTCAT_VERSION_MAJOR defined + {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, + {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""}, + {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""} + }; + sal_Int32 nNum = SAL_N_ELEMENTS(aDisable); + Sequence< Locale > aDisableSeq( nNum ); + Locale *pDisableSeq = aDisableSeq.getArray(); + for (sal_Int32 i = 0; i < nNum; ++i) + { + Locale aLocale; + aLocale.Language = OUString::createFromAscii( aDisable[i].pLang ); + aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry ); + pDisableSeq[i] = aLocale; + } + disableLanguages( aDisableSeq ); + DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" ); +#endif +} + +Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage( + const OUString& rText, + ::sal_Int32 nStartPos, + ::sal_Int32 nLen ) +{ + osl::MutexGuard aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength()) + throw lang::IllegalArgumentException(); + + OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) ); + Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr()); + lang::Locale aRes; + aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() ); + aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() ); + return aRes; +} + +#define DEFAULT_CONF_FILE_NAME "fpdb.conf" + +void LangGuess_Impl::SetFingerPrintsDB( + const OUString &filePath ) +{ + //! 
text encoding for file name / path needs to be in the same encoding the OS uses + OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() ); + OString conf_file_path = path + DEFAULT_CONF_FILE_NAME; + + m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr()); +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( ) +{ + osl::MutexGuard aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + vector<Guess> gs = m_aGuesser.GetAllManagedLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( ) +{ + osl::MutexGuard aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + vector<Guess> gs = m_aGuesser.GetAvailableLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( ) +{ + osl::MutexGuard aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + vector<Guess> gs = m_aGuesser.GetUnavailableLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( 
gs[i].GetLanguage().c_str() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +void SAL_CALL LangGuess_Impl::disableLanguages( + const uno::Sequence< Locale >& rLanguages ) +{ + osl::MutexGuard aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + for (const Locale& rLanguage : rLanguages) + { + string language; + + OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); + OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); + + language += l.getStr(); + language += "-"; + language += c.getStr(); + m_aGuesser.DisableLanguage(language); + } +} + +void SAL_CALL LangGuess_Impl::enableLanguages( + const uno::Sequence< Locale >& rLanguages ) +{ + osl::MutexGuard aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + for (const Locale& rLanguage : rLanguages) + { + string language; + + OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); + OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); + + language += l.getStr(); + language += "-"; + language += c.getStr(); + m_aGuesser.EnableLanguage(language); + } +} + +OUString SAL_CALL LangGuess_Impl::getImplementationName( ) +{ + return IMPLNAME; +} + +sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( ) +{ + return { SERVICENAME }; +} + +/** + * Function to create a new component instance; is needed by factory helper implementation. 
+ * @param xMgr service manager to if the components needs other component instances + */ +static Reference< XInterface > LangGuess_Impl_create( + Reference< XComponentContext > const & ) +{ + return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl ); +} + +//#### EXPORTED ### functions to allow for registration and creation of the UNO component +static const struct ::cppu::ImplementationEntry s_component_entries [] = +{ + { + LangGuess_Impl_create, getImplementationName_LangGuess_Impl, + getSupportedServiceNames_LangGuess_Impl, + ::cppu::createSingleComponentFactory, + nullptr, 0 + }, + { nullptr, nullptr, nullptr, nullptr, nullptr, 0 } +}; + +extern "C" +{ + +SAL_DLLPUBLIC_EXPORT void * guesslang_component_getFactory( + char const * implName, void * xMgr, + void * xRegistry ) +{ + return ::cppu::component_getFactoryHelper( + implName, xMgr, xRegistry, s_component_entries ); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx new file mode 100644 index 000000000..76b3b65c3 --- /dev/null +++ b/lingucomponent/source/languageguessing/simpleguesser.cxx @@ -0,0 +1,227 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + /** + * + * + * + * + * TODO + * - Add exception throwing when h == NULL + * - Not init h when implicit constructor is launched + */ + +#include <string.h> +#include <sstream> +#include <iostream> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#include <libexttextcat/common.h> +#include <libexttextcat/constants.h> +#include <libexttextcat/fingerprint.h> +#include <libexttextcat/utf8misc.h> +#else +#include <textcat.h> +#include <common.h> +#include <constants.h> +#include <fingerprint.h> +#include <utf8misc.h> +#endif + +#include <sal/types.h> + +#include<rtl/character.hxx> +#include "simpleguesser.hxx" + +using namespace std; + +static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){ + size_t i; + int ret = 0; + + size_t min = s1.length(); + if (min > s2.length()) + min = s2.length(); + + for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){ + ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i])) + - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i])); + if(s1[i] == '.' || s2[i] == '.') {ret = 0;} //. is a neutral character + } + return ret; + } + +namespace { + +/** + * This following structure is from textcat.c + */ +typedef struct textcat_t{ + + void **fprint; + char *fprint_disable; + uint4 size; + uint4 maxsize; + + char output[MAXOUTPUTSIZE]; + +} textcat_t; +// end of the 3 structs + +} + +SimpleGuesser::SimpleGuesser() +{ + h = nullptr; +} + +SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){ + // Check for self-assignment! + if (this == &sg) // Same object? + return *this; // Yes, so skip assignment, and just return *this. 
+ + if(h){textcat_Done(h);} + h = sg.h; + return *this; +} + +SimpleGuesser::~SimpleGuesser() +{ + if(h){textcat_Done(h);} +} + +/*! + \fn SimpleGuesser::GuessLanguage(char* text) + */ +vector<Guess> SimpleGuesser::GuessLanguage(const char* text) +{ + vector<Guess> guesses; + + if (!h) + return guesses; + + int len = strlen(text); + + if (len > MAX_STRING_LENGTH_TO_ANALYSE) + len = MAX_STRING_LENGTH_TO_ANALYSE; + + const char *guess_list = textcat_Classify(h, text, len); + + if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0) + return guesses; + + int current_pointer = 0; + + for(int i = 0; guess_list[current_pointer] != '\0'; i++) + { + while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0') + current_pointer++; + if(guess_list[current_pointer] != '\0') + { + Guess g(guess_list + current_pointer); + + guesses.push_back(g); + + current_pointer++; + } + } + + return guesses; +} + +Guess SimpleGuesser::GuessPrimaryLanguage(const char* text) +{ + vector<Guess> ret = GuessLanguage(text); + return ret.empty() ? 
Guess() : ret[0]; +} +/** + * Is used to know which language is available, unavailable or both + * when mask = 0xF0, return only Available + * when mask = 0x0F, return only Unavailable + * when mask = 0xFF, return both Available and Unavailable + */ +vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask) +{ + textcat_t *tables = static_cast<textcat_t*>(h); + + vector<Guess> lang; + if(!h){return lang;} + + for (size_t i=0; i<tables->size; ++i) + { + if (tables->fprint_disable[i] & mask) + { + string langStr = "["; + langStr += fp_Name(tables->fprint[i]); + Guess g(langStr.c_str()); + lang.push_back(g); + } + } + + return lang; +} + +vector<Guess> SimpleGuesser::GetAvailableLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) ); +} + +vector<Guess> SimpleGuesser::GetUnavailableLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0x0F )); +} + +vector<Guess> SimpleGuesser::GetAllManagedLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0xFF )); +} + +void SimpleGuesser::XableLanguage(const string& lang, char mask) +{ + textcat_t *tables = static_cast<textcat_t*>(h); + + if(!h){return;} + + for (size_t i=0; i<tables->size; i++) + { + string language(fp_Name(tables->fprint[i])); + if (startsAsciiCaseInsensitive(language,lang) == 0) + tables->fprint_disable[i] = mask; + } +} + +void SimpleGuesser::EnableLanguage(const string& lang) +{ + XableLanguage(lang, sal::static_int_cast< char >( 0xF0 )); +} + +void SimpleGuesser::DisableLanguage(const string& lang) +{ + XableLanguage(lang, sal::static_int_cast< char >( 0x0F )); +} + +void SimpleGuesser::SetDBPath(const char* path, const char* prefix) +{ + if (h) + textcat_Done(h); + h = special_textcat_Init(path, prefix); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx new file mode 100644 index 
000000000..34abf26d4 --- /dev/null +++ b/lingucomponent/source/languageguessing/simpleguesser.hxx @@ -0,0 +1,112 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
 */
#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX
#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX

#include <string.h>
#include <string>
#include <cstdlib>
#include <vector>
#include "guess.hxx"

// Upper bound on the number of bytes of input handed to the classifier.
#define MAX_STRING_LENGTH_TO_ANALYSE 200

using namespace std;

/**
 * Thin wrapper around a textcat handle that guesses the language of a text
 * and lets individual languages be enabled or disabled.
 */
class SimpleGuesser final {
public:
    /**
     * Creates a guesser with no fingerprint database loaded; call
     * SetDBPath() before guessing
     */
    SimpleGuesser();

    /**
     * Copies the database handle of another guesser.
     * NOTE(review): the handle itself is shared, not duplicated, and both
     * objects release it on destruction — confirm no caller relies on this.
     * @param SimpleGuesser& sg the other guesser
     */
    SimpleGuesser& operator=(const SimpleGuesser& sg);

    /**
     * destroy the object
     */
    ~SimpleGuesser();

    /**
     * Analyze a text and return the most probable languages of the text
     * @param char* text is the text to analyze
     * @return the list of guess
     */
    vector<Guess> GuessLanguage(const char* text);

    /**
     * Analyze a text and return the most probable language of the text
     * @param char* text is the text to analyze
     * @return the guess (containing language)
     */
    Guess GuessPrimaryLanguage(const char* text);

    /**
     * List all available languages (possibly to be in guesses)
     * @return the list of languages
     */
    vector<Guess> GetAvailableLanguages();

    /**
     * List all languages (possibly in guesses or not)
     * @return the list of languages
     */
    vector<Guess> GetAllManagedLanguages();

    /**
     * List all Unavailable languages (disable for any reason)
     * @return the list of languages
     */
    vector<Guess> GetUnavailableLanguages();

    /**
     * Mark a language enabled
     * @param string lang the language to enable (build like language-COUNTRY-encoding)
     */
    void EnableLanguage(const string& lang);

    /**
     * Mark a language disabled
     * @param string lang the language to disable (build like language-COUNTRY-encoding)
     */
    void DisableLanguage(const string& lang);

    /**
     * Load a new DB of fingerprints
     * @param const char* thePathOfConfFile self explaining
     * @param const char* prefix is the path where the directory which contains fingerprint files is stored
     */
    void SetDBPath(const char* thePathOfConfFile, const char* prefix);

private:

    //Where typical fingerprints (n-gram tables) are stored
    void* h;

    //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both
    vector<Guess> GetManagedLanguages(const char mask);

    //Like getManagedLanguages, this function enable or disable a language and it depends of the mask
    void XableLanguage(const string& lang, char mask);
};

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/lingutil/lingutil.cxx b/lingucomponent/source/lingutil/lingutil.cxx
new file mode 100644
index 000000000..1c9f449ce
--- /dev/null
+++ b/lingucomponent/source/lingutil/lingutil.cxx
@@ -0,0 +1,294 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */ + +#if defined(_WIN32) +#if !defined WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#endif + +#include <osl/thread.h> +#include <osl/file.hxx> +#include <osl/process.h> +#include <tools/debug.hxx> +#include <tools/urlobj.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <i18nlangtag/mslangid.hxx> +#include <unotools/bootstrap.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/pathoptions.hxx> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <rtl/tencinfo.h> +#include <linguistic/misc.hxx> + +#include <set> +#include <vector> +#include <string.h> + +#include "lingutil.hxx" + +#include <sal/macros.h> + +using namespace ::com::sun::star; + +#if defined(_WIN32) +OString Win_AddLongPathPrefix( const OString &rPathName ) +{ +#define WIN32_LONG_PATH_PREFIX "\\\\?\\" + if (!rPathName.match(WIN32_LONG_PATH_PREFIX)) return WIN32_LONG_PATH_PREFIX + rPathName; + return rPathName; +} +#endif //defined(_WIN32) + +#ifdef SYSTEM_DICTS +// find old style dictionaries in system directories +static void GetOldStyleDicsInDir( + OUString const & aSystemDir, OUString const & aFormatName, + OUString const & aSystemSuffix, OUString const & aSystemPrefix, + std::set< OUString >& aDicLangInUse, + std::vector< SvtLinguConfigDictionaryEntry >& aRes ) +{ + osl::Directory aSystemDicts(aSystemDir); + if (aSystemDicts.open() != osl::FileBase::E_None) + return; + + osl::DirectoryItem aItem; + osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL); + while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None) + { + aItem.getFileStatus(aFileStatus); + OUString sPath = aFileStatus.getFileURL(); + if (sPath.endsWith(aSystemSuffix)) + { + sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1; + if (!sPath.match(aSystemPrefix, nStartIndex)) + continue; + OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.getLength(), + sPath.getLength() - aSystemSuffix.getLength() - + nStartIndex - aSystemPrefix.getLength()); + if 
(sChunk.isEmpty()) + continue; + + // We prefer (now) to use language tags. + // Avoid feeding in the older LANG_REGION scheme to the BCP47 + // ctor as that triggers use of liblangtag and initializes its + // database which we do not want during startup. Convert + // instead. + sChunk = sChunk.replace( '_', '-'); + + // There's a known exception to the rule, the dreaded + // hu_HU_u8.dic of the myspell-hu package, see + // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic + // This was ignored because unknown in the old implementation, + // truncate to the known locale and either insert because hu_HU + // wasn't encountered yet, or skip because it was. It doesn't + // really matter because the proper new-style hu_HU dictionary + // will take precedence anyway if installed with a Hungarian + // languagepack. Again, this is only to not pull in all + // liblangtag and stuff during startup, the result would be + // !isValidBcp47() and the dictionary ignored. + if (sChunk == "hu-HU-u8") + sChunk = "hu-HU"; + + LanguageTag aLangTag(sChunk, true); + if (!aLangTag.isValidBcp47()) + continue; + + // Thus we first get the language of the dictionary + const OUString& aLocaleName(aLangTag.getBcp47()); + + if (aDicLangInUse.insert(aLocaleName).second) + { + // add the dictionary to the resulting vector + SvtLinguConfigDictionaryEntry aDicEntry; + aDicEntry.aLocations.realloc(1); + aDicEntry.aLocaleNames.realloc(1); + aDicEntry.aLocations[0] = sPath; + aDicEntry.aFormatName = aFormatName; + aDicEntry.aLocaleNames[0] = aLocaleName; + aRes.push_back( aDicEntry ); + } + } + } +} +#endif + +// build list of old style dictionaries (not as extensions) to use. +// User installed dictionaries (the ones residing in the user paths) +// will get precedence over system installed ones for the same language. 
+std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType ) +{ + std::vector< SvtLinguConfigDictionaryEntry > aRes; + + if (!pDicType) + return aRes; + + OUString aFormatName; + OUString aDicExtension; +#ifdef SYSTEM_DICTS + OUString aSystemDir; + OUString aSystemPrefix; + OUString aSystemSuffix; +#endif + if (strcmp( pDicType, "DICT" ) == 0) + { + aFormatName = "DICT_SPELL"; + aDicExtension = ".dic"; +#ifdef SYSTEM_DICTS + aSystemDir = DICT_SYSTEM_DIR; + aSystemSuffix = aDicExtension; +#endif + } + else if (strcmp( pDicType, "HYPH" ) == 0) + { + aFormatName = "DICT_HYPH"; + aDicExtension = ".dic"; +#ifdef SYSTEM_DICTS + aSystemDir = HYPH_SYSTEM_DIR; + aSystemPrefix = "hyph_"; + aSystemSuffix = aDicExtension; +#endif + } + else if (strcmp( pDicType, "THES" ) == 0) + { + aFormatName = "DICT_THES"; + aDicExtension = ".dat"; +#ifdef SYSTEM_DICTS + aSystemDir = THES_SYSTEM_DIR; + aSystemPrefix = "th_"; + aSystemSuffix = "_v2.dat"; +#endif + } + + if (aFormatName.isEmpty() || aDicExtension.isEmpty()) + return aRes; + +#ifdef SYSTEM_DICTS + // set of languages to remember the language where it is already + // decided to make use of the dictionary. 
+ std::set< OUString > aDicLangInUse; + + // follow the hunspell tool's example and check DICPATH for preferred dictionaries + rtl_uString * pSearchPath = nullptr; + osl_getEnvironment(OUString("DICPATH").pData, &pSearchPath); + + if (pSearchPath) + { + OUString aSearchPath(pSearchPath); + rtl_uString_release(pSearchPath); + + sal_Int32 nIndex = 0; + do + { + OUString aSystem = aSearchPath.getToken(0, ':', nIndex); + OUString aCWD; + OUString aRelative; + OUString aAbsolute; + + if (!utl::Bootstrap::getProcessWorkingDir(aCWD)) + continue; + if (osl::FileBase::getFileURLFromSystemPath(aSystem, aRelative) + != osl::FileBase::E_None) + continue; + if (osl::FileBase::getAbsoluteFileURL(aCWD, aRelative, aAbsolute) + != osl::FileBase::E_None) + continue; + + // GetOldStyleDicsInDir will make sure the dictionary is the right + // type based on its prefix, that way hyphen, mythes and regular + // dictionaries can live in one directory + GetOldStyleDicsInDir(aAbsolute, aFormatName, aSystemSuffix, + aSystemPrefix, aDicLangInUse, aRes); + } + while (nIndex != -1); + } + + // load system directories last so that DICPATH prevails + GetOldStyleDicsInDir(aSystemDir, aFormatName, aSystemSuffix, aSystemPrefix, + aDicLangInUse, aRes); +#endif + + return aRes; +} + +void MergeNewStyleDicsAndOldStyleDics( + std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, + const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ) +{ + // get list of languages supported by new style dictionaries + std::set< OUString > aNewStyleLanguages; + for (auto const& newStyleDic : rNewStyleDics) + { + const uno::Sequence< OUString > aLocaleNames(newStyleDic.aLocaleNames); + sal_Int32 nLocaleNames = aLocaleNames.getLength(); + for (sal_Int32 k = 0; k < nLocaleNames; ++k) + { + aNewStyleLanguages.insert( aLocaleNames[k] ); + } + } + + // now check all old style dictionaries if they will add a not yet + // added language. 
If so add them to the resulting vector + for (auto const& oldStyleDic : rOldStyleDics) + { + sal_Int32 nOldStyleDics = oldStyleDic.aLocaleNames.getLength(); + + // old style dics should only have one language listed... + DBG_ASSERT( nOldStyleDics, "old style dictionary with more than one language found!"); + if (nOldStyleDics > 0) + { + if (linguistic::LinguIsUnspecified( oldStyleDic.aLocaleNames[0])) + { + OSL_FAIL( "old style dictionary with invalid language found!" ); + continue; + } + + // language not yet added? + if (aNewStyleLanguages.find( oldStyleDic.aLocaleNames[0] ) == aNewStyleLanguages.end()) + rNewStyleDics.push_back(oldStyleDic); + } + else + { + OSL_FAIL( "old style dictionary with no language found!" ); + } + } +} + +rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset) +{ + // default result: used to indicate that we failed to get the proper encoding + rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW; + + if (pCharset) + { + eRet = rtl_getTextEncodingFromMimeCharset(pCharset); + if (eRet == RTL_TEXTENCODING_DONTKNOW) + eRet = rtl_getTextEncodingFromUnixCharset(pCharset); + if (eRet == RTL_TEXTENCODING_DONTKNOW) + { + if (strcmp("ISCII-DEVANAGARI", pCharset) == 0) + eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI; + } + } + return eRet; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/lingutil/lingutil.hxx b/lingucomponent/source/lingutil/lingutil.hxx new file mode 100644 index 000000000..984b5e2e8 --- /dev/null +++ b/lingucomponent/source/lingutil/lingutil.hxx @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX + +#include <com/sun/star/lang/Locale.hpp> +#include <rtl/string.hxx> +#include <rtl/ustring.hxx> + +#include <vector> + +#define OU2ENC(rtlOUString, rtlEncoding) \ + OString((rtlOUString).getStr(), (rtlOUString).getLength(), \ + rtlEncoding, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK).getStr() + +struct SvtLinguConfigDictionaryEntry; + +#if defined(_WIN32) + +// to be use to get a path name with long path prefix +// under Windows for Hunspell, Hyphen and MyThes libraries +OString Win_AddLongPathPrefix( const OString &rPathName ); +#endif + + +// temporary function, to be removed when new style dictionaries +// using configuration entries are fully implemented and provided +std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char * pDicType ); +void MergeNewStyleDicsAndOldStyleDics( std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ); + +//Find an encoding from a charset string, using +//rtl_getTextEncodingFromMimeCharset and falling back to +//rtl_getTextEncodingFromUnixCharset with the addition of +//ISCII-DEVANAGARI. 
On failure returns +//RTL_TEXTENCODING_DONTKNOW +rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/numbertext/numbertext.component b/lingucomponent/source/numbertext/numbertext.component new file mode 100644 index 000000000..b17cc31e0 --- /dev/null +++ b/lingucomponent/source/numbertext/numbertext.component @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="numbertext" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.lingu2.NumberText"> + <service name="com.sun.star.linguistic2.NumberText"/> + </implementation> +</component> diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx new file mode 100644 index 000000000..556dc2ea7 --- /dev/null +++ b/lingucomponent/source/numbertext/numbertext.cxx @@ -0,0 +1,209 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <config_libnumbertext.h> +#include <iostream> + +#include <osl/file.hxx> +#include <tools/debug.hxx> +#include <o3tl/char16_t2wchar_t.hxx> + +#include <sal/config.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implementationentry.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include <i18nlangtag/languagetag.hxx> +#include <com/sun/star/lang/IllegalArgumentException.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XNumberText.hpp> +#include <unotools/pathoptions.hxx> +#include <osl/thread.h> + +#include <sal/macros.h> + +#if ENABLE_LIBNUMBERTEXT +#include <Numbertext.hxx> +#endif + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +#define SERVICENAME "com.sun.star.linguistic2.NumberText" +#define IMPLNAME "com.sun.star.lingu2.NumberText" + +static Sequence<OUString> getSupportedServiceNames_NumberText_Impl() +{ + Sequence<OUString> names{ SERVICENAME }; + return names; +} + +static OUString getImplementationName_NumberText_Impl() { return IMPLNAME; } + +static osl::Mutex& GetNumberTextMutex() +{ + static osl::Mutex aMutex; + return aMutex; +} + +namespace +{ +class NumberText_Impl : public ::cppu::WeakImplHelper<XNumberText, XServiceInfo> +{ +#if ENABLE_LIBNUMBERTEXT + Numbertext m_aNumberText; +#endif + bool m_bInitialized; + + virtual ~NumberText_Impl() override {} + void EnsureInitialized(); + +public: + NumberText_Impl(); + NumberText_Impl(const NumberText_Impl&) = delete; + NumberText_Impl& operator=(const NumberText_Impl&) = delete; + + // XServiceInfo implementation + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; + virtual Sequence<OUString> SAL_CALL getSupportedServiceNames() override; + + 
// XNumberText implementation + virtual OUString SAL_CALL getNumberText(const OUString& aText, + const ::css::lang::Locale& rLocale) override; + virtual css::uno::Sequence<css::lang::Locale> SAL_CALL getAvailableLanguages() override; +}; +} + +NumberText_Impl::NumberText_Impl() + : m_bInitialized(false) +{ +} + +void NumberText_Impl::EnsureInitialized() +{ + if (m_bInitialized) + return; + + // set this to true at the very start to prevent loops because of + // implicitly called functions below + m_bInitialized = true; + + // set default numbertext path to where those get installed + OUString aPhysPath; + OUString aURL(SvtPathOptions().GetNumbertextPath()); + osl::FileBase::getSystemPathFromFileURL(aURL, aPhysPath); +#ifdef _WIN32 + aPhysPath += "\\"; +#else + aPhysPath += "/"; +#endif +#if ENABLE_LIBNUMBERTEXT + OString path = OUStringToOString(aPhysPath, osl_getThreadTextEncoding()); + m_aNumberText.set_prefix(path.getStr()); +#endif +} + +OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Locale& +#if ENABLE_LIBNUMBERTEXT + rLocale) +#else +) +#endif +{ + osl::MutexGuard aGuard(GetNumberTextMutex()); + EnsureInitialized(); +#if ENABLE_LIBNUMBERTEXT + // libnumbertext supports Language + Country tags (separated by "_" or "-") + LanguageTag aLanguageTag(rLocale); + OUString aCode(aLanguageTag.getLanguage()); + OUString aCountry(aLanguageTag.getCountry()); + OUString aScript(aLanguageTag.getScript()); + if (!aScript.isEmpty()) + aCode += "-" + aScript; + if (!aCountry.isEmpty()) + aCode += "-" + aCountry; + OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US)); +#if defined(_WIN32) + std::wstring sResult(o3tl::toW(rText.getStr())); +#else + OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8)); + std::wstring sResult = Numbertext::string2wstring(aInput.getStr()); +#endif + bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr()); + DBG_ASSERT(result, "numbertext: false"); +#if defined(_WIN32) + 
OUString aResult(o3tl::toU(sResult.c_str())); +#else + OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str()); +#endif + return aResult; +#else + return rText; +#endif +} + +uno::Sequence<Locale> SAL_CALL NumberText_Impl::getAvailableLanguages() +{ + osl::MutexGuard aGuard(GetNumberTextMutex()); + // TODO + Sequence<css::lang::Locale> aRes; + return aRes; +} + +OUString SAL_CALL NumberText_Impl::getImplementationName() { return IMPLNAME; } + +sal_Bool SAL_CALL NumberText_Impl::supportsService(const OUString& ServiceName) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL NumberText_Impl::getSupportedServiceNames() { return { SERVICENAME }; } + +/** + * Function to create a new component instance; is needed by factory helper implementation. + * @param xMgr service manager to if the components needs other component instances + */ +static Reference<XInterface> NumberText_Impl_create(Reference<XComponentContext> const&) +{ + return static_cast<::cppu::OWeakObject*>(new NumberText_Impl); +} + +//#### EXPORTED ### functions to allow for registration and creation of the UNO component +static const struct ::cppu::ImplementationEntry s_component_entries[] + = { { NumberText_Impl_create, getImplementationName_NumberText_Impl, + getSupportedServiceNames_NumberText_Impl, ::cppu::createSingleComponentFactory, nullptr, + 0 }, + { nullptr, nullptr, nullptr, nullptr, nullptr, 0 } }; + +extern "C" { + +SAL_DLLPUBLIC_EXPORT void* numbertext_component_getFactory(char const* implName, void* xMgr, + void* xRegistry) +{ + return ::cppu::component_getFactoryHelper(implName, xMgr, xRegistry, s_component_entries); +} +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component b/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component new file mode 100644 index 000000000..92807d4b3 --- /dev/null +++ 
b/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="MacOSXSpell" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.MacOSXSpellChecker"> + <service name="com.sun.star.linguistic2.SpellChecker"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx new file mode 100644 index 000000000..0adc1a1cb --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx @@ -0,0 +1,131 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_MACOSXSPELL_MACSPELLIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_MACOSXSPELL_MACSPELLIMP_HXX + +#include <cppuhelper/implbase.hxx> + +#include <premac.h> +#ifdef MACOSX +#include <Carbon/Carbon.h> +#import <Cocoa/Cocoa.h> +#else +#include <UIKit/UIKit.h> +#endif +#include <postmac.h> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XSpellChecker.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +class MacSpellChecker : + public cppu::WeakImplHelper + < + XSpellChecker, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + rtl_TextEncoding * aDEncs; + Locale * aDLocs; + OUString * aDNames; + sal_Int32 numdict; +#ifdef MACOSX + int macTag; // unique tag for this doc +#else + UITextChecker * pChecker; +#endif + 
::comphelper::OInterfaceContainerHelper2 aEvtListeners; + rtl::Reference< linguistic::PropertyHelper_Spell > xPropHelper; + bool bDisposing; + + MacSpellChecker(const MacSpellChecker &) = delete; + MacSpellChecker & operator = (const MacSpellChecker &) = delete; + + linguistic::PropertyHelper_Spell & GetPropHelper_Impl(); + linguistic::PropertyHelper_Spell & GetPropHelper() + { + return xPropHelper.is() ? *xPropHelper : GetPropHelper_Impl(); + } + + sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale ); + Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale ); + +public: + MacSpellChecker(); + virtual ~MacSpellChecker() override; + + // XSupportedLocales (for XSpellChecker) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XSpellChecker + virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override; + virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // 
XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + + static inline OUString getImplementationName_Static() throw(); + static Sequence< OUString > getSupportedServiceNames_Static() throw(); +}; + +inline OUString MacSpellChecker::getImplementationName_Static() throw() +{ + return "org.openoffice.lingu.MacOSXSpellChecker"; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm new file mode 100644 index 000000000..1d6f84fcc --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm @@ -0,0 +1,676 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <com/sun/star/uno/Reference.h> + +#include <com/sun/star/linguistic2/SpellFailure.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <com/sun/star/registry/XRegistryKey.hpp> +#include <com/sun/star/lang/XSingleServiceFactory.hpp> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> + +#include "macspellimp.hxx" + +#include <linguistic/spelldta.hxx> +#include <unotools/pathoptions.hxx> +#include <unotools/useroptions.hxx> +#include <osl/file.hxx> +#include <rtl/ustrbuf.hxx> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +MacSpellChecker::MacSpellChecker() : + aEvtListeners( GetLinguMutex() ) +{ + aDEncs = nullptr; + aDLocs = nullptr; + aDNames = nullptr; + bDisposing = false; + numdict = 0; +#ifndef IOS + NSApplicationLoad(); + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + macTag = [NSSpellChecker uniqueSpellDocumentTag]; + [pool release]; +#else + pChecker = [[UITextChecker alloc] init]; +#endif +} + + +MacSpellChecker::~MacSpellChecker() +{ + numdict = 0; + if (aDEncs) delete[] aDEncs; + aDEncs = nullptr; + if (aDLocs) delete[] aDLocs; + aDLocs = nullptr; + if (aDNames) delete[] aDNames; + aDNames = nullptr; + if (xPropHelper.is()) + xPropHelper->RemoveAsPropListener(); +} + + +PropertyHelper_Spell & MacSpellChecker::GetPropHelper_Impl() +{ + if (!xPropHelper.is()) + { + Reference< XLinguProperties > xPropSet( GetLinguProperties() ); + + xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xPropSet ); + xPropHelper->AddAsPropListener(); + } + return *xPropHelper; +} + + +Sequence< Locale > SAL_CALL MacSpellChecker::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this 
routine should return the locales supported by the installed + // dictionaries. So here we need to parse both the user edited + // dictionary list and the shared dictionary list + // to see what dictionaries the admin/user has installed + + int numshr; // number of shared dictionary entries + rtl_TextEncoding aEnc = RTL_TEXTENCODING_UTF8; + + std::vector<NSString *> postspdict; + + if (!numdict) { + + // invoke a dictionary manager to get the user dictionary list + // TODO How on macOS? + + // invoke a second dictionary manager to get the shared dictionary list +#ifdef MACOSX + NSArray *aSpellCheckLanguages = [[NSSpellChecker sharedSpellChecker] availableLanguages]; +#else + NSArray *aSpellCheckLanguages = [UITextChecker availableLanguages]; +#endif + + for (NSUInteger i = 0; i < [aSpellCheckLanguages count]; i++) + { + NSString* pLangStr = static_cast<NSString*>([aSpellCheckLanguages objectAtIndex:i]); + + // Fix up generic languages (without territory code) and odd combinations that LO + // doesn't handle. + if ([pLangStr isEqualToString:@"da"]) + { + postspdict.push_back( @"da_DK" ); + } + else if ([pLangStr isEqualToString:@"de"]) + { + const std::vector<NSString*> aDE + { @"AT", @"BE", @"CH", @"DE", @"LI", @"LU" }; + for (auto c: aDE) + { + pLangStr = [@"de_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#ifdef IOS + // iOS says it has specifically de_DE, but let's assume it is good enough for the other + // variants, too, for now. + else if ([pLangStr isEqualToString:@"de_DE"]) + { + const std::vector<NSString*> aDE + { @"AT", @"BE", @"CH", @"DE", @"LI", @"LU" }; + for (auto c: aDE) + { + pLangStr = [@"de_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"en"]) + { + // System has en_AU, en_CA, en_GB, and en_IN. Add the rest. 
+ const std::vector<NSString*> aEN + { @"BW", @"BZ", @"GH", @"GM", @"IE", @"JM", @"MU", @"MW", @"MY", @"NA", + @"NZ", @"PH", @"TT", @"US", @"ZA", @"ZW" }; + for (auto c: aEN) + { + pLangStr = [@"en_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"en_JP"] + || [pLangStr isEqualToString:@"en_SG"]) + { + // Just skip, LO doesn't have those yet in this context. + } + else if ([pLangStr isEqualToString:@"es"]) + { + const std::vector<NSString*> aES + { @"AR", @"BO", @"CL", @"CO", @"CR", @"CU", @"DO", @"EC", @"ES", @"GT", + @"HN", @"MX", @"NI", @"PA", @"PE", @"PR", @"PY", @"SV", @"UY", @"VE" }; + for (auto c: aES) + { + pLangStr = [@"es_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"fi"]) + { + postspdict.push_back( @"fi_FI" ); + } + else if ([pLangStr isEqualToString:@"fr"]) + { + const std::vector<NSString*> aFR + { @"BE", @"BF", @"BJ", @"CA", @"CH", @"CI", @"FR", @"LU", @"MC", @"ML", + @"MU", @"NE", @"SN", @"TG" }; + for (auto c: aFR) + { + pLangStr = [@"fr_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"it"]) + { + postspdict.push_back( @"it_CH" ); + postspdict.push_back( @"it_IT" ); + } + else if ([pLangStr isEqualToString:@"ko"]) + { + postspdict.push_back( @"ko_KR" ); + } + else if ([pLangStr isEqualToString:@"nl"]) + { + postspdict.push_back( @"nl_BE" ); + postspdict.push_back( @"nl_NL" ); + } + else if ([pLangStr isEqualToString:@"nb"]) + { + postspdict.push_back( @"nb_NO" ); + } + else if ([pLangStr isEqualToString:@"pl"]) + { + postspdict.push_back( @"pl_PL" ); + } + else if ([pLangStr isEqualToString:@"ru"]) + { + postspdict.push_back( @"ru_RU" ); + } + else if ([pLangStr isEqualToString:@"sv"]) + { + postspdict.push_back( @"sv_FI" ); + postspdict.push_back( @"sv_SE" ); + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"sv_SE"]) + { + postspdict.push_back( 
@"sv_FI" ); + postspdict.push_back( @"sv_SE" ); + } +#endif + else if ([pLangStr isEqualToString:@"tr"]) + { + postspdict.push_back( @"tr_TR" ); + } + else + postspdict.push_back( pLangStr ); + } + // System has pt_BR and pt_PT, add pt_AO. + postspdict.push_back( @"pt_AO" ); + + numshr = postspdict.size(); + + // we really should merge these and remove duplicates but since + // users can name their dictionaries anything they want it would + // be impossible to know if a real duplication exists unless we + // add some unique key to each myspell dictionary + numdict = numshr; + + if (numdict) { + aDLocs = new Locale [numdict]; + aDEncs = new rtl_TextEncoding [numdict]; + aDNames = new OUString [numdict]; + aSuppLocales.realloc(numdict); + Locale * pLocale = aSuppLocales.getArray(); + int numlocs = 0; + int newloc; + int i,j; + int k = 0; + + //first add the user dictionaries + //TODO for MAC? + + // now add the shared dictionaries + for (i = 0; i < numshr; i++) { + NSDictionary *aLocDict = [ NSLocale componentsFromLocaleIdentifier:postspdict[i] ]; + NSString* aLang = [ aLocDict objectForKey:NSLocaleLanguageCode ]; + NSString* aCountry = [ aLocDict objectForKey:NSLocaleCountryCode ]; + OUString lang([aLang cStringUsingEncoding: NSUTF8StringEncoding], [aLang length], aEnc); + OUString country([ aCountry cStringUsingEncoding: NSUTF8StringEncoding], [aCountry length], aEnc); + Locale nLoc( lang, country, OUString() ); + newloc = 1; + //eliminate duplicates (is this needed for MacOS?) 
+ for (j = 0; j < numlocs; j++) { + if (nLoc == pLocale[j]) newloc = 0; + } + if (newloc) { + pLocale[numlocs] = nLoc; + numlocs++; + } + aDLocs[k] = nLoc; + aDEncs[k] = 0; + k++; + } + + aSuppLocales.realloc(numlocs); + + } else { + /* no dictionary.lst found so register no dictionaries */ + numdict = 0; + aDEncs = nullptr; + aDLocs = nullptr; + aDNames = nullptr; + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + + + +sal_Bool SAL_CALL MacSpellChecker::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!aSuppLocales.getLength()) + getLocales(); + + sal_Int32 nLen = aSuppLocales.getLength(); + for (sal_Int32 i = 0; i < nLen; ++i) + { + const Locale *pLocale = aSuppLocales.getConstArray(); + if (rLocale == pLocale[i]) + { + bRes = true; + break; + } + } + return bRes; +} + + +sal_Int16 MacSpellChecker::GetSpellFailure( const OUString &rWord, const Locale &rLocale ) +{ + rtl_TextEncoding aEnc; + + // initialize a myspell object for each dictionary once + // (note: mutex is held higher up in isValid) + + + sal_Int16 nRes = -1; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + aEnc = 0; + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(nWord.getStr()) length: nWord.getLength()]autorelease]; + NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease]; + if(rLocale.Country.getLength()>0) + { + NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar 
const *>(rLocale.Country.getStr()) length: rLocale.Country.getLength()]autorelease]; + NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry]; + aLang = [aLang stringByAppendingString:aTaggedCountry]; + } + +#ifdef MACOSX + NSInteger aCount; + NSRange range = [[NSSpellChecker sharedSpellChecker] checkSpellingOfString:aNSStr startingAt:0 language:aLang wrap:false inSpellDocumentWithTag:macTag wordCount:&aCount]; +#else + NSRange range = [pChecker rangeOfMisspelledWordInString:aNSStr range:NSMakeRange(0, [aNSStr length]) startingAt:0 wrap:NO language:aLang]; +#endif + int rVal = 0; + if(range.length>0) + { + rVal = -1; + } + else + { + rVal = 1; + } + [pool release]; + if (rVal != 1) + { + nRes = SpellFailure::SPELLING_ERROR; + } else { + return -1; + } + } + return nRes; +} + + + +sal_Bool SAL_CALL + MacSpellChecker::isValid( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence<PropertyValue>& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || !rWord.getLength()) + return true; + + if (!hasLocale( rLocale )) + return true; + + // Get property values to be used. + // These are be the default values set in the SN_LINGU_PROPERTIES + // PropertySet which are overridden by the supplied ones from the + // last argument. + // You'll probably like to use a simpler solution than the provided + // one using the PropertyHelper_Spell. 
+ + PropertyHelper_Spell &rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + sal_Int16 nFailure = GetSpellFailure( rWord, rLocale ); + if (nFailure != -1) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + // postprocess result for errors that should be ignored + if ( (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) + || (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) + || (!rHelper.IsSpellCapitalization() + && nFailure == SpellFailure::CAPTION_ERROR) + ) + nFailure = -1; + } + + return (nFailure == -1); +} + +Reference< XSpellAlternatives > + MacSpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) +{ + // Retrieves the return values for the 'spell' function call in case + // of a misspelled word. + // Especially it may give a list of suggested (correct) words: + + Reference< XSpellAlternatives > xRes; + // note: mutex is held by higher up by spell which covers both + + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + int count; + Sequence< OUString > aStr( 0 ); + + // first handle smart quotes (single and double) + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(nWord.getStr()) length: nWord.getLength()]autorelease]; + NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease]; + if(rLocale.Country.getLength()>0) + { + NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) length: 
rLocale.Country.getLength()]autorelease]; + NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry]; + aLang = [aLang stringByAppendingString:aTaggedCountry]; + } +#ifdef MACOSX + [[NSSpellChecker sharedSpellChecker] setLanguage:aLang]; + NSArray *guesses = [[NSSpellChecker sharedSpellChecker] guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang inSpellDocumentWithTag:0]; + (void) this; // avoid loplugin:staticmethods, the !MACOSX case uses 'this' +#else + NSArray *guesses = [pChecker guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang]; +#endif + count = [guesses count]; + if (count) + { + aStr.realloc( count ); + OUString *pStr = aStr.getArray(); + for (int ii=0; ii < count; ii++) + { + // if needed add: if (suglst[ii] == NULL) continue; + NSString* guess = [guesses objectAtIndex:ii]; + OUString cvtwrd(reinterpret_cast<const sal_Unicode*>([guess cStringUsingEncoding:NSUnicodeStringEncoding]), static_cast<sal_Int32>([guess length])); + pStr[ii] = cvtwrd; + } + } + [pool release]; + } + + // now return an empty alternative for no suggestions or the list of alternatives if some found + SpellAlternatives *pAlt = new SpellAlternatives; + pAlt->SetWordLanguage( rWord, nLang ); + pAlt->SetFailureType( SpellFailure::SPELLING_ERROR ); + pAlt->SetAlternatives( aStr ); + xRes = pAlt; + return xRes; + +} + +Reference< XSpellAlternatives > SAL_CALL + MacSpellChecker::spell( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence<PropertyValue>& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || !rWord.getLength()) + return nullptr; + + if (!hasLocale( rLocale )) + return nullptr; + + Reference< XSpellAlternatives > xAlt; + if (!isValid( rWord, rLocale, rProperties )) + { + xAlt = GetProposals( rWord, rLocale ); + } + return xAlt; +} + +/// @throws Exception +static Reference< XInterface > MacSpellChecker_CreateInstance( + const Reference< 
XMultiServiceFactory > & /*rSMgr*/ ) +{ + + Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new MacSpellChecker); + return xService; +} + + +sal_Bool SAL_CALL + MacSpellChecker::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + + +sal_Bool SAL_CALL + MacSpellChecker::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" ); + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + + +OUString SAL_CALL + MacSpellChecker::getServiceDisplayName( const Locale& /*rLocale*/ ) +{ + MutexGuard aGuard( GetLinguMutex() ); + return "macOS Spell Checker"; +} + + +void SAL_CALL + MacSpellChecker::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!xPropHelper.is()) + { + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + //rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. 
+ xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xPropSet ); + xPropHelper->AddAsPropListener(); + } + else + OSL_FAIL( "wrong number of arguments in sequence" ); + + } +} + + +void SAL_CALL + MacSpellChecker::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XSpellChecker *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + } +} + + +void SAL_CALL + MacSpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + + +void SAL_CALL + MacSpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL MacSpellChecker::getImplementationName() +{ + return getImplementationName_Static(); +} + +sal_Bool SAL_CALL MacSpellChecker::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL MacSpellChecker::getSupportedServiceNames() +{ + return getSupportedServiceNames_Static(); +} + +Sequence< OUString > MacSpellChecker::getSupportedServiceNames_Static() + throw() +{ + Sequence< OUString > aSNS { SN_SPELLCHECKER }; + return aSNS; +} + +extern "C" +{ + +SAL_DLLPUBLIC_EXPORT void * MacOSXSpell_component_getFactory( + const char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ ) +{ + void * pRet = nullptr; + if ( MacSpellChecker::getImplementationName_Static().equalsAscii( pImplName ) ) + { + Reference< XSingleServiceFactory > xFactory = + cppu::createOneInstanceFactory( + static_cast< XMultiServiceFactory * >( pServiceManager ), + MacSpellChecker::getImplementationName_Static(), + MacSpellChecker_CreateInstance, + 
MacSpellChecker::getSupportedServiceNames_Static()); + // acquire, because we return an interface pointer instead of a reference + xFactory->acquire(); + pRet = xFactory.get(); + } + return pRet; +} + +} + + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/spell/spell.component b/lingucomponent/source/spellcheck/spell/spell.component new file mode 100644 index 000000000..c11b7a7c3 --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/spell.component @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="spell" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.MySpellSpellChecker"> + <service name="com.sun.star.linguistic2.SpellChecker"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx b/lingucomponent/source/spellcheck/spell/sspellimp.cxx new file mode 100644 index 000000000..04fc7d8e2 --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx @@ -0,0 +1,674 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
 */

#include <com/sun/star/uno/Reference.h>

#include <com/sun/star/linguistic2/SpellFailure.hpp>
#include <com/sun/star/linguistic2/XLinguProperties.hpp>
#include <comphelper/lok.hxx>
#include <comphelper/processfactory.hxx>
#include <cppuhelper/factory.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <com/sun/star/lang/XSingleServiceFactory.hpp>
#include <com/sun/star/lang/XMultiServiceFactory.hpp>
#include <com/sun/star/registry/XRegistryKey.hpp>
#include <tools/debug.hxx>
#include <osl/mutex.hxx>
#include <osl/thread.h>
#include <com/sun/star/ucb/XSimpleFileAccess.hpp>

#include <lingutil.hxx>
#include <hunspell.hxx>
#include "sspellimp.hxx"

#include <linguistic/lngprops.hxx>
#include <linguistic/spelldta.hxx>
#include <i18nlangtag/languagetag.hxx>
#include <svtools/strings.hrc>
#include <unotools/pathoptions.hxx>
#include <unotools/lingucfg.hxx>
#include <unotools/resmgr.hxx>
#include <unotools/useroptions.hxx>
#include <osl/file.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/textenc.h>
#include <sal/log.hxx>

#include <numeric>
#include <utility>
#include <vector>
#include <set>
#include <string.h>

using namespace utl;
using namespace osl;
using namespace com::sun::star;
using namespace com::sun::star::beans;
using namespace com::sun::star::lang;
using namespace com::sun::star::uno;
using namespace com::sun::star::linguistic2;
using namespace linguistic;

// XML-header of SPELLML queries
#if !defined SPELL_XML
#define SPELL_XML "<?xml?>"
#endif

// only available in hunspell >= 1.5
#if !defined MAXWORDLEN
#define MAXWORDLEN 176
#endif

// Sets up the event-listener container with the shared linguistic mutex and
// starts out in the "not disposing" state.
SpellChecker::SpellChecker() :
    m_aEvtListeners(GetLinguMutex()),
    m_bDisposing(false)
{
}

// Stores the dictionary name, its locale and its text encoding; name and
// locale are taken by value and moved into the members.
SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc)
    : m_aDName(std::move(i_DName))
    , m_aDLoc(std::move(i_DLoc))
    , m_aDEnc(i_DEnc)
{
}

// If a property helper exists, it is unregistered as a property listener.
SpellChecker::~SpellChecker()
{
    if (m_pPropHelper)
    {
m_pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl() +{ + if (!m_pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) ); + m_pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *m_pPropHelper; +} + +Sequence< Locale > SAL_CALL SpellChecker::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (m_DictItems.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of extension dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers", + "org.openoffice.lingu.MySpellSpellChecker", aFormatList ); + for (auto const& format : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat(format) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... 
+ std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "DICT" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory()); + uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY); + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); + uno::Sequence< OUString > aLocations( dict.aLocations ); + SAL_WARN_IF( + aLocaleNames.hasElements() && !aLocations.hasElements(), + "lingucomponent", "no locations"); + if (aLocations.hasElements()) + { + if (xAccess.is() && xAccess->exists(aLocations[0])) + { + for (auto const& locale : aLocaleNames) + { + if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(locale)) + continue; + + aLocaleNamesSet.insert(locale); + } + } + else + { + SAL_WARN( + "lingucomponent", + "missing <" << aLocations[0] << ">"); + } + } + } + // ... and add them to the resulting sequence + m_aSuppLocales.realloc( aLocaleNamesSet.size() ); + sal_Int32 k = 0; + for (auto const& localeName : aLocaleNamesSet) + { + Locale aTmp( LanguageTag::convertToLocale(localeName)); + m_aSuppLocales[k++] = aTmp; + } + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. 
+ sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0), + [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + m_DictItems.reserve(nDictSize); + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); + + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (auto const& localeName : aLocaleNames) + { + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + + m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW); + } + } + } + DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" 
); + } + else + { + // no dictionary found so register no dictionaries + m_aSuppLocales.realloc(0); + } + } + + return m_aSuppLocales; +} + +sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_aSuppLocales.hasElements()) + getLocales(); + + for (auto const& suppLocale : std::as_const(m_aSuppLocales)) + { + if (rLocale == suppLocale) + { + bRes = true; + break; + } + } + return bRes; +} + +sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale) +{ + if (rWord.getLength() > MAXWORDLEN) + return -1; + + Hunspell * pMS = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + // initialize a myspell object for each dictionary once + // (note: mutex is held higher up in isValid) + + sal_Int16 nRes = -1; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + sal_Int32 extrachar = 0; + + for (sal_Int32 ix=0; ix < n; ix++) + { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) + rBuf[ix] = u'"'; + else if ((c == 0x2018) || (c == 0x2019)) + rBuf[ix] = u'\''; + + // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only + // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries + // set ICONV and IGNORE aff file options, if needed.) 
// (continues the per-character scan of SpellChecker::GetSpellFailure)
        else if ((c == 0x200C) || (c == 0x200D) ||
            ((c >= 0xFB00) && (c <= 0xFB04)))
                extrachar = 1; // remember that a second, simplified check may be needed
    }
    OUString nWord(rBuf.makeStringAndClear());

    if (n)
    {
        // try every dictionary registered for the requested locale
        for (auto& currDict : m_DictItems)
        {
            pMS = nullptr;
            eEnc = RTL_TEXTENCODING_DONTKNOW;

            if (rLocale == currDict.m_aDLoc)
            {
                // the Hunspell object of a dictionary is created on first use
                if (!currDict.m_pDict)
                {
                    OUString dicpath = currDict.m_aDName + ".dic";
                    OUString affpath = currDict.m_aDName + ".aff";
                    OUString dict;
                    OUString aff;
                    osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
                    osl::FileBase::getSystemPathFromFileURL(affpath,aff);
#if defined(_WIN32)
                    // workaround for Windows specific problem that the
                    // path length in calls to 'fopen' is limited to somewhat
                    // about 120+ characters which will usually be exceed when
                    // using dictionaries as extensions. (Hunspell waits UTF-8 encoded
                    // path with \\?\ long path prefix.)
                    OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8));
                    OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8));
#else
                    OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
                    OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
#endif

                    currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr());
                    // cache the encoding the dictionary reports for itself
#if defined(H_DEPRECATED)
                    currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str());
#else
                    currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding());
#endif
                }
                pMS = currDict.m_pDict.get();
                eEnc = currDict.m_aDEnc;
            }

            if (pMS)
            {
                // we don't want to work with a default text encoding since following incorrect
                // results may occur only for specific text and thus may be hard to notice.
                // Thus better always make a clean exit here if the text encoding is in question.
                // Hopefully something not working at all will raise proper attention quickly. ;-)
                DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
                if (eEnc == RTL_TEXTENCODING_DONTKNOW)
                    return -1;

                OString aWrd(OU2ENC(nWord,eEnc));
#if defined(H_DEPRECATED)
                bool bVal = pMS->spell(std::string(aWrd.getStr()));
#else
                bool bVal = pMS->spell(aWrd.getStr()) != 0;
#endif
                if (!bVal) {
                    // second chance for non-UTF-8 dictionaries: expand the
                    // ligatures U+FB00..U+FB04 to plain letters, drop
                    // U+200C/U+200D and check the resulting word again
                    if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
                        OUStringBuffer aBuf(nWord);
                        n = aBuf.getLength();
                        // walk backwards so that replacements do not shift
                        // the positions still to be visited
                        for (sal_Int32 ix=n-1; ix >= 0; ix--)
                        {
                            switch (aBuf[ix]) {
                                case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break;
                                case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break;
                                case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break;
                                case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break;
                                case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break;
                                case 0x200C:
                                case 0x200D: aBuf.remove(ix, 1); break;
                            }
                        }
                        OUString aWord(aBuf.makeStringAndClear());
                        OString bWrd(OU2ENC(aWord, eEnc));
#if defined(H_DEPRECATED)
                        bVal = pMS->spell(std::string(bWrd.getStr()));
#else
                        bVal = pMS->spell(bWrd.getStr()) != 0;
#endif
                        if (bVal) return -1;
                    }
                    // rejected by this dictionary; later dictionaries for the
                    // same locale may still accept the word
                    nRes = SpellFailure::SPELLING_ERROR;
                } else {
                    // accepted by one dictionary: no failure
                    return -1;
                }
                pMS = nullptr;
            }
        }
    }

    return nRes;
}

// Returns true when rWord is to be treated as correctly spelled for rLocale.
// Empty words, the empty locale and unsupported locales are reported as valid.
sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
        const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
{
    MutexGuard aGuard( GetLinguMutex() );

    if (rLocale == Locale() || rWord.isEmpty())
        return true;

    if (!hasLocale( rLocale ))
        return true;

    // return sal_False to process SPELLML requests (they are longer than the header)
    if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false;

    // Get property values to be used.
    // These are the default values set in the SN_LINGU_PROPERTIES
    // PropertySet which are overridden by the supplied ones from the
    // last argument.
    // You'll probably like to use a simpler solution than the provided
    // one using the PropertyHelper_Spell.
+ PropertyHelper_Spelling& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + sal_Int16 nFailure = GetSpellFailure( rWord, rLocale ); + if (nFailure != -1 && !rWord.match(SPELL_XML, 0)) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + // postprocess result for errors that should be ignored + const bool bIgnoreError = + (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) || + (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) || + (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR); + if (bIgnoreError) + nFailure = -1; + } + + return (nFailure == -1); +} + +Reference< XSpellAlternatives > + SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) +{ + // Retrieves the return values for the 'spell' function call in case + // of a misspelled word. + // Especially it may give a list of suggested (correct) words: + Reference< XSpellAlternatives > xRes; + // note: mutex is held by higher up by spell which covers both + + Hunspell* pMS = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + // first handle smart quotes (single and double) + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) + { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) + rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + int numsug = 0; + + Sequence< OUString > aStr( 0 ); + for (const auto& currDict : m_DictItems) + { + pMS = nullptr; + eEnc = RTL_TEXTENCODING_DONTKNOW; + + if (rLocale == currDict.m_aDLoc) + { + pMS = currDict.m_pDict.get(); + eEnc = currDict.m_aDEnc; + } + + if (pMS) + { + OString aWrd(OU2ENC(nWord,eEnc)); +#if defined(H_DEPRECATED) + std::vector<std::string> suglst = pMS->suggest(std::string(aWrd.getStr())); + if (!suglst.empty()) + { + aStr.realloc(numsug + 
suglst.size()); + OUString *pStr = aStr.getArray(); + for (size_t ii = 0; ii < suglst.size(); ++ii) + { + OUString cvtwrd(suglst[ii].c_str(), suglst[ii].size(), eEnc); + pStr[numsug + ii] = cvtwrd; + } + numsug += suglst.size(); + } +#else + char ** suglst = nullptr; + int count = pMS->suggest(&suglst, aWrd.getStr()); + if (count) + { + aStr.realloc( numsug + count ); + OUString *pStr = aStr.getArray(); + for (int ii=0; ii < count; ++ii) + { + OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc); + pStr[numsug + ii] = cvtwrd; + } + numsug += count; + } + pMS->free_list(&suglst, count); +#endif + } + } + + // now return an empty alternative for no suggestions or the list of alternatives if some found + xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr ); + return xRes; + } + return xRes; +} + +Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell( + const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || rWord.isEmpty()) + return nullptr; + + if (!hasLocale( rLocale )) + return nullptr; + + Reference< XSpellAlternatives > xAlt; + if (!isValid( rWord, rLocale, rProperties )) + { + xAlt = GetProposals( rWord, rLocale ); + } + return xAlt; +} + +/// @throws Exception +static Reference< XInterface > SpellChecker_CreateInstance( + const Reference< XMultiServiceFactory > & /*rSMgr*/ ) +{ + + Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new SpellChecker); + return xService; +} + +sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener( + const 
Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_HUNSPELL, loc); +} + +void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (m_pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) ); + m_pPropHelper->AddAsPropListener(); //! 
after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL SpellChecker::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing) + { + m_bDisposing = true; + EventObject aEvtObj( static_cast<XSpellChecker *>(this) ); + m_aEvtListeners.disposeAndClear( aEvtObj ); + if (m_pPropHelper) + { + m_pPropHelper->RemoveAsPropListener(); + m_pPropHelper.reset(); + } + } +} + +void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing && rxListener.is()) + m_aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing && rxListener.is()) + m_aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL SpellChecker::getImplementationName() +{ + return getImplementationName_Static(); +} + +sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames() +{ + return getSupportedServiceNames_Static(); +} + +Sequence< OUString > SpellChecker::getSupportedServiceNames_Static() + throw() +{ + Sequence< OUString > aSNS { SN_SPELLCHECKER }; + return aSNS; +} + +extern "C" +{ + +SAL_DLLPUBLIC_EXPORT void * spell_component_getFactory( + const char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ ) +{ + void * pRet = nullptr; + if ( SpellChecker::getImplementationName_Static().equalsAscii( pImplName ) ) + { + Reference< XSingleServiceFactory > xFactory = + cppu::createOneInstanceFactory( + static_cast< XMultiServiceFactory * >( pServiceManager ), + SpellChecker::getImplementationName_Static(), + SpellChecker_CreateInstance, + 
SpellChecker::getSupportedServiceNames_Static()); + // acquire, because we return an interface pointer instead of a reference + xFactory->acquire(); + pRet = xFactory.get(); + } + return pRet; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.hxx b/lingucomponent/source/spellcheck/spell/sspellimp.hxx new file mode 100644 index 000000000..3c55d3c9f --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/sspellimp.hxx @@ -0,0 +1,130 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX + +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XSpellChecker.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> +#include <memory> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +class Hunspell; + +class SpellChecker : + public cppu::WeakImplHelper + < + XSpellChecker, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + struct DictItem + { + OUString m_aDName; + Locale m_aDLoc; + std::unique_ptr<Hunspell> m_pDict; + rtl_TextEncoding m_aDEnc; + + DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc); + }; + + std::vector<DictItem> m_DictItems; + + Sequence< Locale > m_aSuppLocales; + + ::comphelper::OInterfaceContainerHelper2 m_aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Spelling> m_pPropHelper; + bool m_bDisposing; + + SpellChecker(const SpellChecker &) = delete; + SpellChecker & operator = (const SpellChecker &) = delete; + + linguistic::PropertyHelper_Spelling& GetPropHelper_Impl(); + linguistic::PropertyHelper_Spelling& GetPropHelper() + { + return m_pPropHelper ? 
*m_pPropHelper : GetPropHelper_Impl(); + } + + sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale ); + Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale ); + +public: + SpellChecker(); + virtual ~SpellChecker() override; + + // XSupportedLocales (for XSpellChecker) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XSpellChecker + virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + + static inline OUString getImplementationName_Static() throw(); + static Sequence< OUString > getSupportedServiceNames_Static() throw(); +}; + +inline 
OUString SpellChecker::getImplementationName_Static() throw() +{ + return "org.openoffice.lingu.MySpellSpellChecker"; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/lnth.component b/lingucomponent/source/thesaurus/libnth/lnth.component new file mode 100644 index 000000000..50aa23c87 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/lnth.component @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + prefix="lnth" xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.new.Thesaurus"> + <service name="com.sun.star.linguistic2.Thesaurus"/> + </implementation> +</component> diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.cxx b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx new file mode 100644 index 000000000..aa7d2afa7 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx @@ -0,0 +1,78 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <osl/mutex.hxx> + +#include "nthesdta.hxx" +#include <linguistic/misc.hxx> + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; + +namespace linguistic +{ + +Meaning::Meaning( + const OUString &rTerm) : + + aSyn ( Sequence< OUString >(1) ), + aTerm (rTerm) + +{ +#if 0 + // this is for future use by a german thesaurus when one exists + bIsGermanPreReform = rHelper.IsGermanPreReform; +#endif +} + +Meaning::~Meaning() +{ +} + +OUString SAL_CALL Meaning::getMeaning() +{ + MutexGuard aGuard( GetLinguMutex() ); + return aTerm; +} + +Sequence< OUString > SAL_CALL Meaning::querySynonyms() +{ + MutexGuard aGuard( GetLinguMutex() ); + return aSyn; +} + +void Meaning::SetSynonyms( const Sequence< OUString > &rSyn ) +{ + MutexGuard aGuard( GetLinguMutex() ); + aSyn = rSyn; +} + +void Meaning::SetMeaning( const OUString &rTerm ) +{ + MutexGuard aGuard( GetLinguMutex() ); + aTerm = rTerm; +} + +} // namespace linguistic + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.hxx b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx new file mode 100644 index 000000000..8e6cb7561 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX + +#include <com/sun/star/linguistic2/XMeaning.hpp> +#include <cppuhelper/implbase.hxx> + +namespace linguistic +{ + +class Meaning : + public cppu::WeakImplHelper< css::linguistic2::XMeaning > +{ + css::uno::Sequence< OUString > aSyn; // list of synonyms, may be empty. + OUString aTerm; + +#if 0 + // this is for future use by a German thesaurus + sal_Bool bIsGermanPreReform; +#endif + + Meaning(const Meaning &) = delete; + Meaning & operator = (const Meaning &) = delete; + +public: + explicit Meaning(const OUString &rTerm); + virtual ~Meaning() override; + + // XMeaning + virtual OUString SAL_CALL getMeaning() override; + virtual css::uno::Sequence< OUString > SAL_CALL querySynonyms() override; + + // non-interface specific functions + void SetSynonyms( const css::uno::Sequence< OUString > &rSyn ); + void SetMeaning( const OUString &rTerm ); +}; + +} // namespace linguistic + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx new file mode 100644 index 000000000..e165d0962 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx @@ -0,0 +1,604 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <com/sun/star/lang/XSingleServiceFactory.hpp> +#include <com/sun/star/registry/XRegistryKey.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/linguistic2/LinguServiceManager.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <com/sun/star/linguistic2/XSpellChecker1.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <comphelper/lok.hxx> +#include <comphelper/processfactory.hxx> +#include <comphelper/sequence.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> +#include <unotools/pathoptions.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> + +#include <rtl/string.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> + +#include <svtools/strings.hrc> + +#include "nthesimp.hxx" +#include <linguistic/misc.hxx> +#include <linguistic/lngprops.hxx> +#include "nthesdta.hxx" + +#include <vector> +#include <numeric> +#include <set> +#include <string.h> + +// XML-header to query SPELLML support +#define SPELLML_SUPPORT "<?xml?>" + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace 
com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl() +{ + uno::Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() ); + uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ; + return xRes; +} + +Thesaurus::Thesaurus() : + aEvtListeners ( GetLinguMutex() ), pPropHelper(nullptr), bDisposing(false), + prevLocale(LANGUAGE_DONTKNOW) +{ +} + +Thesaurus::~Thesaurus() +{ + mvThesInfo.clear(); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Thesaurus::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvThesInfo.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Thesauri", + "org.openoffice.lingu.new.Thesaurus", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... 
+ std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "THES" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(rLocaleName)) + continue; + + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) -> Locale { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numthes = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvThesInfo.resize(numthes); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. 
+ // Once for each of its supported locales. + for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW; + mvThesInfo[k].aLocale = aLanguageTag.getLocale(); + mvThesInfo[k].aCharSetInfo.reset( new CharClass( aLanguageTag ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvThesInfo[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numthes, "index mismatch?" ); + } + else + { + /* no dictionary found so register no dictionaries */ + mvThesInfo.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings( + const OUString& qTerm, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties) +{ + MutexGuard aGuard( GetLinguMutex() ); + + uno::Sequence< Reference< XMeaning > > aMeanings( 1 ); + uno::Sequence< Reference< XMeaning > > noMeanings( 0 ); + uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() ); + uno::Reference< XSpellChecker1 > xSpell; + + OUString aRTerm(qTerm); + OUString aPTerm(qTerm); + CapType ct = CapType::UNKNOWN; + sal_Int32 stem = 0; + sal_Int32 stem2 = 0; + + LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); + + if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty()) + return noMeanings; + + if (!hasLocale( rLocale )) +#ifdef LINGU_EXCEPTIONS + throw( IllegalArgumentException() ); 
+#else + return noMeanings; +#endif + + if (prevTerm == qTerm && prevLocale == nLanguage) + return prevMeanings; + + mentry * pmean = nullptr; + sal_Int32 nmean = 0; + + PropertyHelper_Thesaurus &rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + MyThes * pTH = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass * pCC = nullptr; + + // find the first thesaurus that matches the locale + for (size_t i =0; i < mvThesInfo.size(); i++) + { + if (rLocale == mvThesInfo[i].aLocale) + { + // open up and initialize this thesaurus if need be + if (!mvThesInfo[i].aThes) + { + OUString datpath = mvThesInfo[i].aName + ".dat"; + OUString idxpath = mvThesInfo[i].aName + ".idx"; + OUString ndat; + OUString nidx; + osl::FileBase::getSystemPathFromFileURL(datpath,ndat); + osl::FileBase::getSystemPathFromFileURL(idxpath,nidx); + +#if defined(_WIN32) + // MyThes waits UTF-8 encoded paths with \\?\ long path prefix. + OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8)); + OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8)); +#else + OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding())); + OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding())); +#endif + + mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) ); + mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding()); + } + pTH = mvThesInfo[i].aThes.get(); + eEnc = mvThesInfo[i].aEncoding; + pCC = mvThesInfo[i].aCharSetInfo.get(); + + if (pTH) + break; + } + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. 
;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return noMeanings; + + while (pTH) + { + // convert word to all lower case for searching + if (!stem) + ct = capitalType(aRTerm, pCC); + OUString nTerm(makeLowerCase(aRTerm, pCC)); + OString aTmp( OU2ENC(nTerm, eEnc) ); + nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean); + + if (nmean) + aMeanings.realloc( nmean ); + + mentry * pe = pmean; + OUString codeTerm = qTerm; + Reference< XSpellAlternatives > xTmpRes2; + + if (stem) + { + xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" + + aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes2.is()) + { + Sequence<OUString>seq = xTmpRes2->getAlternatives(); + if (seq.hasElements()) + { + codeTerm = seq[0]; + stem2 = 1; + } + } + } + + for (int j = 0; j < nmean; j++) + { + int count = pe->count; + if (count) + { + Sequence< OUString > aStr( count ); + OUString *pStr = aStr.getArray(); + + for (int i=0; i < count; i++) + { + OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc ); + sal_Int32 catpos = sTerm.indexOf('('); + OUString catst; + if (catpos > 2) + { + // remove category name for affixation and casing + catst = " " + sTerm.copy(catpos); + sTerm = sTerm.copy(0, catpos); + sTerm = sTerm.trim(); + } + // generate synonyms with affixes + if (stem && stem2) + { + Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" + + sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + sTerm = seq[0]; + } + } + + CapType ct1 = capitalType(sTerm, pCC); + if (CapType::MIXED == ct1) + ct = ct1; + OUString cTerm; + switch (ct) + { + case CapType::ALLCAP: + cTerm = makeUpperCase(sTerm, pCC); + break; + case 
CapType::INITCAP: + cTerm = makeInitCap(sTerm, pCC); + break; + default: + cTerm = sTerm; + break; + } + OUString aAlt( cTerm + catst); + pStr[i] = aAlt; + } + Meaning * pMn = new Meaning(aRTerm); + OUString dTerm(pe->defn,strlen(pe->defn),eEnc ); + pMn->SetMeaning(dTerm); + pMn->SetSynonyms(aStr); + Reference<XMeaning>* pMeaning = aMeanings.getArray(); + pMeaning[j] = pMn; + } + pe++; + } + pTH->CleanUpAfterLookup(&pmean,nmean); + + if (nmean) + { + prevTerm = qTerm; + prevMeanings = aMeanings; + prevLocale = nLanguage; + return aMeanings; + } + + if (stem || !xLngSvcMgr.is()) + return noMeanings; + stem = 1; + + xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY ); + if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties )) + return noMeanings; + Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" + + aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + { + aRTerm = seq[0]; // XXX Use only the first stem + continue; + } + } + + // stem the last word of the synonym (for categories after affixation) + aRTerm = aRTerm.trim(); + sal_Int32 pos = aRTerm.lastIndexOf(' '); + if (!pos) + return noMeanings; + xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" + + aRTerm.copy(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + { + aPTerm = aRTerm.copy(pos + 1); + aRTerm = aRTerm.copy(0, pos + 1) + seq[0]; +#if 0 + for (int i = 0; i < seq.getLength(); i++) + { + OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8); + fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer); + } +#endif + continue; + } + } + break; + } + return noMeanings; +} + +/// @throws Exception +static Reference< XInterface > 
Thesaurus_CreateInstance( + const Reference< XMultiServiceFactory > & /*rSMgr*/ ) +{ + Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new Thesaurus); + return xService; +} + +OUString SAL_CALL Thesaurus::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_MYTHES, loc); +} + +void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (1 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet ); + pPropHelper->AddAsPropListener(); //! 
after a reference is established + } + else + OSL_FAIL( "wrong number of arguments in sequence" ); +} + +OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + { + return ( pCC->uppercase(bTemp, 0, 1) + + pCC->lowercase(aTerm,1,(tlen-1)) ); + } + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +void SAL_CALL Thesaurus::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XThesaurus *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + delete pPropHelper; + pPropHelper = nullptr; + } + } +} + +void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Thesaurus::getImplementationName() +{ + return getImplementationName_Static(); +} + +sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames() +{ + return getSupportedServiceNames_Static(); +} + +Sequence< OUString > 
Thesaurus::getSupportedServiceNames_Static() + throw() +{ + Sequence< OUString > aSNS { SN_THESAURUS }; + return aSNS; +} + +extern "C" +{ +SAL_DLLPUBLIC_EXPORT void * lnth_component_getFactory( + const char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ ) +{ + void * pRet = nullptr; + if ( Thesaurus::getImplementationName_Static().equalsAscii( pImplName ) ) + { + + Reference< XSingleServiceFactory > xFactory = + cppu::createOneInstanceFactory( + static_cast< XMultiServiceFactory * >( pServiceManager ), + Thesaurus::getImplementationName_Static(), + Thesaurus_CreateInstance, + Thesaurus::getSupportedServiceNames_Static()); + // acquire, because we return an interface pointer instead of a reference + xFactory->acquire(); + pRet = xFactory.get(); + } + return pRet; +} +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx new file mode 100644 index 000000000..cef9016b1 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx @@ -0,0 +1,138 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX + +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/uno/Reference.h> +#include <com/sun/star/uno/Sequence.h> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XMeaning.hpp> +#include <com/sun/star/linguistic2/XThesaurus.hpp> + +#include <unotools/charclass.hxx> + +#include <lingutil.hxx> +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <osl/file.hxx> +#include <mythes.hxx> +#include <memory> +#include <vector> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +namespace com::sun::star::beans { class XPropertySet; } + +class Thesaurus : + public cppu::WeakImplHelper + < + XThesaurus, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + + ::comphelper::OInterfaceContainerHelper2 aEvtListeners; + linguistic::PropertyHelper_Thesaurus* pPropHelper; + bool bDisposing; + struct ThesInfo + { + std::unique_ptr<CharClass> aCharSetInfo; + std::unique_ptr<MyThes> aThes; + rtl_TextEncoding aEncoding; + Locale aLocale; + OUString aName; + }; + std::vector<ThesInfo> mvThesInfo; + + // cache for the Thesaurus dialog + Sequence < Reference < css::linguistic2::XMeaning > > prevMeanings; + OUString prevTerm; + LanguageType prevLocale; + + Thesaurus(const Thesaurus &) = delete; + Thesaurus & operator = (const Thesaurus &) = delete; + + 
linguistic::PropertyHelper_Thesaurus& GetPropHelper_Impl(); + linguistic::PropertyHelper_Thesaurus& GetPropHelper() + { + return pPropHelper ? *pPropHelper : GetPropHelper_Impl(); + } + +public: + Thesaurus(); + virtual ~Thesaurus() override; + + // XSupportedLocales (for XThesaurus) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XThesaurus + virtual Sequence< Reference < css::linguistic2::XMeaning > > SAL_CALL queryMeanings( const OUString& rTerm, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + + static inline OUString + getImplementationName_Static() throw(); + static Sequence< OUString > + getSupportedServiceNames_Static() throw(); + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +inline OUString Thesaurus::getImplementationName_Static() throw() +{ + return "org.openoffice.lingu.new.Thesaurus"; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |