summary refs log tree commit diff stats
path: root/lingucomponent/source
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--lingucomponent/source/hyphenator/hyphen/hyphen.component26
-rw-r--r--lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx805
-rw-r--r--lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx126
-rw-r--r--lingucomponent/source/languageguessing/guess.cxx100
-rw-r--r--lingucomponent/source/languageguessing/guess.hxx54
-rw-r--r--lingucomponent/source/languageguessing/guesslang.component26
-rw-r--r--lingucomponent/source/languageguessing/guesslang.cxx321
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.cxx221
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.hxx108
-rw-r--r--lingucomponent/source/lingutil/lingutil.cxx304
-rw-r--r--lingucomponent/source/lingutil/lingutil.hxx55
-rw-r--r--lingucomponent/source/numbertext/numbertext.component26
-rw-r--r--lingucomponent/source/numbertext/numbertext.cxx168
-rw-r--r--lingucomponent/source/spellcheck/languagetool/LanguageTool.component26
-rw-r--r--lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx407
-rw-r--r--lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx91
-rw-r--r--lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component26
-rw-r--r--lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx123
-rw-r--r--lingucomponent/source/spellcheck/macosxspell/macspellimp.mm666
-rw-r--r--lingucomponent/source/spellcheck/spell/spell.component26
-rw-r--r--lingucomponent/source/spellcheck/spell/sspellimp.cxx635
-rw-r--r--lingucomponent/source/spellcheck/spell/sspellimp.hxx120
-rw-r--r--lingucomponent/source/thesaurus/libnth/lnth.component26
-rw-r--r--lingucomponent/source/thesaurus/libnth/nthesdta.cxx78
-rw-r--r--lingucomponent/source/thesaurus/libnth/nthesdta.hxx60
-rw-r--r--lingucomponent/source/thesaurus/libnth/nthesimp.cxx571
-rw-r--r--lingucomponent/source/thesaurus/libnth/nthesimp.hxx129
27 files changed, 5324 insertions, 0 deletions
diff --git a/lingucomponent/source/hyphenator/hyphen/hyphen.component b/lingucomponent/source/hyphenator/hyphen/hyphen.component
new file mode 100644
index 000000000..b9bc8b1f3
--- /dev/null
+++ b/lingucomponent/source/hyphenator/hyphen/hyphen.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="org.openoffice.lingu.LibHnjHyphenator"
+ constructor="lingucomponent_Hyphenator_get_implementation" single-instance="true">
+ <service name="com.sun.star.linguistic2.Hyphenator"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
new file mode 100644
index 000000000..8ac156ef8
--- /dev/null
+++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
@@ -0,0 +1,805 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/uno/Reference.h>
+
+#include <comphelper/sequence.hxx>
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/weak.hxx>
+#include <com/sun/star/linguistic2/XLinguProperties.hpp>
+#include <i18nlangtag/languagetag.hxx>
+#include <tools/debug.hxx>
+#include <osl/mutex.hxx>
+#include <osl/thread.h>
+
+#include <hyphen.h>
+#include "hyphenimp.hxx"
+
+#include <linguistic/hyphdta.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <rtl/textenc.h>
+#include <sal/log.hxx>
+
+#include <linguistic/misc.hxx>
+#include <svtools/strings.hrc>
+#include <unotools/charclass.hxx>
+#include <unotools/lingucfg.hxx>
+#include <unotools/resmgr.hxx>
+#include <osl/file.hxx>
+
+#include <stdio.h>
+#include <string.h>
+
+#include <cassert>
+#include <numeric>
+#include <vector>
+#include <set>
+#include <memory>
+
+using namespace utl;
+using namespace osl;
+using namespace com::sun::star;
+using namespace com::sun::star::beans;
+using namespace com::sun::star::lang;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::linguistic2;
+using namespace linguistic;
+
+// Builds the listener container on the shared linguistic mutex and starts
+// out in the "not disposing" state. No dictionary is touched here.
+Hyphenator::Hyphenator()
+    : aEvtListeners( GetLinguMutex() )
+    , bDisposing( false )
+{
+}
+
+// Frees every dictionary handle that was loaded (non-null aPtr) via
+// hnj_hyphen_free and, if a property helper exists, unregisters it as
+// property listener.
+Hyphenator::~Hyphenator()
+{
+    for (HDInfo& rEntry : mvDicts)
+    {
+        if (rEntry.aPtr == nullptr)
+            continue;
+        hnj_hyphen_free(rEntry.aPtr);
+    }
+
+    if (!pPropHelper)
+        return;
+
+    pPropHelper->RemoveAsPropListener();
+}
+
+// Returns the hyphenation property helper, creating it on first use from
+// the properties delivered by GetLinguProperties() and registering it as
+// property listener.
+PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
+{
+    if (pPropHelper)
+        return *pPropHelper;
+
+    Reference< XLinguProperties > xLinguProps = GetLinguProperties();
+
+    pPropHelper.reset(
+        new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xLinguProps ) );
+    pPropHelper->AddAsPropListener(); //! after a reference is established
+
+    return *pPropHelper;
+}
+
+// Returns the locales for which a hyphenation dictionary is configured.
+//
+// While mvDicts is still empty the dictionary list is gathered from the
+// linguistic configuration (new style entries merged with old style
+// "HYPH" dictionaries) and cached: aSuppLocales receives the distinct
+// locales, mvDicts receives one entry per (dictionary, locale) pair.
+// The dictionaries themselves are not loaded here; aPtr stays null and
+// eEnc stays RTL_TEXTENCODING_DONTKNOW until LoadDictionary() is called.
+//
+// Runs under the linguistic mutex.
+Sequence< Locale > SAL_CALL Hyphenator::getLocales()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    // this routine should return the locales supported by the installed
+    // dictionaries.
+    if (mvDicts.empty())
+    {
+        SvtLinguConfig aLinguCfg;
+
+        // get list of dictionaries-to-use
+        // (or better speaking: the list of dictionaries using the
+        // new configuration entries).
+        std::vector< SvtLinguConfigDictionaryEntry > aDics;
+        uno::Sequence< OUString > aFormatList;
+        aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators",
+                "org.openoffice.lingu.LibHnjHyphenator", aFormatList );
+        for (const auto& rFormat : std::as_const(aFormatList))
+        {
+            std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
+                    aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
+            aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
+        }
+
+        //!! for compatibility with old dictionaries (the ones not using extensions
+        //!! or new configuration entries, but still using the dictionary.lst file)
+        //!! Get the list of old style spell checking dictionaries to use...
+        std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
+                GetOldStyleDics( "HYPH" ) );
+
+        // to prefer dictionaries with configuration entries we will only
+        // use those old style dictionaries that add a language that
+        // is not yet supported by the list of new style dictionaries
+        MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
+
+        if (!aDics.empty())
+        {
+            // get supported locales from the dictionaries-to-use...
+            std::set<OUString> aLocaleNamesSet;
+            for (auto const& dict : aDics)
+            {
+                for (const auto& rLocaleName : dict.aLocaleNames)
+                {
+                    aLocaleNamesSet.insert( rLocaleName );
+                }
+            }
+            // ... and add them to the resulting sequence
+            std::vector<Locale> aLocalesVec;
+            aLocalesVec.reserve(aLocaleNamesSet.size());
+
+            std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
+                [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); });
+
+            aSuppLocales = comphelper::containerToSequence(aLocalesVec);
+
+            //! For each dictionary and each locale we need a separate entry.
+            //! If this results in more than one dictionary per locale then (for now)
+            //! it is undefined which dictionary gets used.
+            //! In the future the implementation should support using several dictionaries
+            //! for one locale.
+            sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0,
+                [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
+                    return nSum + dict.aLocaleNames.getLength(); });
+
+            // add dictionary information
+            mvDicts.resize(numdict);
+
+            sal_Int32 k = 0;
+            for (auto const& dict : aDics)
+            {
+                if (dict.aLocaleNames.hasElements() &&
+                    dict.aLocations.hasElements())
+                {
+                    // currently only one language per dictionary is supported in the actual implementation...
+                    // Thus here we work-around this by adding the same dictionary several times.
+                    // Once for each of its supported locales.
+                    for (const auto& rLocaleName : dict.aLocaleNames)
+                    {
+                        LanguageTag aLanguageTag(rLocaleName);
+                        HDInfo& rInfo = mvDicts[k];
+                        rInfo.aPtr = nullptr;
+                        rInfo.eEnc = RTL_TEXTENCODING_DONTKNOW;
+                        rInfo.aLoc = aLanguageTag.getLocale();
+                        rInfo.apCC.reset( new CharClass( std::move(aLanguageTag) ) );
+                        // also both files have to be in the same directory and the
+                        // file names must only differ in the extension (.aff/.dic).
+                        // Thus we use the first location only and strip the extension part.
+                        OUString aLocation = dict.aLocations[0];
+                        const sal_Int32 nPos = aLocation.lastIndexOf( '.' );
+                        // lastIndexOf yields -1 when there is no '.'; copy()
+                        // must not be called with a negative count, so a
+                        // location without extension is used unchanged.
+                        if (nPos >= 0)
+                            aLocation = aLocation.copy( 0, nPos );
+                        rInfo.aName = aLocation;
+
+                        ++k;
+                    }
+                }
+            }
+            DBG_ASSERT( k == numdict, "index mismatch?" );
+            // numdict counts the locale names of every entry, but entries
+            // without a location are skipped above. Drop the slots that were
+            // reserved but never filled so that mvDicts only holds usable
+            // entries.
+            mvDicts.resize(k);
+        }
+        else
+        {
+            // no dictionary found so register no dictionaries
+            mvDicts.clear();
+            aSuppLocales.realloc(0);
+        }
+    }
+
+    return aSuppLocales;
+}
+
+// Tells whether rLocale is contained in the supported-locale list.
+// If that list is still empty, getLocales() is called first to fill it.
+sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    const bool bListEmpty = !aSuppLocales.hasElements();
+    if (bListEmpty)
+        getLocales();
+
+    const auto nFoundAt = comphelper::findValue(aSuppLocales, rLocale);
+    return nFoundAt != -1;
+}
+
+namespace {
+// Loads the "<aName>.dic" file of rDict through hnj_hyphen_load.
+// On success the handle is stored in rDict.aPtr, rDict.eEnc is derived from
+// the dictionary's cset field and true is returned. On failure a warning is
+// logged, rDict is left unchanged and false is returned.
+bool LoadDictionary(HDInfo& rDict)
+{
+    const OUString aDicURL = rDict.aName + ".dic";
+    OUString aSysPath;
+
+    osl::FileBase::getSystemPathFromFileURL(aDicURL, aSysPath);
+
+#if defined(_WIN32)
+    // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix.
+    OString aEncodedPath = Win_AddLongPathPrefix(OUStringToOString(aSysPath, RTL_TEXTENCODING_UTF8));
+#else
+    OString aEncodedPath(OU2ENC(aSysPath, osl_getThreadTextEncoding()));
+#endif
+    HyphenDict *pLoaded = hnj_hyphen_load(aEncodedPath.getStr());
+    if (pLoaded == nullptr)
+    {
+        SAL_WARN(
+            "lingucomponent",
+            "Couldn't find file " << aSysPath);
+        return false;
+    }
+
+    rDict.aPtr = pLoaded;
+    rDict.eEnc = getTextEncodingFromCharset(pLoaded->cset);
+    return true;
+}
+}
+
+// Computes a single hyphenation point for aWord in the given locale.
+//
+// The dictionary is the entry of mvDicts whose aLoc equals aLocale (the
+// loop has no break, so the last matching entry is used) and is loaded on
+// demand. Among all positions accepted by the checks below the right-most
+// one is kept, because the scan loop overwrites nHyphenationPos on every
+// hit.
+//
+// Returns a null reference when
+//  - no dictionary entry matches aLocale, or loading it fails,
+//  - upper case words are excluded by the properties and aWord equals its
+//    upper case form,
+//  - the dictionary's text encoding is unknown,
+//  - hnj_hyphen_hyphenate3 reports an error, or
+//  - no position passes the checks.
+// Otherwise the result comes from HyphenatedWord::CreateHyphenatedWord; for
+// a discretionary hyphenation (a replacement in rep[]) the hyphenated form
+// is aWord with the replacement text spliced in.
+//
+// aProperties are applied as temporary property values on the helper.
+// nMaxLeading is passed to GetPosInWordToCheck; positions must lie below
+// the value it returns.
+Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
+ const css::lang::Locale& aLocale,
+ sal_Int16 nMaxLeading,
+ const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
+{
+ PropertyHelper_Hyphenation& rHelper = GetPropHelper();
+ rHelper.SetTmpPropVals(aProperties);
+ sal_Int16 minTrail = rHelper.GetMinTrailing();
+ sal_Int16 minLead = rHelper.GetMinLeading();
+ sal_Int16 minLen = rHelper.GetMinWordLength();
+ bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps();
+
+ rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+
+ Reference< XHyphenatedWord > xRes;
+
+ // index of the dictionary entry for aLocale, -1 if there is none
+ int k = -1;
+ for (size_t j = 0; j < mvDicts.size(); ++j)
+ {
+ if (aLocale == mvDicts[j].aLoc)
+ k = j;
+ }
+
+ // if we have a hyphenation dictionary matching this locale
+ if (k != -1)
+ {
+ int nHyphenationPos = -1;
+ int nHyphenationPosAlt = -1;
+ int nHyphenationPosAltHyph = -1;
+
+ // if this dictionary has not been loaded yet do that
+ if (!mvDicts[k].aPtr)
+ {
+ if (!LoadDictionary(mvDicts[k]))
+ return nullptr;
+ }
+
+ // otherwise hyphenate the word with that dictionary
+ HyphenDict *dict = mvDicts[k].aPtr;
+ eEnc = mvDicts[k].eEnc;
+ CharClass * pCC = mvDicts[k].apCC.get();
+
+ // Don't hyphenate uppercase words if requested
+ if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC))
+ {
+ return nullptr;
+ }
+
+ // we don't want to work with a default text encoding since following incorrect
+ // results may occur only for specific text and thus may be hard to notice.
+ // Thus better always make a clean exit here if the text encoding is in question.
+ // Hopefully something not working at all will raise proper attention quickly. ;-)
+ DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
+ if (eEnc == RTL_TEXTENCODING_DONTKNOW)
+ return nullptr;
+
+ // capitalisation of the original word, used further down to re-case
+ // the replacement text of a discretionary hyphenation
+ CapType ct = capitalType(aWord, pCC);
+
+ // first convert any smart quotes or apostrophes to normal ones
+ OUStringBuffer rBuf(aWord);
+ sal_Int32 nc = rBuf.getLength();
+ sal_Unicode ch;
+ for (sal_Int32 ix=0; ix < nc; ix++)
+ {
+ ch = rBuf[ix];
+ if ((ch == 0x201C) || (ch == 0x201D))
+ rBuf[ix] = u'"';
+ if ((ch == 0x2018) || (ch == 0x2019))
+ rBuf[ix] = u'\'';
+ }
+ OUString nWord(rBuf.makeStringAndClear());
+
+ // now convert word to all lowercase for pattern recognition
+ OUString nTerm(makeLowerCase(nWord, pCC));
+
+ // now convert word to needed encoding
+ OString encWord(OU2ENC(nTerm,eEnc));
+
+ int wordlen = encWord.getLength();
+ std::unique_ptr<char[]> lcword(new char[wordlen + 1]);
+ std::unique_ptr<char[]> hyphens(new char[wordlen + 5]);
+
+ // rep/pos/cut are filled in by hnj_hyphen_hyphenate3 and released with
+ // free() on every exit path below
+ char ** rep = nullptr; // replacements of discretionary hyphenation
+ int * pos = nullptr; // array of [hyphenation point] minus [deletion position]
+ int * cut = nullptr; // length of deletions in original word
+
+ // copy converted word into simple char buffer
+ strcpy(lcword.get(),encWord.getStr());
+
+ // now strip off any ending periods
+ // (afterwards n is the length of the encoded word without them)
+ int n = wordlen-1;
+ while((n >=0) && (lcword[n] == '.'))
+ n--;
+ n++;
+ if (n > 0)
+ {
+ const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), nullptr,
+ &rep, &pos, &cut, minLead, minTrail,
+ std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))),
+ std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) );
+ if (bFailed)
+ {
+ // whoops something did not work
+ if (rep)
+ {
+ for(int j = 0; j < n; j++)
+ {
+ if (rep[j]) free(rep[j]);
+ }
+ free(rep);
+ }
+ if (pos) free(pos);
+ if (cut) free(cut);
+ return nullptr;
+ }
+ }
+
+ // now backfill hyphens[] for any removed trailing periods
+ for (int c = n; c < wordlen; c++) hyphens[c] = '0';
+ hyphens[wordlen] = '\0';
+
+ sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
+
+ // scan all positions; every hit overwrites the previous one, so the
+ // last accepted position wins
+ for (sal_Int32 i = 0; i < n; i++)
+ {
+ int leftrep = 0;
+ bool hit = (n >= minLen);
+ if (!rep || !rep[i])
+ {
+ // plain hyphenation point: an odd value in hyphens[i] marks a break
+ hit = hit && (hyphens[i]&1) && (i < Leading);
+ hit = hit && (i >= (minLead-1) );
+ hit = hit && ((n - i - 1) >= minTrail);
+ }
+ else
+ {
+ // calculate change character length before hyphenation point signed with '='
+ for (char * c = rep[i]; *c && (*c != '='); c++)
+ {
+ if (eEnc == RTL_TEXTENCODING_UTF8)
+ {
+ // count only bytes that are not UTF-8 continuation bytes (10xxxxxx)
+ if (static_cast<unsigned char>(*c) >> 6 != 2)
+ leftrep++;
+ }
+ else
+ leftrep++;
+ }
+ hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
+ hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
+ hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
+ }
+ if (hit)
+ {
+ nHyphenationPos = i;
+ if (rep && rep[i])
+ {
+ nHyphenationPosAlt = i - pos[i];
+ nHyphenationPosAltHyph = i + leftrep - pos[i];
+ }
+ }
+ }
+
+ if (nHyphenationPos == -1)
+ {
+ xRes = nullptr;
+ }
+ else
+ {
+ if (rep && rep[nHyphenationPos])
+ {
+ // remove equal sign
+ // (in place: from the first '=' on, each char is replaced by its successor)
+ char * s = rep[nHyphenationPos];
+ int eq = 0;
+ for (; *s; s++)
+ {
+ if (*s == '=') eq = 1;
+ if (eq) *s = *(s + 1);
+ }
+ OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
+ OUString repHyph;
+ // give the replacement text the capitalisation of the original word
+ switch (ct)
+ {
+ case CapType::ALLCAP:
+ {
+ repHyph = makeUpperCase(repHyphlow, pCC);
+ break;
+ }
+ case CapType::INITCAP:
+ {
+ if (nHyphenationPosAlt == -1)
+ repHyph = makeInitCap(repHyphlow, pCC);
+ else
+ repHyph = repHyphlow;
+ break;
+ }
+ default:
+ {
+ repHyph = repHyphlow;
+ break;
+ }
+ }
+
+ // handle shortening
+ sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ?
+ nHyphenationPosAltHyph : nHyphenationPos);
+ // discretionary hyphenation
+ xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
+ aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
+ static_cast<sal_Int16>(nHyphenationPosAltHyph));
+ }
+ else
+ {
+ xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
+ static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos));
+ }
+ }
+
+ // release the data handed out by hnj_hyphen_hyphenate3
+ if (rep)
+ {
+ for(int j = 0; j < n; j++)
+ {
+ if (rep[j]) free(rep[j]);
+ }
+ free(rep);
+ }
+ if (pos) free(pos);
+ if (cut) free(cut);
+ return xRes;
+ }
+ return nullptr;
+}
+
+// Looks for an alternative spelling whose hyphenation position is exactly
+// nIndex. hyphenate() is tried twice, allowing first one and then two
+// characters beyond nIndex + 1 as leading limit; the first result that is an
+// alternative spelling at nIndex is returned, otherwise a null reference.
+Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
+        const OUString& aWord,
+        const css::lang::Locale& aLocale,
+        sal_Int16 nIndex,
+        const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
+{
+    // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
+    for (int nExtra = 1; nExtra <= 2; ++nExtra)
+    {
+        Reference< XHyphenatedWord > xCandidate = hyphenate(aWord, aLocale, nIndex + 1 + nExtra, aProperties);
+        if (!xCandidate.is())
+            continue;
+        if (!xCandidate->isAlternativeSpelling())
+            continue;
+        if (xCandidate->getHyphenationPos() != nIndex)
+            continue;
+        return xCandidate;
+    }
+    return nullptr;
+}
+
+// Determines all hyphenation positions of aWord for the given locale.
+//
+// Words shorter than the minimal word length from the properties yield a
+// result without any positions. Otherwise the dictionary entry of mvDicts
+// whose aLoc equals aLocale is used (last match wins) and loaded on demand.
+//
+// Returns a null reference when no dictionary entry matches, loading fails,
+// the dictionary's text encoding is unknown or hnj_hyphen_hyphenate3
+// reports an error. Otherwise the result carries aWord, a copy of aWord with
+// '=' inserted after every hyphenation position, and the positions
+// themselves. The position sequence contains exactly the positions that
+// were marked in the hyphenated string; positions above SAL_MAX_INT16
+// cannot be represented and are skipped with a warning.
+Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
+        const css::lang::Locale& aLocale,
+        const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
+{
+    PropertyHelper_Hyphenation& rHelper = GetPropHelper();
+    rHelper.SetTmpPropVals(aProperties);
+    sal_Int16 minTrail = rHelper.GetMinTrailing();
+    sal_Int16 minLead = rHelper.GetMinLeading();
+    sal_Int16 minLen = rHelper.GetMinWordLength();
+
+    // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
+    // well as "hyphenate"
+    if (aWord.getLength() < minLen)
+    {
+        return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ),
+                      aWord, Sequence< sal_Int16 >() );
+    }
+
+    // index of the dictionary entry for aLocale, -1 if there is none
+    int k = -1;
+    for (size_t j = 0; j < mvDicts.size(); ++j)
+    {
+        if (aLocale == mvDicts[j].aLoc)
+            k = j;
+    }
+
+    // without a hyphenation dictionary matching this locale there is no result
+    if (k == -1)
+        return nullptr;
+
+    // if this dictionary has not been loaded yet do that
+    if (!mvDicts[k].aPtr)
+    {
+        if (!LoadDictionary(mvDicts[k]))
+            return nullptr;
+    }
+
+    // otherwise hyphenate the word with that dictionary
+    HyphenDict *dict = mvDicts[k].aPtr;
+    rtl_TextEncoding eEnc = mvDicts[k].eEnc;
+    CharClass* pCC = mvDicts[k].apCC.get();
+
+    // we don't want to work with a default text encoding since following incorrect
+    // results may occur only for specific text and thus may be hard to notice.
+    // Thus better always make a clean exit here if the text encoding is in question.
+    // Hopefully something not working at all will raise proper attention quickly. ;-)
+    DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
+    if (eEnc == RTL_TEXTENCODING_DONTKNOW)
+        return nullptr;
+
+    // first handle smart quotes both single and double
+    OUStringBuffer rBuf(aWord);
+    sal_Int32 nc = rBuf.getLength();
+    sal_Unicode ch;
+    for (sal_Int32 ix=0; ix < nc; ix++)
+    {
+        ch = rBuf[ix];
+        if ((ch == 0x201C) || (ch == 0x201D))
+            rBuf[ix] = u'"';
+        if ((ch == 0x2018) || (ch == 0x2019))
+            rBuf[ix] = u'\'';
+    }
+    OUString nWord(rBuf.makeStringAndClear());
+
+    // now convert word to all lowercase for pattern recognition
+    OUString nTerm(makeLowerCase(nWord, pCC));
+
+    // now convert word to needed encoding
+    OString encWord(OU2ENC(nTerm,eEnc));
+
+    sal_Int32 wordlen = encWord.getLength();
+    std::unique_ptr<char[]> lcword(new char[wordlen+1]);
+    std::unique_ptr<char[]> hyphens(new char[wordlen+5]);
+    char ** rep = nullptr; // replacements of discretionary hyphenation
+    int * pos = nullptr; // array of [hyphenation point] minus [deletion position]
+    int * cut = nullptr; // length of deletions in original word
+
+    // copy converted word into simple char buffer
+    strcpy(lcword.get(),encWord.getStr());
+
+    // first remove any trailing periods
+    sal_Int32 n = wordlen-1;
+    while((n >=0) && (lcword[n] == '.'))
+        n--;
+    n++;
+
+    // Releases the data handed out by hnj_hyphen_hyphenate3. rep, pos and
+    // cut are captured by reference because they are only set by the call
+    // below; free() accepts null pointers.
+    const auto freeHyphenData = [&rep, &pos, &cut, n]()
+    {
+        if (rep)
+        {
+            for (sal_Int32 j = 0; j < n; j++)
+                free(rep[j]);
+            free(rep);
+        }
+        free(pos);
+        free(cut);
+    };
+
+    if (n > 0)
+    {
+        const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), nullptr,
+                &rep, &pos, &cut, minLead, minTrail,
+                std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))),
+                std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) );
+        if (bFailed)
+        {
+            freeHyphenData();
+            return nullptr;
+        }
+    }
+    // now backfill hyphens[] for any removed periods
+    for (sal_Int32 c = n; c < wordlen; c++)
+        hyphens[c] = '0';
+    hyphens[wordlen] = '\0';
+
+    // Collect the positions while building the hyphenated string, so that
+    // the resulting sequence holds exactly the positions that were marked.
+    // (Sizing the sequence from a separate count over the encoded bytes could
+    // leave unused trailing entries, or be too small for the positions
+    // found here.)
+    std::vector< sal_Int16 > aPositions;
+    OUStringBuffer hyphenatedWordBuffer;
+    const sal_Int32 nWordLen = nWord.getLength();
+
+    for (sal_Int32 i = 0; i < nWordLen; i++)
+    {
+        hyphenatedWordBuffer.append(aWord[i]);
+
+        // hyphens[] has only been filled for indices below wordlen; an odd
+        // value marks a hyphenation position
+        if (i >= wordlen || !(hyphens[i]&1))
+            continue;
+
+        // linguistic::PossibleHyphens is stuck with
+        // css::uno::Sequence<sal_Int16> because of
+        // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so
+        // any further positions need to be ignored:
+        assert(i >= SAL_MIN_INT16);
+        if (i > SAL_MAX_INT16)
+        {
+            SAL_WARN(
+                "lingucomponent",
+                "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord
+                    << "\"");
+            continue;
+        }
+        aPositions.push_back(static_cast<sal_Int16>(i));
+        hyphenatedWordBuffer.append('=');
+    }
+
+    Sequence< sal_Int16 > aHyphPos = comphelper::containerToSequence(aPositions);
+    OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
+
+    Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
+        aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
+
+    freeHyphenData();
+
+    return xRes;
+}
+
+// Lower-cases aTerm with the given character classification; without one
+// (null pCC) the term is returned unchanged.
+OUString Hyphenator::makeLowerCase(const OUString& aTerm, CharClass const * pCC)
+{
+    if (!pCC)
+        return aTerm;
+    return pCC->lowercase(aTerm);
+}
+
+// Upper-cases aTerm with the given character classification; without one
+// (null pCC) the term is returned unchanged.
+OUString Hyphenator::makeUpperCase(const OUString& aTerm, CharClass const * pCC)
+{
+    if (!pCC)
+        return aTerm;
+    return pCC->uppercase(aTerm);
+}
+
+// Returns aTerm with its first character upper-cased and the remainder
+// lower-cased. An empty term, or a call without character classification
+// (null pCC), returns aTerm unchanged.
+OUString Hyphenator::makeInitCap(const OUString& aTerm, CharClass const * pCC)
+{
+    const sal_Int32 nLen = aTerm.getLength();
+    if (!pCC || nLen == 0)
+        return aTerm;
+
+    const OUString aFirst = aTerm.copy(0,1);
+    OUString aResult = pCC->uppercase(aFirst, 0, 1);
+    if (nLen > 1)
+        aResult += pCC->lowercase(aTerm,1,(nLen-1));
+
+    return aResult;
+}
+
+// Forwards the listener registration to the property helper.
+// Returns false without registering when the component is being disposed or
+// rxLstnr is empty; otherwise the helper's result is returned.
+sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
+        const Reference< XLinguServiceEventListener >& rxLstnr )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxLstnr.is())
+        return false;
+
+    const bool bAdded = GetPropHelper().addLinguServiceEventListener( rxLstnr );
+    return bAdded;
+}
+
+// Forwards the listener removal to the property helper.
+// Returns false without doing anything when the component is being disposed
+// or rxLstnr is empty; otherwise the helper's result is returned.
+sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
+        const Reference< XLinguServiceEventListener >& rxLstnr )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxLstnr.is())
+        return false;
+
+    const bool bRemoved = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
+    return bRemoved;
+}
+
+// Returns the STR_DESCRIPTION_LIBHYPHEN string from the "svt" resources,
+// looked up for the language of rLocale.
+OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale)
+{
+    const LanguageTag aDisplayTag(rLocale);
+    std::locale aResLocale(Translate::Create("svt", aDisplayTag));
+    return Translate::get(STR_DESCRIPTION_LIBHYPHEN, aResLocale);
+}
+
+// XInitialization: creates the property helper from the first of exactly
+// two arguments (expected to hold an XLinguProperties reference).
+// Does nothing when a helper already exists; any other argument count only
+// triggers OSL_FAIL.
+void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (pPropHelper)
+        return;
+
+    const sal_Int32 nArgCount = rArguments.getLength();
+    if (nArgCount != 2)
+    {
+        OSL_FAIL( "wrong number of arguments in sequence" );
+        return;
+    }
+
+    Reference< XLinguProperties > xPropSet;
+    rArguments.getConstArray()[0] >>= xPropSet;
+    // rArguments.getConstArray()[1] >>= xDicList;
+
+    //! Pointer allows for access of the non-UNO functions.
+    //! And the reference to the UNO-functions while increasing
+    //! the ref-count and will implicitly free the memory
+    //! when the object is no longer used.
+    pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) );
+    pPropHelper->AddAsPropListener(); //! after a reference is established
+}
+
+// XComponent: on the first call marks the component as disposing, notifies
+// and clears the event listeners and drops the property helper after
+// unregistering it. Later calls do nothing.
+void SAL_CALL Hyphenator::dispose()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing)
+        return;
+
+    bDisposing = true;
+    EventObject aEvtObj( static_cast<XHyphenator *>(this) );
+    aEvtListeners.disposeAndClear( aEvtObj );
+
+    if (!pPropHelper)
+        return;
+
+    pPropHelper->RemoveAsPropListener();
+    pPropHelper.reset();
+}
+
+// XComponent: registers rxListener unless the component is being disposed
+// or the reference is empty.
+void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxListener.is())
+        return;
+
+    aEvtListeners.addInterface( rxListener );
+}
+
+// XComponent: unregisters rxListener unless the component is being disposed
+// or the reference is empty.
+void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxListener.is())
+        return;
+
+    aEvtListeners.removeInterface( rxListener );
+}
+
+// Service specific part
+// XServiceInfo: the implementation name of this hyphenator.
+OUString SAL_CALL Hyphenator::getImplementationName()
+{
+    return OUString("org.openoffice.lingu.LibHnjHyphenator");
+}
+
+// XServiceInfo: delegates the check to cppu::supportsService.
+sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
+{
+    const bool bSupported = cppu::supportsService(this, ServiceName);
+    return bSupported;
+}
+
+// XServiceInfo: the single service name SN_HYPHENATOR.
+Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
+{
+    Sequence< OUString > aServiceNames { SN_HYPHENATOR };
+    return aServiceNames;
+}
+
+// Component constructor function: creates a new Hyphenator and returns it
+// acquired. Both arguments are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+lingucomponent_Hyphenator_get_implementation(
+    css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
+{
+    Hyphenator* const pInstance = new Hyphenator();
+    return cppu::acquire(pInstance);
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx
new file mode 100644
index 000000000..45ebca112
--- /dev/null
+++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx
@@ -0,0 +1,126 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX
+
+#include <comphelper/interfacecontainer3.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <com/sun/star/lang/XComponent.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/lang/XServiceDisplayName.hpp>
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/beans/PropertyValues.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XHyphenator.hpp>
+#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp>
+
+#include <unotools/charclass.hxx>
+
+#include <linguistic/misc.hxx>
+#include <linguistic/lngprophelp.hxx>
+
+#include <lingutil.hxx>
+#include <stdio.h>
+
+#include <hyphen.h>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::beans;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+// Bookkeeping for one hyphenation dictionary / locale pair.
+// The scalar members carry default initializers so that a default
+// constructed HDInfo never holds an indeterminate pointer or encoding.
+struct HDInfo {
+    // dictionary handle; null until the dictionary has been loaded
+    HyphenDict * aPtr = nullptr;
+    // dictionary location without the file extension
+    OUString aName;
+    // locale this entry is used for
+    Locale aLoc;
+    // text encoding of the dictionary; unknown until it has been loaded
+    rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+    // character classification for the entry's language
+    std::unique_ptr<CharClass> apCC;
+};
+
+// UNO component implementing the hyphenator service on top of the
+// dictionaries described by HDInfo. Besides XHyphenator it implements event
+// broadcasting, initialization, disposal, service info and a display name.
+class Hyphenator :
+ public cppu::WeakImplHelper
+ <
+ XHyphenator,
+ XLinguServiceEventBroadcaster,
+ XInitialization,
+ XComponent,
+ XServiceInfo,
+ XServiceDisplayName
+ >
+{
+ // locales returned by getLocales(); filled together with mvDicts
+ Sequence< Locale > aSuppLocales;
+ // one entry per dictionary / locale pair; dictionaries are loaded on demand
+ std::vector< HDInfo > mvDicts;
+
+ // listeners registered through addEventListener()
+ ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners;
+ // created by initialize() or on first use by GetPropHelper_Impl()
+ std::unique_ptr<linguistic::PropertyHelper_Hyphenation> pPropHelper;
+ // set by dispose(); listener registration is refused afterwards
+ bool bDisposing;
+
+ Hyphenator(const Hyphenator &) = delete;
+ Hyphenator & operator = (const Hyphenator &) = delete;
+
+ linguistic::PropertyHelper_Hyphenation& GetPropHelper_Impl();
+ // returns the existing property helper or has GetPropHelper_Impl() create it
+ linguistic::PropertyHelper_Hyphenation& GetPropHelper()
+ {
+ return pPropHelper ? *pPropHelper : GetPropHelper_Impl();
+ }
+
+public:
+ Hyphenator();
+
+ virtual ~Hyphenator() override;
+
+ // XSupportedLocales (for XHyphenator)
+ virtual Sequence< Locale > SAL_CALL getLocales() override;
+ virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override;
+
+ // XHyphenator
+ virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL hyphenate( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nMaxLeading, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override;
+ virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL queryAlternativeSpelling( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nIndex, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override;
+ virtual css::uno::Reference< css::linguistic2::XPossibleHyphens > SAL_CALL createPossibleHyphens( const OUString& aWord, const css::lang::Locale& aLocale, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override;
+
+ // XLinguServiceEventBroadcaster
+ virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
+ virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
+
+ // XServiceDisplayName
+ virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override;
+
+ // XInitialization
+ virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override;
+
+ // XComponent
+ virtual void SAL_CALL dispose() override;
+ virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override;
+ virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override;
+
+ // XServiceInfo
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override;
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+
+private:
+ // case conversion helpers; each returns its argument unchanged when the
+ // CharClass pointer is null
+ static OUString makeLowerCase(const OUString&, CharClass const *);
+ static OUString makeUpperCase(const OUString&, CharClass const *);
+ static OUString makeInitCap(const OUString&, CharClass const *);
+};
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guess.cxx b/lingucomponent/source/languageguessing/guess.cxx
new file mode 100644
index 000000000..a7cbeccab
--- /dev/null
+++ b/lingucomponent/source/languageguessing/guess.cxx
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+
+#include <cassert>
+#include <string.h>
+
+#ifdef SYSTEM_LIBEXTTEXTCAT
+#include <libexttextcat/textcat.h>
+#else
+#include <textcat.h>
+#endif
+
+#include "guess.hxx"
+
+/* Old textcat.h versions defined misspelled constants. */
+#ifndef TEXTCAT_RESULT_UNKNOWN_STR
+#define TEXTCAT_RESULT_UNKNOWN_STR _TEXTCAT_RESULT_UNKOWN
+#endif
+
+#ifndef TEXTCAT_RESULT_SHORT_STR
+#define TEXTCAT_RESULT_SHORT_STR _TEXTCAT_RESULT_SHORT
+#endif
+
+// Default guess: language and country both start out as the (empty) default strings.
+Guess::Guess()
+    : language_str(DEFAULT_LANGUAGE), country_str(DEFAULT_COUNTRY)
+{
+}
+
+/*
+* Builds the guess object from a char * string.
+* Such a string has the form: [language-country-encoding]...
+*
+*/
+Guess::Guess(const char * guess_str)
+    : language_str(DEFAULT_LANGUAGE)
+    , country_str(DEFAULT_COUNTRY)
+{
+    // The "unknown" / "too short" result markers carry no locale: keep the defaults.
+    char const * const pBody = guess_str + 1;
+    bool const bNoLocale = strcmp(pBody, TEXTCAT_RESULT_UNKNOWN_STR) == 0
+        || strcmp(pBody, TEXTCAT_RESULT_SHORT_STR) == 0;
+    if (bNoLocale)
+        return;
+
+    // Both callers (SimpleGuesser::GuessLanguage and SimpleGuesser::GetManagedLanguages in
+    // lingucomponent/source/languageguessing/simpleguesser.cxx) pass a string starting with "[":
+    assert(guess_str[0] == GUESS_SEPARATOR_OPEN);
+
+    // Only the prefix up to the next "]" or "[" or end-of-string is examined.
+    auto const isTerminator = [](char c) {
+        return c == '\0' || c == GUESS_SEPARATOR_OPEN || c == GUESS_SEPARATOR_CLOSE;
+    };
+    // Returns the position of the next "-" or terminator at or after pFrom.
+    auto const findSegmentEnd = [&isTerminator](char const * pFrom) {
+        while (!isTerminator(*pFrom) && *pFrom != GUESS_SEPARATOR_SEP)
+            ++pFrom;
+        return pFrom;
+    };
+
+    // First segment: the language (e.g. "en" in "en-US-utf8", "haw" in "haw-utf8").
+    char const * const pLangEnd = findSegmentEnd(pBody);
+    if (pLangEnd != pBody) // if empty we keep the default value
+        language_str.assign(pBody, pLangEnd - pBody);
+    if (*pLangEnd != GUESS_SEPARATOR_SEP)
+        return; // no "-" at all, so there is no country segment
+
+    // The second segment denotes the country only if a second "-" follows it; otherwise
+    // it is the encoding, which is ignored (as is everything after the second "-").
+    char const * const pCountry = pLangEnd + 1;
+    char const * const pCountryEnd = findSegmentEnd(pCountry);
+    if (*pCountryEnd == GUESS_SEPARATOR_SEP)
+        country_str.assign(pCountry, pCountryEnd - pCountry);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guess.hxx b/lingucomponent/source/languageguessing/guess.hxx
new file mode 100644
index 000000000..627033d3a
--- /dev/null
+++ b/lingucomponent/source/languageguessing/guess.hxx
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX
+
+#define GUESS_SEPARATOR_OPEN '['
+#define GUESS_SEPARATOR_CLOSE ']'
+#define GUESS_SEPARATOR_SEP '-'
+#define DEFAULT_LANGUAGE ""
+#define DEFAULT_COUNTRY ""
+#define DEFAULT_ENCODING ""
+
+#include <string>
+
+class Guess final { // value type holding the language and country parsed from one textcat result entry
+ public:
+
+ /**
+ * Default init: language and country are DEFAULT_LANGUAGE / DEFAULT_COUNTRY (both "")
+ */
+ Guess();
+
+ /**
+ * Init from a string like [en-UK-utf8]; the encoding part is ignored
+ */
+ Guess(const char * guess_str);
+
+ const std::string& GetLanguage() const { return language_str;} // e.g. "en"; "" if none was parsed
+ const std::string& GetCountry() const { return country_str;} // e.g. "UK"; "" if none was parsed
+
+ private:
+ std::string language_str;
+ std::string country_str;
+};
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guesslang.component b/lingucomponent/source/languageguessing/guesslang.component
new file mode 100644
index 000000000..75f6e7ce2
--- /dev/null
+++ b/lingucomponent/source/languageguessing/guesslang.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="com.sun.star.lingu2.LanguageGuessing"
+ constructor="lingucomponent_LangGuess_get_implementation">
+ <service name="com.sun.star.linguistic2.LanguageGuessing"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx
new file mode 100644
index 000000000..d6d5803a5
--- /dev/null
+++ b/lingucomponent/source/languageguessing/guesslang.cxx
@@ -0,0 +1,321 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <iostream>
+#include <mutex>
+#include <string_view>
+
+#include <osl/file.hxx>
+#include <tools/debug.hxx>
+
+#include <sal/config.h>
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <cppuhelper/supportsservice.hxx>
+
+#include "simpleguesser.hxx"
+#include "guess.hxx"
+
+#include <com/sun/star/lang/IllegalArgumentException.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
+#include <unotools/pathoptions.hxx>
+#include <osl/thread.h>
+
+#include <sal/macros.h>
+
+#ifdef SYSTEM_LIBEXTTEXTCAT
+#include <libexttextcat/textcat.h>
+#else
+#include <textcat.h>
+#endif
+
+using namespace ::std;
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+// Returns the single mutex every LangGuess_Impl method in this file locks; created on first use.
+static std::mutex & GetLangGuessMutex()
+{
+    static std::mutex s_aLangGuessGuard;
+    return s_aLangGuessGuard;
+}
+
+namespace {
+
+class LangGuess_Impl : // UNO implementation of XLanguageGuessing on top of a SimpleGuesser
+ public ::cppu::WeakImplHelper<
+ XLanguageGuessing,
+ XServiceInfo >
+{
+ SimpleGuesser m_aGuesser; // the textcat wrapper doing the actual work
+ bool m_bInitialized; // set by EnsureInitialized() once the fingerprint DB path has been set
+
+ virtual ~LangGuess_Impl() override {}
+ void EnsureInitialized(); // called by every XLanguageGuessing method after taking GetLangGuessMutex()
+
+public:
+ LangGuess_Impl();
+ LangGuess_Impl(const LangGuess_Impl&) = delete;
+ LangGuess_Impl& operator=(const LangGuess_Impl&) = delete;
+
+ // XServiceInfo implementation
+ virtual OUString SAL_CALL getImplementationName( ) override;
+ virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override;
+
+ // XLanguageGuessing implementation
+ virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override; // throws IllegalArgumentException for a range outside aText
+ virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
+ virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
+ virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override; // maps SimpleGuesser::GetAllManagedLanguages()
+ virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override; // maps SimpleGuesser::GetAvailableLanguages()
+ virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override; // maps SimpleGuesser::GetUnavailableLanguages()
+
+ // implementation specific
+ /// @throws RuntimeException
+ void SetFingerPrintsDB( std::u16string_view fileName ); // argument is the fingerprint directory path (ending in a path separator); "fpdb.conf" is appended to it
+};
+
+}
+
+// Nothing is loaded here; EnsureInitialized() sets up the guesser on first use.
+LangGuess_Impl::LangGuess_Impl()
+    : m_bInitialized(false)
+{
+}
+
+// Sets up the guesser on first use: points it at the installed fingerprint database and,
+// for textcat versions without EXTTEXTCAT_VERSION_MAJOR, disables the non-functional languages.
+// Every caller in this file invokes it while holding GetLangGuessMutex(), which is a
+// non-recursive std::mutex, so this function must work on m_aGuesser directly and must not
+// call the locking public XLanguageGuessing methods.
+void LangGuess_Impl::EnsureInitialized()
+{
+    if (m_bInitialized)
+        return;
+
+    // set this to true at the very start to prevent loops because of
+    // implicitly called functions below
+    m_bInitialized = true;
+
+    // set default fingerprint path to where those get installed
+    OUString aPhysPath;
+    OUString aURL( SvtPathOptions().GetFingerprintPath() );
+    osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath );
+#ifdef _WIN32
+    aPhysPath += "\\";
+#else
+    aPhysPath += "/";
+#endif
+
+    SetFingerPrintsDB( aPhysPath );
+
+#if !defined(EXTTEXTCAT_VERSION_MAJOR)
+
+    // disable currently not functional languages...
+    struct LangCountry
+    {
+        const char *pLang;
+        const char *pCountry;
+    };
+    LangCountry aDisable[] =
+    {
+        // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
+        // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
+        {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
+        {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
+        {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
+    };
+    // Disable them on the guesser itself. Going through disableLanguages() and
+    // getDisabledLanguages() would lock GetLangGuessMutex() a second time on this thread,
+    // which is undefined behavior for std::mutex (typically a self-deadlock).
+    for (const LangCountry& rEntry : aDisable)
+    {
+        // same "language-COUNTRY" key that disableLanguages() builds from a Locale
+        std::string aKey( rEntry.pLang );
+        aKey += "-";
+        aKey += rEntry.pCountry;
+        m_aGuesser.DisableLanguage( aKey );
+    }
+    DBG_ASSERT( SAL_N_ELEMENTS(aDisable) == m_aGuesser.GetUnavailableLanguages().size(),
+                "size mismatch" );
+#endif
+}
+
+// XLanguageGuessing: guesses the language of the nLen UTF-16 units of rText starting at nStartPos.
+// Throws lang::IllegalArgumentException if that range does not lie inside rText.
+// The returned Locale has empty Language and Country when the guesser yields no result.
+Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
+    const OUString& rText,
+    ::sal_Int32 nStartPos,
+    ::sal_Int32 nLen )
+{
+    std::scoped_lock aGuard( GetLangGuessMutex() );
+
+    EnsureInitialized();
+
+    // Compare nLen against the remaining length instead of computing nStartPos + nLen:
+    // that sum can overflow sal_Int32 (undefined behavior) for large arguments.
+    if (nStartPos < 0 || nLen < 0 || nStartPos > rText.getLength()
+        || nLen > rText.getLength() - nStartPos)
+        throw lang::IllegalArgumentException();
+
+    // textcat works on bytes, so hand it the requested range as UTF-8
+    OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
+    Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
+    lang::Locale aRes;
+    aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() );
+    aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() );
+    return aRes;
+}
+
+#define DEFAULT_CONF_FILE_NAME "fpdb.conf"
+
+// Points the guesser at the fingerprint directory filePath (expected to end with a path
+// separator); the configuration file is DEFAULT_CONF_FILE_NAME inside that directory.
+void LangGuess_Impl::SetFingerPrintsDB(
+    std::u16string_view filePath )
+{
+    //! text encoding for file name / path needs to be in the same encoding the OS uses
+    const OString aDbDir = OUStringToOString( filePath, osl_getThreadTextEncoding() );
+    const OString aConfFile = aDbDir + DEFAULT_CONF_FILE_NAME;
+
+    m_aGuesser.SetDBPath( aConfFile.getStr(), aDbDir.getStr() );
+}
+
+// XLanguageGuessing: all languages the guesser manages, enabled or disabled.
+uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
+{
+    std::scoped_lock aGuard( GetLangGuessMutex() );
+
+    EnsureInitialized();
+
+    const vector<Guess> aGuesses = m_aGuesser.GetAllManagedLanguages();
+
+    Sequence< css::lang::Locale > aLocales;
+    aLocales.realloc(aGuesses.size());
+
+    // fill the freshly default-constructed elements in order
+    css::lang::Locale *pOut = aLocales.getArray();
+    for (const Guess& rGuess : aGuesses)
+    {
+        pOut->Language = OUString::createFromAscii( rGuess.GetLanguage().c_str() );
+        pOut->Country = OUString::createFromAscii( rGuess.GetCountry().c_str() );
+        ++pOut;
+    }
+
+    return aLocales;
+}
+
+// XLanguageGuessing: the languages that are currently enabled in the guesser.
+uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
+{
+    std::scoped_lock aGuard( GetLangGuessMutex() );
+
+    EnsureInitialized();
+
+    const vector<Guess> aGuesses = m_aGuesser.GetAvailableLanguages();
+
+    Sequence< css::lang::Locale > aLocales;
+    aLocales.realloc(aGuesses.size());
+
+    // fill the freshly default-constructed elements in order
+    css::lang::Locale *pOut = aLocales.getArray();
+    for (const Guess& rGuess : aGuesses)
+    {
+        pOut->Language = OUString::createFromAscii( rGuess.GetLanguage().c_str() );
+        pOut->Country = OUString::createFromAscii( rGuess.GetCountry().c_str() );
+        ++pOut;
+    }
+
+    return aLocales;
+}
+
+// XLanguageGuessing: the languages that are currently disabled in the guesser.
+uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
+{
+    std::scoped_lock aGuard( GetLangGuessMutex() );
+
+    EnsureInitialized();
+
+    const vector<Guess> aGuesses = m_aGuesser.GetUnavailableLanguages();
+
+    Sequence< css::lang::Locale > aLocales;
+    aLocales.realloc(aGuesses.size());
+
+    // fill the freshly default-constructed elements in order
+    css::lang::Locale *pOut = aLocales.getArray();
+    for (const Guess& rGuess : aGuesses)
+    {
+        pOut->Language = OUString::createFromAscii( rGuess.GetLanguage().c_str() );
+        pOut->Country = OUString::createFromAscii( rGuess.GetCountry().c_str() );
+        ++pOut;
+    }
+
+    return aLocales;
+}
+
+// XLanguageGuessing: disables every locale in rLanguages in the guesser.
+void SAL_CALL LangGuess_Impl::disableLanguages(
+    const uno::Sequence< Locale >& rLanguages )
+{
+    std::scoped_lock aGuard( GetLangGuessMutex() );
+
+    EnsureInitialized();
+
+    for (const Locale& rLocale : rLanguages)
+    {
+        const OString aLang = OUStringToOString( rLocale.Language, RTL_TEXTENCODING_ASCII_US );
+        const OString aCountry = OUStringToOString( rLocale.Country, RTL_TEXTENCODING_ASCII_US );
+
+        // the guesser is addressed with a "language-COUNTRY" key
+        string aKey( aLang.getStr() );
+        aKey += "-";
+        aKey += aCountry.getStr();
+        m_aGuesser.DisableLanguage( aKey );
+    }
+}
+
+// XLanguageGuessing: enables every locale in rLanguages in the guesser.
+void SAL_CALL LangGuess_Impl::enableLanguages(
+    const uno::Sequence< Locale >& rLanguages )
+{
+    std::scoped_lock aGuard( GetLangGuessMutex() );
+
+    EnsureInitialized();
+
+    for (const Locale& rLocale : rLanguages)
+    {
+        const OString aLang = OUStringToOString( rLocale.Language, RTL_TEXTENCODING_ASCII_US );
+        const OString aCountry = OUStringToOString( rLocale.Country, RTL_TEXTENCODING_ASCII_US );
+
+        // the guesser is addressed with a "language-COUNTRY" key
+        string aKey( aLang.getStr() );
+        aKey += "-";
+        aKey += aCountry.getStr();
+        m_aGuesser.EnableLanguage( aKey );
+    }
+}
+
+OUString SAL_CALL LangGuess_Impl::getImplementationName( ) // XServiceInfo: the implementation name, same string as in guesslang.component
+{
+ return "com.sun.star.lingu2.LanguageGuessing";
+}
+
+sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName ) // XServiceInfo: delegates to the cppu helper
+{
+ return cppu::supportsService(this, ServiceName);
+}
+
+Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( ) // XServiceInfo: the single service name, same string as in guesslang.component
+{
+ return { "com.sun.star.linguistic2.LanguageGuessing" };
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+lingucomponent_LangGuess_get_implementation( // constructor symbol named in guesslang.component
+ css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
+{
+ return cppu::acquire(new LangGuess_Impl()); // neither argument is used; returns the new instance via cppu::acquire
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx
new file mode 100644
index 000000000..7210b1f45
--- /dev/null
+++ b/lingucomponent/source/languageguessing/simpleguesser.cxx
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+ /**
+ *
+ *
+ *
+ *
+ * TODO
+ * - Add exception throwing when h == NULL
+ * - Not init h when implicit constructor is launched
+ */
+
+#include <string.h>
+
+#ifdef SYSTEM_LIBEXTTEXTCAT
+#include <libexttextcat/textcat.h>
+#include <libexttextcat/common.h>
+#include <libexttextcat/constants.h>
+#include <libexttextcat/fingerprint.h>
+#else
+#include <textcat.h>
+#include <common.h>
+#include <constants.h>
+#include <fingerprint.h>
+#endif
+
+#include <sal/types.h>
+
+#include<rtl/character.hxx>
+#include "simpleguesser.hxx"
+
+// Compares s1 and s2 over the length of the shorter one, ignoring ASCII case.
+// Returns 0 when no difference is found in that range, otherwise the difference of the
+// first differing (upper-cased) characters. A '.' on either side matches any character.
+static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){
+    const size_t nCommon = s1.length() > s2.length() ? s2.length() : s1.length();
+
+    for (size_t n = 0; n < nCommon; ++n)
+    {
+        const char c1 = s1[n];
+        const char c2 = s2[n];
+        if (c1 == '\0' || c2 == '\0')
+            break; // an embedded NUL ends the comparison
+        if (c1 == '.' || c2 == '.')
+            continue; //. is a neutral character
+        const int nDiff = rtl::toAsciiUpperCase(static_cast<unsigned char>(c1))
+            - rtl::toAsciiUpperCase(static_cast<unsigned char>(c2));
+        if (nDiff != 0)
+            return nDiff;
+    }
+    return 0;
+}
+
+namespace {
+
+/**
+ * This following structure is from textcat.c
+ */
+typedef struct textcat_t{ // NOTE(review): presumably has to match the layout of the struct in the linked textcat.c, since h is cast to it — confirm
+
+ void **fprint; // per-language fingerprint handles; elements are passed to fp_Name()
+ char *fprint_disable; // per-language flag byte, tested and written with the 0xF0 / 0x0F masks
+ uint4 size; // number of entries iterated in fprint / fprint_disable
+ uint4 maxsize;
+
+ char output[MAXOUTPUTSIZE];
+
+} textcat_t;
+// end of the struct taken from textcat.c
+
+}
+
+// Starts without a textcat handle; SetDBPath() creates one.
+SimpleGuesser::SimpleGuesser()
+    : h(nullptr)
+{
+}
+
+SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){ // releases this object's handle, then takes over the handle pointer of sg
+ // Check for self-assignment!
+ if (this == &sg) // Same object?
+ return *this; // Yes, so skip assignment, and just return *this.
+
+ if(h){textcat_Done(h);}
+ h = sg.h; // NOTE(review): copies the raw pointer only, so both objects hold the same h and each destructor calls textcat_Done(h) on it — confirm whether this operator is still needed
+ return *this;
+}
+
+// Releases the textcat handle, if one was created.
+SimpleGuesser::~SimpleGuesser()
+{
+    if (h != nullptr)
+        textcat_Done(h);
+}
+
+/*!
+ \fn SimpleGuesser::GuessLanguage(const char* text)
+ */
+// Classifies the beginning of the NUL-terminated text and returns one Guess per "[...]"
+// entry of the textcat result, in result order. Returns an empty vector when no fingerprint
+// DB is loaded, when textcat reports the text as too short, or when the result contains
+// no "[" at all.
+std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
+{
+    std::vector<Guess> guesses;
+
+    if (!h)
+        return guesses;
+
+    // Only the first MAX_STRING_LENGTH_TO_ANALYSE bytes are classified, so measure no further
+    // than that: this avoids scanning a long text to its end and narrowing a size_t length
+    // to int before clamping it.
+    int len = 0;
+    while (len < MAX_STRING_LENGTH_TO_ANALYSE && text[len] != '\0')
+        ++len;
+
+    const char *guess_list = textcat_Classify(h, text, len);
+
+    if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
+        return guesses;
+
+    // Every "[" starts one entry; Guess itself stops parsing at the entry's end.
+    for (const char *pOpen = strchr(guess_list, GUESS_SEPARATOR_OPEN); pOpen != nullptr;
+         pOpen = strchr(pOpen + 1, GUESS_SEPARATOR_OPEN))
+    {
+        guesses.push_back(Guess(pOpen));
+    }
+
+    return guesses;
+}
+
+// Returns the first guess for text, or a default-constructed Guess when there is none.
+Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
+{
+    const std::vector<Guess> aCandidates = GuessLanguage(text);
+    if (aCandidates.empty())
+        return Guess();
+    return aCandidates.front();
+}
+/**
+ * Is used to know which language is available, unavailable or both
+ * when mask = 0xF0, return only Available
+ * when mask = 0x0F, return only Unavailable
+ * when mask = 0xFF, return both Available and Unavailable
+ */
+std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
+{
+    std::vector<Guess> aResult;
+    if (h == nullptr)
+        return aResult;
+
+    textcat_t const * const pTables = static_cast<textcat_t*>(h);
+    for (size_t n = 0; n < pTables->size; ++n)
+    {
+        // skip entries whose flag byte has none of the requested bits set
+        if ((pTables->fprint_disable[n] & mask) == 0)
+            continue;
+
+        // Guess expects its input to start with "["
+        std::string aBracketed("[");
+        aBracketed += fp_Name(pTables->fprint[n]);
+        aResult.push_back(Guess(aBracketed.c_str()));
+    }
+
+    return aResult;
+}
+
+std::vector<Guess> SimpleGuesser::GetAvailableLanguages() // entries whose flag byte shares a bit with 0xF0, the value EnableLanguage() stores
+{
+ return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
+}
+
+std::vector<Guess> SimpleGuesser::GetUnavailableLanguages() // entries whose flag byte shares a bit with 0x0F, the value DisableLanguage() stores
+{
+ return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
+}
+
+std::vector<Guess> SimpleGuesser::GetAllManagedLanguages() // mask 0xFF: entries with any flag bit set, enabled or disabled
+{
+ return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
+}
+
+// Stores mask as the flag byte of every fingerprint whose name matches lang according to
+// startsAsciiCaseInsensitive(). Does nothing when no fingerprint DB is loaded.
+void SimpleGuesser::XableLanguage(const std::string& lang, char mask)
+{
+    if (h == nullptr)
+        return;
+
+    textcat_t * const pTables = static_cast<textcat_t*>(h);
+    for (size_t n = 0; n < pTables->size; ++n)
+    {
+        const std::string aName(fp_Name(pTables->fprint[n]));
+        const bool bMatches = startsAsciiCaseInsensitive(aName, lang) == 0;
+        if (bMatches)
+            pTables->fprint_disable[n] = mask;
+    }
+}
+
+void SimpleGuesser::EnableLanguage(const std::string& lang) // stores 0xF0 as flag byte of every matching fingerprint
+{
+ XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
+}
+
+void SimpleGuesser::DisableLanguage(const std::string& lang) // stores 0x0F as flag byte of every matching fingerprint
+{
+ XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
+}
+
+// Replaces the current fingerprint DB (if any) with the one described by the
+// configuration file path; prefix is passed on to special_textcat_Init unchanged.
+void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
+{
+    if (h != nullptr)
+    {
+        textcat_Done(h);
+    }
+    h = special_textcat_Init(path, prefix);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx
new file mode 100644
index 000000000..aec544285
--- /dev/null
+++ b/lingucomponent/source/languageguessing/simpleguesser.hxx
@@ -0,0 +1,108 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX
+
+#include <string>
+#include <vector>
+#include "guess.hxx"
+
+#define MAX_STRING_LENGTH_TO_ANALYSE 200
+
+class SimpleGuesser final
+{
+public:
+ /**creates a guesser without a fingerprint DB (null handle); call SetDBPath() to load one*/
+ SimpleGuesser();
+
+ /**
+ * @param SimpleGuesser& sg the other guesser; only its handle pointer is copied
+ */
+ SimpleGuesser& operator=(const SimpleGuesser& sg);
+
+ /**
+ * destroy the object
+ */
+ ~SimpleGuesser();
+
+ /**
+ * Analyze a text and return the most probable languages of the text
+ * @param char* text is the text to analyze (at most MAX_STRING_LENGTH_TO_ANALYSE bytes are used)
+ * @return the list of guess (empty if no DB is loaded)
+ */
+ std::vector<Guess> GuessLanguage(const char* text);
+
+ /**
+ * Analyze a text and return the most probable language of the text
+ * @param char* text is the text to analyze
+ * @return the guess (containing language); a default Guess if there is no result
+ */
+ Guess GuessPrimaryLanguage(const char* text);
+
+ /**
+ * List all available languages (possibly to be in guesses)
+ * @return the list of languages
+ */
+ std::vector<Guess> GetAvailableLanguages();
+
+ /**
+ * List all languages (possibly in guesses or not)
+ * @return the list of languages
+ */
+ std::vector<Guess> GetAllManagedLanguages();
+
+ /**
+ * List all Unavailable languages (disable for any reason)
+ * @return the list of languages
+ */
+ std::vector<Guess> GetUnavailableLanguages();
+
+ /**
+ * Mark a language enabled
+ * @param string lang the language to enable (build like language-COUNTRY-encoding)
+ */
+ void EnableLanguage(const std::string& lang);
+
+ /**
+ * Mark a language disabled
+ * @param string lang the language to disable (build like language-COUNTRY-encoding)
+ */
+ void DisableLanguage(const std::string& lang);
+
+ /**
+ * Load a new DB of fingerprints
+ * @param const char* thePathOfConfFile self explaining
+ * @param const char* prefix is the path where the directory which contains fingerprint files is stored
+ */
+ void SetDBPath(const char* thePathOfConfFile, const char* prefix);
+
+private:
+ //Where typical fingerprints (n-gram tables) are stored
+ void* h; // textcat handle; null until SetDBPath() has been called
+
+ //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both
+ std::vector<Guess> GetManagedLanguages(const char mask);
+
+ //Like getManagedLanguages, this function enable or disable a language and it depends of the mask
+ void XableLanguage(const std::string& lang, char mask);
+};
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/lingutil/lingutil.cxx b/lingucomponent/source/lingutil/lingutil.cxx
new file mode 100644
index 000000000..f0ab84026
--- /dev/null
+++ b/lingucomponent/source/lingutil/lingutil.cxx
@@ -0,0 +1,304 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#if defined(_WIN32)
+#if !defined WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#endif
+
+#include <osl/thread.h>
+#include <osl/file.hxx>
+#include <osl/process.h>
+#include <tools/debug.hxx>
+#include <tools/urlobj.hxx>
+#include <i18nlangtag/languagetag.hxx>
+#include <i18nlangtag/mslangid.hxx>
+#include <unotools/bootstrap.hxx>
+#include <unotools/lingucfg.hxx>
+#include <unotools/pathoptions.hxx>
+#include <rtl/bootstrap.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/string.hxx>
+#include <rtl/tencinfo.h>
+#include <linguistic/misc.hxx>
+
+#include <set>
+#include <vector>
+#include <string.h>
+
+#include "lingutil.hxx"
+
+#include <sal/macros.h>
+
+using namespace ::com::sun::star;
+
+#if defined(_WIN32)
+// Returns rPathName with the Win32 long-path prefix in front, unless it already starts with it.
+OString Win_AddLongPathPrefix( const OString &rPathName )
+{
+    constexpr OStringLiteral aLongPathPrefix = "\\\\?\\";
+    if (rPathName.match(aLongPathPrefix))
+        return rPathName;
+    return aLongPathPrefix + rPathName;
+}
+#endif //defined(_WIN32)
+
+#if defined SYSTEM_DICTS || defined IOS
+// find old style dictionaries in system directories
+// Scans the directory aSystemDir for files named <aSystemPrefix><locale><aSystemSuffix> and
+// appends one entry (with format aFormatName) to aRes for every locale that is a valid BCP 47
+// tag and not yet contained in aDicLangInUse; the locale is then added to aDicLangInUse.
+// Returns silently if the directory cannot be opened.
+static void GetOldStyleDicsInDir(
+    OUString const & aSystemDir, OUString const & aFormatName,
+    OUString const & aSystemSuffix, OUString const & aSystemPrefix,
+    std::set< OUString >& aDicLangInUse,
+    std::vector< SvtLinguConfigDictionaryEntry >& aRes )
+{
+    osl::Directory aSystemDicts(aSystemDir);
+    if (aSystemDicts.open() != osl::FileBase::E_None)
+        return;
+
+    osl::DirectoryItem aItem;
+    osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
+    while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
+    {
+        aItem.getFileStatus(aFileStatus);
+        OUString sPath = aFileStatus.getFileURL();
+        if (sPath.endsWith(aSystemSuffix))
+        {
+            sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1;
+            if (!sPath.match(aSystemPrefix, nStartIndex))
+                continue;
+            // Length of the part between prefix and suffix. Prefix and suffix may overlap in
+            // the file name (prefix "th_" and suffix "_v2.dat" both match "th_v2.dat"), which
+            // makes this negative; OUString::copy must not be called with a negative count.
+            const sal_Int32 nChunkStart = nStartIndex + aSystemPrefix.getLength();
+            const sal_Int32 nChunkLen
+                = sPath.getLength() - aSystemSuffix.getLength() - nChunkStart;
+            if (nChunkLen <= 0)
+                continue;
+            OUString sChunk = sPath.copy(nChunkStart, nChunkLen);
+
+            // We prefer (now) to use language tags.
+            // Avoid feeding in the older LANG_REGION scheme to the BCP47
+            // ctor as that triggers use of liblangtag and initializes its
+            // database which we do not want during startup. Convert
+            // instead.
+            sChunk = sChunk.replace( '_', '-');
+
+            // There's a known exception to the rule, the dreaded
+            // hu_HU_u8.dic of the myspell-hu package, see
+            // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic
+            // This was ignored because unknown in the old implementation,
+            // truncate to the known locale and either insert because hu_HU
+            // wasn't encountered yet, or skip because it was. It doesn't
+            // really matter because the proper new-style hu_HU dictionary
+            // will take precedence anyway if installed with a Hungarian
+            // languagepack. Again, this is only to not pull in all
+            // liblangtag and stuff during startup, the result would be
+            // !isValidBcp47() and the dictionary ignored.
+            if (sChunk == "hu-HU-u8")
+                sChunk = "hu-HU";
+
+            LanguageTag aLangTag(sChunk, true);
+            if (!aLangTag.isValidBcp47())
+                continue;
+
+            // Thus we first get the language of the dictionary
+            const OUString& aLocaleName(aLangTag.getBcp47());
+
+            if (aDicLangInUse.insert(aLocaleName).second)
+            {
+                // add the dictionary to the resulting vector
+                SvtLinguConfigDictionaryEntry aDicEntry;
+                aDicEntry.aLocations = { sPath };
+                aDicEntry.aLocaleNames = { aLocaleName };
+                aDicEntry.aFormatName = aFormatName;
+                aRes.push_back( aDicEntry );
+            }
+        }
+    }
+}
+#endif
+
+// build list of old style dictionaries (not as extensions) to use.
+// User installed dictionaries (the ones residing in the user paths)
+// will get precedence over system installed ones for the same language.
+std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType ) // pDicType: "DICT", "HYPH" or "THES"; anything else (or null) yields an empty result
+{
+ std::vector< SvtLinguConfigDictionaryEntry > aRes;
+
+ if (!pDicType)
+ return aRes;
+
+ OUString aFormatName;
+ OUString aDicExtension;
+#if defined SYSTEM_DICTS || defined IOS
+ OUString aSystemDir;
+ OUString aSystemPrefix;
+ OUString aSystemSuffix;
+#endif
+ if (strcmp( pDicType, "DICT" ) == 0)
+ {
+ aFormatName = "DICT_SPELL";
+ aDicExtension = ".dic";
+#ifdef SYSTEM_DICTS
+ aSystemDir = DICT_SYSTEM_DIR;
+ aSystemSuffix = aDicExtension;
+#elif defined IOS
+ aSystemDir = "$BRAND_BASE_DIR/share/spell";
+ rtl::Bootstrap::expandMacros(aSystemDir);
+ aSystemSuffix = ".dic";
+#endif
+ }
+ else if (strcmp( pDicType, "HYPH" ) == 0)
+ {
+ aFormatName = "DICT_HYPH";
+ aDicExtension = ".dic";
+#ifdef SYSTEM_DICTS
+ aSystemDir = HYPH_SYSTEM_DIR;
+ aSystemPrefix = "hyph_";
+ aSystemSuffix = aDicExtension;
+#endif
+ } // NOTE(review): there is no IOS branch here, so on IOS-only builds aSystemDir stays empty for "HYPH" — confirm this is intended
+ else if (strcmp( pDicType, "THES" ) == 0)
+ {
+ aFormatName = "DICT_THES";
+ aDicExtension = ".dat";
+#ifdef SYSTEM_DICTS
+ aSystemDir = THES_SYSTEM_DIR;
+ aSystemPrefix = "th_";
+ aSystemSuffix = "_v2.dat";
+#elif defined IOS
+ aSystemDir = "$BRAND_BASE_DIR/share/thes";
+ rtl::Bootstrap::expandMacros(aSystemDir);
+ aSystemPrefix = "th_";
+ aSystemSuffix = "_v2.dat";
+#endif
+ }
+
+ if (aFormatName.isEmpty() || aDicExtension.isEmpty()) // true when pDicType matched none of the three known types
+ return aRes;
+
+#if defined SYSTEM_DICTS || defined IOS
+ // set of languages to remember the language where it is already
+ // decided to make use of the dictionary.
+ std::set< OUString > aDicLangInUse;
+
+#ifndef IOS
+ // follow the hunspell tool's example and check DICPATH for preferred dictionaries
+ rtl_uString * pSearchPath = nullptr;
+ osl_getEnvironment(OUString("DICPATH").pData, &pSearchPath);
+
+ if (pSearchPath)
+ {
+ OUString aSearchPath(pSearchPath);
+ rtl_uString_release(pSearchPath);
+
+ sal_Int32 nIndex = 0;
+ do
+ {
+ OUString aSystem( aSearchPath.getToken(0, ':', nIndex) ); // next ':'-separated DICPATH entry; the loop ends once nIndex is -1
+ OUString aCWD;
+ OUString aRelative;
+ OUString aAbsolute;
+
+ if (!utl::Bootstrap::getProcessWorkingDir(aCWD))
+ continue; // in a do-while, continue jumps to the nIndex test, i.e. on to the next entry
+ if (osl::FileBase::getFileURLFromSystemPath(aSystem, aRelative)
+ != osl::FileBase::E_None)
+ continue;
+ if (osl::FileBase::getAbsoluteFileURL(aCWD, aRelative, aAbsolute)
+ != osl::FileBase::E_None)
+ continue;
+
+ // GetOldStyleDicsInDir will make sure the dictionary is the right
+ // type based on its prefix, that way hyphen, mythes and regular
+ // dictionaries can live in one directory
+ GetOldStyleDicsInDir(aAbsolute, aFormatName, aSystemSuffix,
+ aSystemPrefix, aDicLangInUse, aRes);
+ }
+ while (nIndex != -1);
+ }
+#endif
+
+ // load system directories last so that DICPATH prevails
+ GetOldStyleDicsInDir(aSystemDir, aFormatName, aSystemSuffix, aSystemPrefix,
+ aDicLangInUse, aRes);
+#endif
+
+ return aRes; // stays empty when neither SYSTEM_DICTS nor IOS is defined, as all lookups are inside that #if
+}
+
+// Append to rNewStyleDics every entry of rOldStyleDics whose language is
+// not already covered by one of the new style dictionaries.
+//
+// rNewStyleDics  in/out: the new style dictionaries; old style entries that
+//                add a not yet supported language are pushed to its end.
+// rOldStyleDics  the old style dictionaries; each one is expected to list
+//                exactly one language, only the first one is looked at.
+//
+// Old style entries without a language or with an unspecified language are
+// reported via OSL_FAIL and skipped.
+void MergeNewStyleDicsAndOldStyleDics(
+    std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
+    const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
+{
+    // get list of languages supported by new style dictionaries
+    std::set< OUString > aNewStyleLanguages;
+    for (auto const& newStyleDic : rNewStyleDics)
+    {
+        for (const OUString& rLocaleName : newStyleDic.aLocaleNames)
+            aNewStyleLanguages.insert( rLocaleName );
+    }
+
+    // now check all old style dictionaries if they will add a not yet
+    // added language. If so add them to the resulting vector.
+    // The language set is deliberately not extended here, so the decision
+    // depends only on the new style dictionaries passed in.
+    for (auto const& oldStyleDic : rOldStyleDics)
+    {
+        const sal_Int32 nLocaleNames = oldStyleDic.aLocaleNames.getLength();
+
+        if (nLocaleNames <= 0)
+        {
+            OSL_FAIL( "old style dictionary with no language found!" );
+            continue;
+        }
+
+        // old style dics should only have one language listed...
+        // (the assertion used to test for "non-zero", which could never
+        // detect the situation its message describes)
+        DBG_ASSERT( nLocaleNames == 1, "old style dictionary with more than one language found!");
+
+        const OUString& rLanguage = oldStyleDic.aLocaleNames[0];
+        if (linguistic::LinguIsUnspecified( rLanguage ))
+        {
+            OSL_FAIL( "old style dictionary with invalid language found!" );
+            continue;
+        }
+
+        // language not yet added?
+        if (aNewStyleLanguages.find( rLanguage ) == aNewStyleLanguages.end())
+            rNewStyleDics.push_back(oldStyleDic);
+    }
+}
+
+// Map a charset name to an rtl_TextEncoding.
+//
+// The name is looked up as a MIME charset first and as a Unix charset
+// second; "ISCII-DEVANAGARI" is recognised as a last resort.
+// Returns RTL_TEXTENCODING_DONTKNOW when pCharset is null or no lookup
+// succeeds.
+rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset)
+{
+    // without a name there is nothing to look up
+    if (!pCharset)
+        return RTL_TEXTENCODING_DONTKNOW;
+
+    rtl_TextEncoding eEncoding = rtl_getTextEncodingFromMimeCharset(pCharset);
+    if (eEncoding != RTL_TEXTENCODING_DONTKNOW)
+        return eEncoding;
+
+    eEncoding = rtl_getTextEncodingFromUnixCharset(pCharset);
+    if (eEncoding != RTL_TEXTENCODING_DONTKNOW)
+        return eEncoding;
+
+    // name that neither of the two lookups above resolved
+    if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
+        return RTL_TEXTENCODING_ISCII_DEVANAGARI;
+
+    return RTL_TEXTENCODING_DONTKNOW;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/lingutil/lingutil.hxx b/lingucomponent/source/lingutil/lingutil.hxx
new file mode 100644
index 000000000..4c4fe15ec
--- /dev/null
+++ b/lingucomponent/source/lingutil/lingutil.hxx
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX
+
+#include <rtl/string.hxx>
+
+#include <vector>
+
+// Converts rtlOUString to rtlEncoding and yields the char pointer of the
+// resulting OString. The OString is a temporary that is destroyed at the
+// end of the full expression containing the macro, so the pointer may only
+// be used within that expression (e.g. as a direct function argument) and
+// must not be stored.
+// NOTE(review): going by the flag name, characters without a mapping in the
+// target encoding are replaced by a question mark - confirm.
+#define OU2ENC(rtlOUString, rtlEncoding) \
+ OString((rtlOUString).getStr(), (rtlOUString).getLength(), \
+ rtlEncoding, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK).getStr()
+
+struct SvtLinguConfigDictionaryEntry;
+
+#if defined(_WIN32)
+
+// to be used to get a path name with long path prefix
+// under Windows for Hunspell, Hyphen and MyThes libraries
+OString Win_AddLongPathPrefix( const OString &rPathName );
+#endif
+
+
+// temporary function, to be removed when new style dictionaries
+// using configuration entries are fully implemented and provided
+std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char * pDicType );
+void MergeNewStyleDicsAndOldStyleDics( std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics );
+
+//Find an encoding from a charset string, using
+//rtl_getTextEncodingFromMimeCharset and falling back to
+//rtl_getTextEncodingFromUnixCharset with the addition of
+//ISCII-DEVANAGARI. On failure (including a null charset) returns
+//RTL_TEXTENCODING_DONTKNOW
+rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset);
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/numbertext/numbertext.component b/lingucomponent/source/numbertext/numbertext.component
new file mode 100644
index 000000000..c3277533b
--- /dev/null
+++ b/lingucomponent/source/numbertext/numbertext.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="com.sun.star.lingu2.NumberText"
+ constructor="lingucomponent_NumberText_get_implementation">
+ <service name="com.sun.star.linguistic2.NumberText"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx
new file mode 100644
index 000000000..79c8e6810
--- /dev/null
+++ b/lingucomponent/source/numbertext/numbertext.cxx
@@ -0,0 +1,168 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <mutex>
+
+#include <osl/file.hxx>
+#include <tools/debug.hxx>
+#include <o3tl/char16_t2wchar_t.hxx>
+
+#include <sal/config.h>
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <cppuhelper/supportsservice.hxx>
+
+#include <i18nlangtag/languagetag.hxx>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XNumberText.hpp>
+#include <unotools/pathoptions.hxx>
+#include <osl/thread.h>
+
+#include <Numbertext.hxx>
+
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+// Returns the single mutex shared by every caller in this file.
+static std::mutex& GetNumberTextMutex()
+{
+    // function-local static: one instance for the whole process, created
+    // on the first call
+    static std::mutex s_aNumberTextMutex;
+    return s_aNumberTextMutex;
+}
+
+namespace
+{
+// UNO component implementing XNumberText and XServiceInfo on top of a
+// Numbertext object.
+class NumberText_Impl : public ::cppu::WeakImplHelper<XNumberText, XServiceInfo>
+{
+ // the number-to-text engine; its prefix is set in EnsureInitialized()
+ Numbertext m_aNumberText;
+ // true once EnsureInitialized() has run
+ bool m_bInitialized;
+
+ // private destructor: the object cannot be destroyed directly from
+ // outside the class
+ virtual ~NumberText_Impl() override {}
+ // sets the data path prefix of m_aNumberText on the first call
+ void EnsureInitialized();
+
+public:
+ NumberText_Impl();
+ NumberText_Impl(const NumberText_Impl&) = delete;
+ NumberText_Impl& operator=(const NumberText_Impl&) = delete;
+
+ // XServiceInfo implementation
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
+ virtual Sequence<OUString> SAL_CALL getSupportedServiceNames() override;
+
+ // XNumberText implementation
+ virtual OUString SAL_CALL getNumberText(const OUString& aText,
+ const ::css::lang::Locale& rLocale) override;
+ virtual css::uno::Sequence<css::lang::Locale> SAL_CALL getAvailableLanguages() override;
+};
+}
+
+// Creates the object in its not-yet-initialized state; the actual setup is
+// deferred to EnsureInitialized().
+NumberText_Impl::NumberText_Impl()
+    : m_bInitialized{ false }
+{
+}
+
+// One-time setup: points m_aNumberText at the numbertext data directory.
+// Does nothing on the second and later calls.
+void NumberText_Impl::EnsureInitialized()
+{
+    if (!m_bInitialized)
+    {
+        // set this to true at the very start to prevent loops because of
+        // implicitly called functions below
+        m_bInitialized = true;
+
+        // set default numbertext path to where those get installed
+        OUString aSystemPath;
+        osl::FileBase::getSystemPathFromFileURL(SvtPathOptions().GetNumbertextPath(),
+                                                aSystemPath);
+
+        // the prefix handed over ends in the platform's path separator
+#ifdef _WIN32
+        aSystemPath += "\\";
+        const OString aPrefix = OUStringToOString(aSystemPath, RTL_TEXTENCODING_UTF8);
+#else
+        aSystemPath += "/";
+        const OString aPrefix = OUStringToOString(aSystemPath, osl_getThreadTextEncoding());
+#endif
+        m_aNumberText.set_prefix(aPrefix.getStr());
+    }
+}
+
+// Runs rText through libnumbertext for the language of rLocale and returns
+// the result.
+//
+// The language code handed to libnumbertext is built as
+// language[-script][-country] from rLocale. The conversion happens in place
+// on a wide string that initially holds rText.
+OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Locale& rLocale)
+{
+    std::scoped_lock aGuard(GetNumberTextMutex());
+    EnsureInitialized();
+
+    // libnumbertext supports Language + Country tags (separated by "_" or "-")
+    LanguageTag aTag(rLocale);
+    OUString aTagText(aTag.getLanguage());
+    const OUString aCountryPart(aTag.getCountry());
+    const OUString aScriptPart(aTag.getScript());
+    if (!aScriptPart.isEmpty())
+        aTagText += "-" + aScriptPart;
+    if (!aCountryPart.isEmpty())
+        aTagText += "-" + aCountryPart;
+    const OString aAsciiTag(OUStringToOString(aTagText, RTL_TEXTENCODING_ASCII_US));
+
+    // input and output share one buffer
+#if defined(_WIN32)
+    std::wstring aBuffer(o3tl::toW(rText.getStr()));
+#else
+    const OString aUtf8Input(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
+    std::wstring aBuffer = Numbertext::string2wstring(aUtf8Input.getStr());
+#endif
+
+    bool result = m_aNumberText.numbertext(aBuffer, aAsciiTag.getStr());
+    DBG_ASSERT(result, "numbertext: false");
+
+#if defined(_WIN32)
+    return OUString(o3tl::toU(aBuffer.c_str()));
+#else
+    return OUString::fromUtf8(Numbertext::wstring2string(aBuffer).c_str());
+#endif
+}
+
+// Not implemented yet: always returns an empty sequence.
+uno::Sequence<Locale> SAL_CALL NumberText_Impl::getAvailableLanguages()
+{
+    std::scoped_lock aGuard(GetNumberTextMutex());
+    // TODO
+    return Sequence<css::lang::Locale>();
+}
+
+// XServiceInfo: the implementation name of this component.
+OUString SAL_CALL NumberText_Impl::getImplementationName()
+{
+    OUString aImplName("com.sun.star.lingu2.NumberText");
+    return aImplName;
+}
+
+// XServiceInfo: delegates the check to the cppu helper.
+sal_Bool SAL_CALL NumberText_Impl::supportsService(const OUString& ServiceName)
+{
+    const bool bSupported = cppu::supportsService(this, ServiceName);
+    return bSupported;
+}
+
+// XServiceInfo: the single service name this component implements.
+Sequence<OUString> SAL_CALL NumberText_Impl::getSupportedServiceNames()
+{
+    Sequence<OUString> aServiceNames{ "com.sun.star.linguistic2.NumberText" };
+    return aServiceNames;
+}
+
+// Component constructor function: creates a new NumberText_Impl and returns
+// it acquired. Both parameters are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+lingucomponent_NumberText_get_implementation(css::uno::XComponentContext*,
+                                             css::uno::Sequence<css::uno::Any> const&)
+{
+    NumberText_Impl* pInstance = new NumberText_Impl();
+    return cppu::acquire(pInstance);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/spellcheck/languagetool/LanguageTool.component b/lingucomponent/source/spellcheck/languagetool/LanguageTool.component
new file mode 100644
index 000000000..9f7eb3d08
--- /dev/null
+++ b/lingucomponent/source/spellcheck/languagetool/LanguageTool.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="org.openoffice.lingu.LanguageToolGrammarChecker"
+ constructor="lingucomponent_LanguageToolGrammarChecker_get_implementation" single-instance="true">
+ <service name="com.sun.star.linguistic2.Proofreader"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
new file mode 100644
index 000000000..06b4fcb64
--- /dev/null
+++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
@@ -0,0 +1,407 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/weak.hxx>
+#include "languagetoolimp.hxx"
+
+#include <i18nlangtag/languagetag.hxx>
+#include <svtools/strings.hrc>
+#include <unotools/resmgr.hxx>
+
+#include <vector>
+#include <set>
+#include <string.h>
+
+#include <curl/curl.h>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/json_parser.hpp>
+#include <algorithm>
+#include <string_view>
+#include <sal/log.hxx>
+#include <svtools/languagetoolcfg.hxx>
+#include <tools/color.hxx>
+#include <tools/long.hxx>
+#include <com/sun/star/uno/Any.hxx>
+
+using namespace osl;
+using namespace com::sun::star;
+using namespace com::sun::star::beans;
+using namespace com::sun::star::lang;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::linguistic2;
+using namespace linguistic;
+
+#define COL_ORANGE Color(0xD1, 0x68, 0x20)
+
+namespace
+{
+// Builds a PropertyValue named rName that carries rValue, with handle -1
+// and state DIRECT_VALUE.
+PropertyValue lcl_MakePropertyValue(const OUString& rName, uno::Any& rValue)
+{
+    PropertyValue aProperty;
+    aProperty.Name = rName;
+    aProperty.Handle = -1;
+    aProperty.Value = rValue;
+    aProperty.State = css::beans::PropertyState_DIRECT_VALUE;
+    return aProperty;
+}
+
+// Returns a one-element property sequence holding the "LineColor" for an
+// error category id: light red for "TYPOS", light blue for "STYLE" and
+// orange for every other id.
+Sequence<PropertyValue> lcl_GetLineColorPropertyFromErrorId(const std::string& rErrorId)
+{
+    // Same color is used for other errorId's such as GRAMMAR, TYPOGRAPHY..
+    const Color aLineColor = (rErrorId == "TYPOS")
+                                 ? COL_LIGHTRED
+                                 : (rErrorId == "STYLE") ? COL_LIGHTBLUE : COL_ORANGE;
+
+    uno::Any aColorValue;
+    aColorValue <<= aLineColor;
+    return Sequence<PropertyValue>{ lcl_MakePropertyValue("LineColor", aColorValue) };
+}
+}
+
+// The result cache is limited to MAX_CACHE_SIZE entries.
+LanguageToolGrammarChecker::LanguageToolGrammarChecker()
+    : mCachedResults(MAX_CACHE_SIZE)
+{
+}
+
+// Nothing to release explicitly.
+LanguageToolGrammarChecker::~LanguageToolGrammarChecker() = default;
+
+// XProofreader: this component always reports that it is not a spell checker.
+sal_Bool SAL_CALL LanguageToolGrammarChecker::isSpellChecker()
+{
+    return false;
+}
+
+// XSupportedLocales: true when rLocale equals one of the supported locales.
+// The locale list is requested via getLocales() while it is still empty.
+sal_Bool SAL_CALL LanguageToolGrammarChecker::hasLocale(const Locale& rLocale)
+{
+    if (!m_aSuppLocales.hasElements())
+        getLocales();
+
+    // iterate through a const view of the member sequence
+    const Sequence<Locale>& rSupported = std::as_const(m_aSuppLocales);
+    return std::any_of(rSupported.begin(), rSupported.end(),
+                       [&rLocale](const Locale& rCandidate) { return rLocale == rCandidate; });
+}
+
+// XSupportedLocales: returns the locales offered by the configured
+// LanguageTool server.
+//
+// The list is fetched with an HTTP GET from the configured locale list URL
+// and parsed as a JSON array of objects with a "longCode" member. A
+// successfully parsed, non-empty list is kept in m_aSuppLocales and reused
+// by later calls. When no URL is configured, the request does not answer
+// with status 200, the body is empty, or the body cannot be parsed, the
+// (still empty) member sequence is returned and the next call tries again.
+Sequence<Locale> SAL_CALL LanguageToolGrammarChecker::getLocales()
+{
+    if (m_aSuppLocales.hasElements())
+        return m_aSuppLocales;
+    SvxLanguageToolOptions& rLanguageOpts = SvxLanguageToolOptions::Get();
+    OString localeUrl = OUStringToOString(rLanguageOpts.getLocaleListURL(), RTL_TEXTENCODING_UTF8);
+    if (localeUrl.isEmpty())
+    {
+        return m_aSuppLocales;
+    }
+    tools::Long statusCode = 0;
+    std::string response = makeHttpRequest(localeUrl, HTTP_METHOD::HTTP_GET, OString(), statusCode);
+    if (statusCode != 200)
+    {
+        return m_aSuppLocales;
+    }
+    if (response.empty())
+    {
+        return m_aSuppLocales;
+    }
+
+    try
+    {
+        boost::property_tree::ptree root;
+        std::stringstream aStream(response);
+        boost::property_tree::read_json(aStream, root);
+
+        // Fill a local sequence first so that a parse error half way
+        // through cannot leave default-constructed locales in the member.
+        Sequence<Locale> aLocales(static_cast<sal_Int32>(root.size()));
+        auto pArray = aLocales.getArray();
+        sal_Int32 i = 0;
+        for (const auto& rEntry : root)
+        {
+            const std::string longCode = rEntry.second.get<std::string>("longCode");
+            pArray[i++] = LanguageTag::convertToLocale(
+                OUString(longCode.c_str(), longCode.length(), RTL_TEXTENCODING_UTF8));
+        }
+        m_aSuppLocales = aLocales;
+    }
+    catch (const boost::property_tree::ptree_error& rError)
+    {
+        // The server answer is external input: malformed JSON or a missing
+        // "longCode" must not escape from this UNO method as a C++ exception.
+        SAL_WARN("languagetool", "cannot parse locale list: " << rError.what());
+    }
+    return m_aSuppLocales;
+}
+
+// libcurl write callback: appends the received bytes to the std::string
+// that userp points to.
+//
+// ptr points to size * nmemb bytes of data. Returns the number of bytes
+// appended, or 0 when no target string was supplied.
+static size_t WriteCallback(void* ptr, size_t size, size_t nmemb, void* userp)
+{
+    auto* pTarget = static_cast<std::string*>(userp);
+    if (pTarget == nullptr)
+        return 0;
+
+    const size_t nByteCount = size * nmemb;
+    pTarget->append(static_cast<char*>(ptr), nByteCount);
+    return nByteCount;
+}
+
+// Percent-encodes rText (converted to UTF-8) so that it can be used as a
+// value in an application/x-www-form-urlencoded request body.
+// Returns false and leaves rEncoded untouched when libcurl cannot provide
+// the encoded string.
+static bool lcl_UrlEncodeForPost(const OUString& rText, OString& rEncoded)
+{
+    const OString aUtf8(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
+
+    std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> pCurl(curl_easy_init(),
+                                                               &curl_easy_cleanup);
+    if (!pCurl)
+        return false;
+
+    // the string returned by curl_easy_escape has to be freed with curl_free
+    std::unique_ptr<char, decltype(&curl_free)> pEscaped(
+        curl_easy_escape(pCurl.get(), aUtf8.getStr(), aUtf8.getLength()), &curl_free);
+    if (!pEscaped)
+        return false;
+
+    rEncoded = OString(pEscaped.get());
+    return true;
+}
+
+// XProofreader: sends aText to the configured LanguageTool server and
+// returns the reported errors.
+//
+// The result always echoes the document identifier, text, locale and
+// sentence positions passed in. It carries no errors when the text is
+// empty, nStartOfSentencePosition is not 0, the checker is disabled or has
+// no URL, the first property is named "Update", the text cannot be
+// encoded, or the request does not answer with status 200 and a body.
+// Successfully parsed results are cached per text.
+ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading(
+    const OUString& aDocumentIdentifier, const OUString& aText, const Locale& aLocale,
+    sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition,
+    const Sequence<PropertyValue>& aProperties)
+{
+    // ProofreadingResult declared here instead of parseHttpJSONResponse because of the early exits.
+    ProofreadingResult xRes;
+    xRes.aDocumentIdentifier = aDocumentIdentifier;
+    xRes.aText = aText;
+    xRes.aLocale = aLocale;
+    xRes.nStartOfSentencePosition = nStartOfSentencePosition;
+    xRes.nBehindEndOfSentencePosition = nSuggestedBehindEndOfSentencePosition;
+    xRes.aProperties = Sequence<PropertyValue>();
+    xRes.xProofreader = this;
+    xRes.aErrors = Sequence<SingleProofreadingError>();
+
+    if (aText.isEmpty())
+    {
+        return xRes;
+    }
+
+    if (nStartOfSentencePosition != 0)
+    {
+        return xRes;
+    }
+
+    xRes.nStartOfNextSentencePosition = aText.getLength();
+
+    SvxLanguageToolOptions& rLanguageOpts = SvxLanguageToolOptions::Get();
+    if (rLanguageOpts.getEnabled() == false)
+    {
+        return xRes;
+    }
+
+    OString checkerURL = OUStringToOString(rLanguageOpts.getCheckerURL(), RTL_TEXTENCODING_UTF8);
+    if (checkerURL.isEmpty())
+    {
+        return xRes;
+    }
+
+    if (aProperties.getLength() > 0 && aProperties[0].Name == "Update")
+    {
+        // locale changed
+        xRes.aText = "";
+        return xRes;
+    }
+
+    // NOTE(review): the position arithmetic below is kept exactly as it
+    // was; it starts from the end of the text, so the loop can run at most
+    // once - confirm that this is the intended sentence handling.
+    sal_Int32 spaceIndex = std::min(xRes.nStartOfNextSentencePosition, aText.getLength() - 1);
+    while (spaceIndex < aText.getLength() && aText[spaceIndex] == ' ')
+    {
+        xRes.nStartOfNextSentencePosition += 1;
+        spaceIndex = xRes.nStartOfNextSentencePosition;
+    }
+    if (xRes.nStartOfNextSentencePosition == nSuggestedBehindEndOfSentencePosition
+        && spaceIndex < aText.getLength())
+    {
+        xRes.nStartOfNextSentencePosition
+            = std::min(nSuggestedBehindEndOfSentencePosition + 1, aText.getLength());
+    }
+    xRes.nBehindEndOfSentencePosition
+        = std::min(xRes.nStartOfNextSentencePosition, aText.getLength());
+
+    auto cachedResult = mCachedResults.find(aText);
+    if (cachedResult != mCachedResults.end())
+    {
+        xRes.aErrors = cachedResult->second;
+        return xRes;
+    }
+
+    // The text goes into a form-encoded POST body, so characters such as
+    // '&', '=', '+' and '%' have to be percent-encoded; sent verbatim they
+    // would cut the text short or change it on the server side.
+    OString aEncodedText;
+    if (!lcl_UrlEncodeForPost(aText, aEncodedText))
+    {
+        return xRes;
+    }
+
+    tools::Long http_code = 0;
+    OUString langTag(aLocale.Language + "-" + aLocale.Country);
+    OString postData("text=" + aEncodedText
+                     + "&language=" + OUStringToOString(langTag, RTL_TEXTENCODING_UTF8));
+    const std::string response_body
+        = makeHttpRequest(checkerURL, HTTP_METHOD::HTTP_POST, postData, http_code);
+
+    if (http_code != 200)
+    {
+        return xRes;
+    }
+
+    if (response_body.length() <= 0)
+    {
+        return xRes;
+    }
+
+    parseProofreadingJSONResponse(xRes, response_body);
+    // cache the result
+    mCachedResults.insert(
+        std::pair<OUString, Sequence<SingleProofreadingError>>(aText, xRes.aErrors));
+    return xRes;
+}
+
+/*
+    Parses the JSON answer of the LanguageTool check API into rResult.aErrors.
+
+    rResult is both input and output: only its aErrors member is written,
+    and only when the whole body was parsed successfully and contains at
+    least one match.
+    aJSONBody is the response body from the HTTP Request to LanguageTool API
+
+    Malformed JSON or a missing member is logged and leaves rResult
+    untouched; no Boost.PropertyTree exception leaves this function.
+*/
+void LanguageToolGrammarChecker::parseProofreadingJSONResponse(ProofreadingResult& rResult,
+                                                               std::string_view aJSONBody)
+{
+    try
+    {
+        // A string_view is not guaranteed to be NUL-terminated, so copy it
+        // with its length instead of reading from data() up to a NUL.
+        const std::string aBody(aJSONBody);
+        std::stringstream aStream(aBody);
+        boost::property_tree::ptree root;
+        boost::property_tree::read_json(aStream, root);
+
+        const boost::property_tree::ptree& matches = root.get_child("matches");
+        if (matches.empty())
+        {
+            return;
+        }
+
+        Sequence<SingleProofreadingError> aErrors(static_cast<sal_Int32>(matches.size()));
+        auto pErrors = aErrors.getArray();
+        sal_Int32 i = 0;
+        for (const auto& rMatchEntry : matches)
+        {
+            const boost::property_tree::ptree& match = rMatchEntry.second;
+            SingleProofreadingError& rError = pErrors[i++];
+
+            const int offset = match.get<int>("offset");
+            const int length = match.get<int>("length");
+            // NOTE(review): the JSON "message" ends up in the short comment
+            // and "shortMessage" in the full comment. This mapping is kept
+            // unchanged - confirm whether the swap is intentional.
+            const std::string shortMessage = match.get<std::string>("message");
+            const std::string message = match.get<std::string>("shortMessage");
+
+            // Parse the error category for Line Color
+            const boost::property_tree::ptree& rule = match.get_child("rule");
+            const boost::property_tree::ptree& ruleCategory = rule.get_child("category");
+            const std::string errorCategoryId = ruleCategory.get<std::string>("id");
+
+            rError.nErrorStart = offset;
+            rError.nErrorLength = length;
+            rError.nErrorType = PROOFREADING_ERROR;
+            rError.aShortComment
+                = OUString(shortMessage.c_str(), shortMessage.length(), RTL_TEXTENCODING_UTF8);
+            rError.aFullComment
+                = OUString(message.c_str(), message.length(), RTL_TEXTENCODING_UTF8);
+            rError.aProperties = lcl_GetLineColorPropertyFromErrorId(errorCategoryId);
+
+            const boost::property_tree::ptree& replacements = match.get_child("replacements");
+            // Limit suggestions to avoid crash on context menu popup:
+            // (soffice:17251): Gdk-CRITICAL **: 17:00:21.277: ../../../../../gdk/wayland/gdkdisplay-wayland.c:1399: Unable to create Cairo image
+            // surface: invalid value (typically too big) for the size of the input (surface, pattern, etc.)
+            const sal_Int32 nSuggestions = static_cast<sal_Int32>(
+                std::min<size_t>(replacements.size(), MAX_SUGGESTIONS_SIZE));
+            if (nSuggestions <= 0)
+            {
+                continue;
+            }
+            rError.aSuggestions.realloc(nSuggestions);
+            auto pSuggestions = rError.aSuggestions.getArray();
+            sal_Int32 j = 0;
+            for (auto it = replacements.begin(); it != replacements.end() && j < nSuggestions;
+                 ++it, ++j)
+            {
+                const std::string replacementStr = it->second.get<std::string>("value");
+                pSuggestions[j] = OUString(replacementStr.c_str(), replacementStr.length(),
+                                           RTL_TEXTENCODING_UTF8);
+            }
+        }
+        rResult.aErrors = aErrors;
+    }
+    catch (const boost::property_tree::ptree_error& rError)
+    {
+        // The body comes from a remote server; treat anything unexpected as
+        // "no errors found" rather than letting a C++ exception propagate
+        // into the UNO caller.
+        SAL_WARN("languagetool", "cannot parse proofreading response: " << rError.what());
+    }
+}
+
+// Performs one HTTP request with libcurl and returns the response body.
+//
+// aURL         the URL to request
+// method       HTTP_GET or HTTP_POST
+// aPostData    the form-encoded body for HTTP_POST; when both a user name
+//              and an API key are configured they are appended to it
+// nStatusCode  out: the HTTP response code reported by libcurl (0 when no
+//              response code was received); left untouched when no curl
+//              handle could be created
+//
+// Returns an empty string when no curl handle could be created. Because
+// CURLOPT_FAILONERROR is set, error responses produce no body either.
+std::string LanguageToolGrammarChecker::makeHttpRequest(std::string_view aURL, HTTP_METHOD method,
+                                                        const OString& aPostData,
+                                                        tools::Long& nStatusCode)
+{
+    std::unique_ptr<CURL, std::function<void(CURL*)>> curl(curl_easy_init(),
+                                                           [](CURL* p) { curl_easy_cleanup(p); });
+    if (!curl)
+        return {}; // empty string
+
+    SvxLanguageToolOptions& rLanguageOpts = SvxLanguageToolOptions::Get();
+    const OString apiKey = OUStringToOString(rLanguageOpts.getApiKey(), RTL_TEXTENCODING_UTF8);
+    const OString username = OUStringToOString(rLanguageOpts.getUsername(), RTL_TEXTENCODING_UTF8);
+    const bool isPremium = !apiKey.isEmpty() && !username.isEmpty();
+    // has to stay alive until curl_easy_perform is done: libcurl does not
+    // copy the CURLOPT_POSTFIELDS data
+    OString premiumPostData;
+
+    // percent-encode a form value; falls back to the raw value when libcurl
+    // cannot provide the encoded string
+    const auto escapeFormValue = [&curl](const OString& rValue) -> OString {
+        std::unique_ptr<char, decltype(&curl_free)> pEscaped(
+            curl_easy_escape(curl.get(), rValue.getStr(), rValue.getLength()), &curl_free);
+        return pEscaped ? OString(pEscaped.get()) : rValue;
+    };
+
+    std::string response_body;
+    // a string_view is not guaranteed to be NUL-terminated; libcurl needs a
+    // C string
+    const std::string aUrlCopy(aURL);
+    curl_easy_setopt(curl.get(), CURLOPT_URL, aUrlCopy.c_str());
+
+    curl_easy_setopt(curl.get(), CURLOPT_FAILONERROR, 1L);
+    // curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
+
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallback);
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, static_cast<void*>(&response_body));
+    // curl_easy_setopt is variadic and reads these options as long, so the
+    // argument has to be a long (a bool would be passed as int).
+    // NOTE(review): certificate and host name verification are switched off
+    // here although the text to check and the API key are sent over this
+    // connection. The behaviour is kept as it was; consider enabling
+    // verification or making it configurable.
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, 0L);
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYHOST, 0L);
+    curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, CURL_TIMEOUT);
+
+    if (method == HTTP_METHOD::HTTP_POST)
+    {
+        curl_easy_setopt(curl.get(), CURLOPT_POST, 1L);
+        if (!isPremium)
+        {
+            curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, aPostData.getStr());
+        }
+        else
+        {
+            // user name (often an e-mail address) and key are form values
+            // too and must not break the body with '&', '+' or '%'
+            premiumPostData = aPostData + "&username=" + escapeFormValue(username)
+                              + "&apiKey=" + escapeFormValue(apiKey);
+            curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, premiumPostData.getStr());
+        }
+    }
+
+    const CURLcode eResult = curl_easy_perform(curl.get());
+    SAL_WARN_IF(eResult != CURLE_OK, "languagetool",
+                "HTTP request failed: " << curl_easy_strerror(eResult));
+
+    // CURLINFO_RESPONSE_CODE writes through a long*, which need not be the
+    // same type as tools::Long
+    long nResponseCode = 0;
+    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &nResponseCode);
+    nStatusCode = nResponseCode;
+    return response_body;
+}
+
+// XProofreader: ignoring rules is not implemented; the call has no effect.
+void SAL_CALL LanguageToolGrammarChecker::ignoreRule(const OUString& /*aRuleIdentifier*/,
+                                                     const Locale& /*aLocale*/)
+{
+}
+// XProofreader: there are no ignored rules to reset; the call has no effect.
+void SAL_CALL LanguageToolGrammarChecker::resetIgnoreRules()
+{
+}
+
+// XServiceDisplayName: returns the STR_DESCRIPTION_LANGUAGETOOL string from
+// the "svt" resources, looked up for rLocale.
+OUString SAL_CALL LanguageToolGrammarChecker::getServiceDisplayName(const Locale& rLocale)
+{
+    const LanguageTag aRequestedLanguage(rLocale);
+    const std::locale aResourceLocale(Translate::Create("svt", aRequestedLanguage));
+    return Translate::get(STR_DESCRIPTION_LANGUAGETOOL, aResourceLocale);
+}
+
+// XServiceInfo: the implementation name of this component.
+OUString SAL_CALL LanguageToolGrammarChecker::getImplementationName()
+{
+    OUString aImplName("org.openoffice.lingu.LanguageToolGrammarChecker");
+    return aImplName;
+}
+
+// XServiceInfo: delegates the check to the cppu helper.
+sal_Bool SAL_CALL LanguageToolGrammarChecker::supportsService(const OUString& ServiceName)
+{
+    const bool bSupported = cppu::supportsService(this, ServiceName);
+    return bSupported;
+}
+
+// XServiceInfo: the single service name this component implements.
+Sequence<OUString> SAL_CALL LanguageToolGrammarChecker::getSupportedServiceNames()
+{
+    Sequence<OUString> aServiceNames{ SN_GRAMMARCHECKER };
+    return aServiceNames;
+}
+
+// XInitialization: the arguments are ignored; the call has no effect.
+void SAL_CALL LanguageToolGrammarChecker::initialize(const Sequence<Any>& /*rArguments*/)
+{
+}
+
+// Component constructor function named in LanguageTool.component: creates a
+// new LanguageToolGrammarChecker and returns it acquired. Both parameters
+// are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+lingucomponent_LanguageToolGrammarChecker_get_implementation(
+ css::uno::XComponentContext*, css::uno::Sequence<css::uno::Any> const&)
+{
+ return cppu::acquire(static_cast<cppu::OWeakObject*>(new LanguageToolGrammarChecker()));
+} \ No newline at end of file
diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx
new file mode 100644
index 000000000..e0dadfeca
--- /dev/null
+++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx
@@ -0,0 +1,91 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#pragma once
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/lang/XServiceDisplayName.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/lang/XServiceName.hpp>
+#include <com/sun/star/linguistic2/XProofreader.hpp>
+#include <com/sun/star/linguistic2/ProofreadingResult.hpp>
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/beans/PropertyValues.hpp>
+#include <linguistic/misc.hxx>
+#include <string_view>
+#include <o3tl/lru_map.hxx>
+#include <tools/long.hxx>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::beans;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+// Tuning constants of the LanguageTool grammar checker
+// MAX_CACHE_SIZE: size passed to the constructor of the result cache
+#define MAX_CACHE_SIZE 10
+// MAX_SUGGESTIONS_SIZE: upper limit for the suggestions taken over per error
+#define MAX_SUGGESTIONS_SIZE 10
+// PROOFREADING_ERROR: value stored in nErrorType of every reported error
+// NOTE(review): presumably text::TextMarkupType::PROOFREADING - confirm
+#define PROOFREADING_ERROR 2
+// CURL_TIMEOUT: value passed to CURLOPT_TIMEOUT (seconds, per libcurl)
+#define CURL_TIMEOUT 10L
+
+// HTTP request method selector for makeHttpRequest
+enum class HTTP_METHOD
+{
+ HTTP_GET,
+ // makeHttpRequest sends the post data only for this method
+ HTTP_POST
+};
+
+// Proofreader component that forwards text to a LanguageTool server over
+// HTTP and turns the JSON answer into proofreading errors.
+class LanguageToolGrammarChecker
+ : public cppu::WeakImplHelper<XProofreader, XInitialization, XServiceInfo, XServiceDisplayName>
+{
+ // locales reported by the server; filled by getLocales()
+ Sequence<Locale> m_aSuppLocales;
+ // cache of proofreading results, keyed by the checked text
+ o3tl::lru_map<OUString, Sequence<SingleProofreadingError>> mCachedResults;
+ LanguageToolGrammarChecker(const LanguageToolGrammarChecker&) = delete;
+ LanguageToolGrammarChecker& operator=(const LanguageToolGrammarChecker&) = delete;
+ // fills rResult.aErrors from the JSON body of a check request
+ static void parseProofreadingJSONResponse(ProofreadingResult& rResult,
+ std::string_view aJSONBody);
+ // performs the HTTP request and returns the response body; the HTTP
+ // response code is written to nStatusCode
+ static std::string makeHttpRequest(std::string_view aURL, HTTP_METHOD method,
+ const OString& aPostData, tools::Long& nStatusCode);
+
+public:
+ LanguageToolGrammarChecker();
+ virtual ~LanguageToolGrammarChecker() override;
+
+ // XSupportedLocales
+ virtual Sequence<Locale> SAL_CALL getLocales() override;
+ virtual sal_Bool SAL_CALL hasLocale(const Locale& rLocale) override;
+
+ // XProofReader
+ virtual sal_Bool SAL_CALL isSpellChecker() override;
+ virtual ProofreadingResult SAL_CALL doProofreading(
+ const OUString& aDocumentIdentifier, const OUString& aText, const Locale& aLocale,
+ sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition,
+ const Sequence<PropertyValue>& aProperties) override;
+
+ virtual void SAL_CALL ignoreRule(const OUString& aRuleIdentifier,
+ const Locale& aLocale) override;
+ virtual void SAL_CALL resetIgnoreRules() override;
+
+ // XServiceDisplayName
+ virtual OUString SAL_CALL getServiceDisplayName(const Locale& rLocale) override;
+
+ // XInitialization
+ virtual void SAL_CALL initialize(const Sequence<Any>& rArguments) override;
+
+ // XServiceInfo
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual sal_Bool SAL_CALL supportsService(const OUString& rServiceName) override;
+ virtual Sequence<OUString> SAL_CALL getSupportedServiceNames() override;
+}; \ No newline at end of file
diff --git a/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component b/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component
new file mode 100644
index 000000000..b1fe7d612
--- /dev/null
+++ b/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="org.openoffice.lingu.MacOSXSpellChecker"
+ constructor="lingucomponent_MacSpellChecker_get_implementation" single-instance="true">
+ <service name="com.sun.star.linguistic2.SpellChecker"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx
new file mode 100644
index 000000000..776c474d2
--- /dev/null
+++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx
@@ -0,0 +1,123 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_MACOSXSPELL_MACSPELLIMP_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_MACOSXSPELL_MACSPELLIMP_HXX
+
+#include <comphelper/interfacecontainer3.hxx>
+#include <cppuhelper/implbase.hxx>
+
+#include <premac.h>
+#ifdef MACOSX
+#import <Cocoa/Cocoa.h>
+#else
+#include <UIKit/UIKit.h>
+#endif
+#include <postmac.h>
+#include <com/sun/star/lang/XComponent.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/lang/XServiceDisplayName.hpp>
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XSpellChecker.hpp>
+#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp>
+
+#include <linguistic/misc.hxx>
+#include <linguistic/lngprophelp.hxx>
+
+#include <lingutil.hxx>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::beans;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+// UNO spell checker service implemented on top of the platform's own checker:
+// this header pulls in Cocoa when MACOSX is defined and UIKit otherwise.
+class MacSpellChecker :
+ public cppu::WeakImplHelper
+ <
+ XSpellChecker,
+ XLinguServiceEventBroadcaster,
+ XInitialization,
+ XComponent,
+ XServiceInfo,
+ XServiceDisplayName
+ >
+{
+ // Locale list returned by getLocales(); filled on its first call.
+ Sequence< Locale > aSuppLocales;
+ // Arrays allocated with new[] in getLocales() and freed in the destructor.
+ rtl_TextEncoding * aDEncs;
+ Locale * aDLocs;
+ OUString * aDNames;
+ // Number of entries in the arrays above; 0 until getLocales() has built the list.
+ sal_Int32 numdict;
+#ifdef MACOSX
+ int macTag; // unique tag for this doc
+#else
+ // Text checker instance created in the constructor.
+ UITextChecker * pChecker;
+#endif
+ // XComponent event listeners, notified and cleared by dispose().
+ ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners;
+ // Created either by initialize() or lazily by GetPropHelper_Impl().
+ rtl::Reference< linguistic::PropertyHelper_Spell > xPropHelper;
+ // Set by dispose(); listener registration is refused once it is true.
+ bool bDisposing;
+
+ MacSpellChecker(const MacSpellChecker &) = delete;
+ MacSpellChecker & operator = (const MacSpellChecker &) = delete;
+
+ // Creates xPropHelper if it does not exist yet and returns it.
+ linguistic::PropertyHelper_Spell & GetPropHelper_Impl();
+ // Returns the existing property helper, creating it on demand.
+ linguistic::PropertyHelper_Spell & GetPropHelper()
+ {
+ return xPropHelper.is() ? *xPropHelper : GetPropHelper_Impl();
+ }
+
+ // Returns -1 if the platform checker accepts rWord, otherwise a SpellFailure constant.
+ sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale );
+ // Builds the XSpellAlternatives result (with the platform's guesses) for a misspelled word.
+ Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale );
+
+public:
+ MacSpellChecker();
+ virtual ~MacSpellChecker() override;
+
+ // XSupportedLocales (for XSpellChecker)
+ virtual Sequence< Locale > SAL_CALL getLocales() override;
+ virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override;
+
+ // XSpellChecker
+ virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override;
+ virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override;
+
+ // XLinguServiceEventBroadcaster
+ virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
+ virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
+
+ // XServiceDisplayName
+ virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override;
+
+ // XInitialization
+ virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override;
+
+ // XComponent
+ virtual void SAL_CALL dispose() override;
+ virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override;
+ virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override;
+
+ // XServiceInfo
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override;
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+};
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm
new file mode 100644
index 000000000..c20871b1e
--- /dev/null
+++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm
@@ -0,0 +1,666 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/uno/Reference.h>
+
+#include <com/sun/star/linguistic2/SpellFailure.hpp>
+#include <com/sun/star/linguistic2/XLinguProperties.hpp>
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/weak.hxx>
+#include <com/sun/star/registry/XRegistryKey.hpp>
+#include <com/sun/star/lang/XSingleServiceFactory.hpp>
+#include <tools/debug.hxx>
+#include <osl/mutex.hxx>
+
+#include "macspellimp.hxx"
+
+#include <linguistic/spelldta.hxx>
+#include <unotools/pathoptions.hxx>
+#include <unotools/useroptions.hxx>
+#include <osl/file.hxx>
+#include <rtl/ref.hxx>
+#include <rtl/ustrbuf.hxx>
+
+using namespace utl;
+using namespace osl;
+using namespace com::sun::star;
+using namespace com::sun::star::beans;
+using namespace com::sun::star::lang;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::linguistic2;
+using namespace linguistic;
+
+// Sets up an empty checker: no locale tables yet, not disposing, and the
+// platform-specific handle (document tag or UITextChecker) acquired.
+MacSpellChecker::MacSpellChecker()
+    : aDEncs( nullptr )
+    , aDLocs( nullptr )
+    , aDNames( nullptr )
+    , numdict( 0 )
+    , aEvtListeners( GetLinguMutex() )
+    , bDisposing( false )
+{
+#ifdef IOS
+    pChecker = [[UITextChecker alloc] init];
+#else
+    NSApplicationLoad();
+    // Temporary pool so that anything autoreleased while fetching the tag is freed right away.
+    NSAutoreleasePool* pTagPool = [[NSAutoreleasePool alloc] init];
+    macTag = [NSSpellChecker uniqueSpellDocumentTag];
+    [pTagPool release];
+#endif
+}
+
+
+MacSpellChecker::~MacSpellChecker()
+{
+ numdict = 0;
+ if (aDEncs) delete[] aDEncs;
+ aDEncs = nullptr;
+ if (aDLocs) delete[] aDLocs;
+ aDLocs = nullptr;
+ if (aDNames) delete[] aDNames;
+ aDNames = nullptr;
+ if (xPropHelper.is())
+ xPropHelper->RemoveAsPropListener();
+}
+
+
+// Returns the property helper, creating it from the global linguistic
+// properties and registering it as property listener if it does not exist yet.
+PropertyHelper_Spell & MacSpellChecker::GetPropHelper_Impl()
+{
+    if (xPropHelper.is())
+        return *xPropHelper;
+
+    Reference< XLinguProperties > xLinguProps( GetLinguProperties() );
+    xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xLinguProps );
+    xPropHelper->AddAsPropListener();
+    return *xPropHelper;
+}
+
+
+// Returns the locales this checker offers. The list is derived once from the
+// languages the platform spell checker reports and is cached afterwards.
+//
+// Languages reported without a territory code (and a few odd combinations) are
+// expanded to the territory-specific locales LO handles; see the table below.
+Sequence< Locale > SAL_CALL MacSpellChecker::getLocales()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    // A non-zero numdict means the list has already been built.
+    if (numdict)
+        return aSuppLocales;
+
+    // Region lists shared by several table entries.
+    // Not de_CH and de_LI, though. They need separate dictionaries.
+    const std::vector<NSString*> aDE
+        { @"AT", @"BE", @"DE", @"LU" };
+    // System has en_AU, en_CA, en_GB, and en_IN. Add the rest.
+    const std::vector<NSString*> aEN
+        { @"BW", @"BZ", @"GH", @"GM", @"IE", @"JM", @"MU", @"MW", @"MY", @"NA",
+          @"NZ", @"PH", @"TT", @"US", @"ZA", @"ZW" };
+    const std::vector<NSString*> aES
+        { @"AR", @"BO", @"CL", @"CO", @"CR", @"CU", @"DO", @"EC", @"ES", @"GT",
+          @"HN", @"MX", @"NI", @"PA", @"PE", @"PR", @"PY", @"SV", @"UY", @"VE" };
+    const std::vector<NSString*> aFR
+        { @"BE", @"BF", @"BJ", @"CA", @"CH", @"CI", @"FR", @"LU", @"MC", @"ML",
+          @"MU", @"NE", @"SN", @"TG" };
+    const std::vector<NSString*> aIT
+        { @"CH", @"IT" };
+    const std::vector<NSString*> aSV
+        { @"FI", @"SE" };
+
+    // Maps a language identifier as reported by the system to the locales
+    // registered for it (pPrefix + each region). An empty region list means
+    // the identifier is skipped altogether.
+    struct Expansion
+    {
+        NSString* pSystemId;
+        NSString* pPrefix;
+        std::vector<NSString*> aRegions;
+    };
+    const std::vector<Expansion> aExpansions
+    {
+        { @"da", @"da_", { @"DK" } },
+        { @"de", @"de_", aDE },
+        { @"en", @"en_", aEN },
+        // Just skip, LO doesn't have those yet in this context.
+        { @"en_JP", @"en_", {} },
+        { @"en_SG", @"en_", {} },
+        { @"es", @"es_", aES },
+        { @"fi", @"fi_", { @"FI" } },
+        { @"fr", @"fr_", aFR },
+        { @"it", @"it_", aIT },
+        { @"ko", @"ko_", { @"KR" } },
+        { @"nl", @"nl_", { @"BE", @"NL" } },
+        { @"nb", @"nb_", { @"NO" } },
+        { @"pl", @"pl_", { @"PL" } },
+        { @"ru", @"ru_", { @"RU" } },
+        { @"sv", @"sv_", aSV },
+        { @"tr", @"tr_", { @"TR" } },
+#ifdef IOS
+        // iOS says it has specifically de_DE. Let's assume it is good enough for German as
+        // written in Austria, Belgium, and Luxembourg, too. (Not for German in Switzerland and
+        // Liechtenstein. For those you need to bundle the myspell dictionary.)
+        { @"de_DE", @"de_", aDE },
+        { @"fr_FR", @"fr_", aFR },
+        { @"it_IT", @"it_", aIT },
+        { @"sv_SE", @"sv_", aSV },
+#endif
+    };
+
+    // invoke a dictionary manager to get the user dictionary list
+    // TODO How on macOS?
+
+    // get the languages the platform checker knows about
+#ifdef MACOSX
+    NSArray *aSpellCheckLanguages = [[NSSpellChecker sharedSpellChecker] availableLanguages];
+#else
+    NSArray *aSpellCheckLanguages = [UITextChecker availableLanguages];
+#endif
+
+    std::vector<NSString *> postspdict;
+    for (NSString* pLangStr in aSpellCheckLanguages)
+    {
+        const Expansion* pMatch = nullptr;
+        for (const Expansion& rExp : aExpansions)
+        {
+            if ([pLangStr isEqualToString:rExp.pSystemId])
+            {
+                pMatch = &rExp;
+                break;
+            }
+        }
+
+        if (!pMatch)
+        {
+            // Nothing to fix up: register the identifier as reported.
+            postspdict.push_back( pLangStr );
+            continue;
+        }
+        for (NSString* pRegion : pMatch->aRegions)
+            postspdict.push_back( [pMatch->pPrefix stringByAppendingString: pRegion] );
+    }
+    // System has pt_BR and pt_PT, add pt_AO.
+    postspdict.push_back( @"pt_AO" );
+
+    // we really should merge these and remove duplicates but since
+    // users can name their dictionaries anything they want it would
+    // be impossible to know if a real duplication exists unless we
+    // add some unique key to each myspell dictionary
+    // (postspdict is never empty here: pt_AO is always present.)
+    numdict = static_cast<sal_Int32>(postspdict.size());
+
+    aDLocs = new Locale [numdict];
+    aDEncs = new rtl_TextEncoding [numdict];
+    aDNames = new OUString [numdict];
+    aSuppLocales.realloc(numdict);
+    Locale * pLocale = aSuppLocales.getArray();
+    sal_Int32 numlocs = 0;
+
+    //first add the user dictionaries
+    //TODO for MAC?
+
+    // now add the shared dictionaries
+    for (sal_Int32 i = 0; i < numdict; i++)
+    {
+        NSDictionary *aLocDict = [ NSLocale componentsFromLocaleIdentifier:postspdict[i] ];
+        NSString* aLang = [ aLocDict objectForKey:NSLocaleLanguageCode ];
+        NSString* aCountry = [ aLocDict objectForKey:NSLocaleCountryCode ];
+        // The length handed to OUString counts bytes of the UTF-8 buffer, which is
+        // not the same as -[NSString length] (UTF-16 units) for non-ASCII text.
+        OUString lang([aLang cStringUsingEncoding: NSUTF8StringEncoding],
+                      static_cast<sal_Int32>([aLang lengthOfBytesUsingEncoding: NSUTF8StringEncoding]),
+                      RTL_TEXTENCODING_UTF8);
+        OUString country([aCountry cStringUsingEncoding: NSUTF8StringEncoding],
+                         static_cast<sal_Int32>([aCountry lengthOfBytesUsingEncoding: NSUTF8StringEncoding]),
+                         RTL_TEXTENCODING_UTF8);
+        Locale nLoc( lang, country, OUString() );
+
+        //eliminate duplicates (is this needed for MacOS?)
+        bool bNewLocale = true;
+        for (sal_Int32 j = 0; j < numlocs; j++)
+        {
+            if (nLoc == pLocale[j])
+            {
+                bNewLocale = false;
+                break;
+            }
+        }
+        if (bNewLocale)
+        {
+            pLocale[numlocs] = nLoc;
+            numlocs++;
+        }
+        aDLocs[i] = nLoc;
+        aDEncs[i] = 0;
+    }
+
+    // shrink to the number of distinct locales actually stored
+    aSuppLocales.realloc(numlocs);
+
+    return aSuppLocales;
+}
+
+
+
+// Tells whether rLocale is among the locales returned by getLocales().
+sal_Bool SAL_CALL MacSpellChecker::hasLocale(const Locale& rLocale)
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    // Build the locale list first if it is still empty.
+    if (aSuppLocales.getLength() == 0)
+        getLocales();
+
+    const sal_Int32 nCount = aSuppLocales.getLength();
+    for (sal_Int32 nIdx = 0; nIdx < nCount; ++nIdx)
+    {
+        const Locale *pSupported = aSuppLocales.getConstArray();
+        if (rLocale == pSupported[nIdx])
+            return true;
+    }
+    return false;
+}
+
+
+// Asks the platform spell checker about rWord in the language given by rLocale.
+// Returns -1 when the word is accepted (or empty), otherwise
+// SpellFailure::SPELLING_ERROR.
+// (note: mutex is held higher up in isValid)
+sal_Int16 MacSpellChecker::GetSpellFailure( const OUString &rWord, const Locale &rLocale )
+{
+    // first handle smart quotes both single and double
+    OUStringBuffer aBuf(rWord);
+    const sal_Int32 nLen = aBuf.getLength();
+    for (sal_Int32 nPos = 0; nPos < nLen; ++nPos)
+    {
+        switch (aBuf[nPos])
+        {
+            case 0x201C:
+            case 0x201D:
+                aBuf[nPos] = u'"';
+                break;
+            case 0x2018:
+            case 0x2019:
+                aBuf[nPos] = u'\'';
+                break;
+            default:
+                break;
+        }
+    }
+    OUString aWord(aBuf.makeStringAndClear());
+
+    if (nLen == 0)
+        return -1;
+
+    NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
+    NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(aWord.getStr()) length: aWord.getLength()]autorelease];
+    // language identifier in the form "ll" or "ll_CC"
+    NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease];
+    if (rLocale.Country.getLength() > 0)
+    {
+        NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) length: rLocale.Country.getLength()]autorelease];
+        NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry];
+        aLang = [aLang stringByAppendingString:aTaggedCountry];
+    }
+
+#ifdef MACOSX
+    NSInteger aCount;
+    NSRange range = [[NSSpellChecker sharedSpellChecker] checkSpellingOfString:aNSStr startingAt:0 language:aLang wrap:false inSpellDocumentWithTag:macTag wordCount:&aCount];
+#else
+    NSRange range = [pChecker rangeOfMisspelledWordInString:aNSStr range:NSMakeRange(0, [aNSStr length]) startingAt:0 wrap:NO language:aLang];
+#endif
+    // A non-empty range marks a misspelled stretch of the string.
+    const bool bMisspelled = range.length > 0;
+    [pool release];
+
+    sal_Int16 nRes = -1;
+    if (bMisspelled)
+        nRes = SpellFailure::SPELLING_ERROR;
+    return nRes;
+}
+
+
+
+// Returns true if rWord is to be treated as correctly spelled for rLocale.
+// Empty words, the empty locale and unsupported locales always count as valid.
+sal_Bool SAL_CALL
+    MacSpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
+            const css::uno::Sequence<PropertyValue>& rProperties )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (rLocale == Locale() || rWord.isEmpty())
+        return true;
+
+    if (!hasLocale( rLocale ))
+        return true;
+
+    // Get property values to be used.
+    // These are the default values set in the SN_LINGU_PROPERTIES
+    // PropertySet which are overridden by the supplied ones from the
+    // last argument.
+    // You'll probably like to use a simpler solution than the provided
+    // one using the PropertyHelper_Spell.
+    PropertyHelper_Spell &rPropHelper = GetPropHelper();
+    rPropHelper.SetTmpPropVals( rProperties );
+
+    const sal_Int16 nFailure = GetSpellFailure( rWord, rLocale );
+    if (nFailure == -1)
+        return true;
+
+    // postprocess result for errors that should be ignored
+    const LanguageType nLang = LinguLocaleToLanguage( rLocale );
+    if (!rPropHelper.IsSpellUpperCase() && IsUpper( rWord, nLang ))
+        return true;
+    if (!rPropHelper.IsSpellWithDigits() && HasDigits( rWord ))
+        return true;
+    if (!rPropHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR)
+        return true;
+
+    return false;
+}
+
+// Produces the return value of 'spell' for a misspelled word: a
+// SpellAlternatives object carrying the platform checker's guesses, or an
+// empty list of alternatives when there are none.
+// note: mutex is held higher up by spell which covers both
+Reference< XSpellAlternatives >
+    MacSpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
+{
+    Reference< XSpellAlternatives > xRes;
+
+    const LanguageType nLang = LinguLocaleToLanguage( rLocale );
+    Sequence< OUString > aSuggestions( 0 );
+
+    // first handle smart quotes (single and double)
+    OUStringBuffer aBuf(rWord);
+    const sal_Int32 nLen = aBuf.getLength();
+    for (sal_Int32 nPos = 0; nPos < nLen; ++nPos)
+    {
+        switch (aBuf[nPos])
+        {
+            case 0x201C:
+            case 0x201D:
+                aBuf[nPos] = u'"';
+                break;
+            case 0x2018:
+            case 0x2019:
+                aBuf[nPos] = u'\'';
+                break;
+            default:
+                break;
+        }
+    }
+    OUString aWord(aBuf.makeStringAndClear());
+
+    if (nLen)
+    {
+        NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
+        NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(aWord.getStr()) length: aWord.getLength()]autorelease];
+        // language identifier in the form "ll" or "ll_CC"
+        NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease];
+        if (rLocale.Country.getLength() > 0)
+        {
+            NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) length: rLocale.Country.getLength()]autorelease];
+            NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry];
+            aLang = [aLang stringByAppendingString:aTaggedCountry];
+        }
+#ifdef MACOSX
+        [[NSSpellChecker sharedSpellChecker] setLanguage:aLang];
+        NSArray *guesses = [[NSSpellChecker sharedSpellChecker] guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang inSpellDocumentWithTag:0];
+        (void) this; // avoid loplugin:staticmethods, the !MACOSX case uses 'this'
+#else
+        NSArray *guesses = [pChecker guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang];
+#endif
+        const int nGuesses = [guesses count];
+        if (nGuesses)
+        {
+            aSuggestions.realloc( nGuesses );
+            OUString *pOut = aSuggestions.getArray();
+            for (int nIdx = 0; nIdx < nGuesses; ++nIdx)
+            {
+                // copy each guess over as UTF-16 text
+                NSString* guess = [guesses objectAtIndex:nIdx];
+                pOut[nIdx] = OUString(reinterpret_cast<const sal_Unicode*>([guess cStringUsingEncoding:NSUnicodeStringEncoding]), static_cast<sal_Int32>([guess length]));
+            }
+        }
+        [pool release];
+    }
+
+    // now return an empty alternative for no suggestions or the list of alternatives if some found
+    rtl::Reference<SpellAlternatives> xAlternatives = new SpellAlternatives;
+    xAlternatives->SetWordLanguage( rWord, nLang );
+    xAlternatives->SetFailureType( SpellFailure::SPELLING_ERROR );
+    xAlternatives->SetAlternatives( aSuggestions );
+    xRes = xAlternatives;
+    return xRes;
+}
+
+// Returns an empty reference when rWord is accepted (or cannot be checked for
+// rLocale), otherwise the alternatives produced by GetProposals().
+Reference< XSpellAlternatives > SAL_CALL
+    MacSpellChecker::spell( const OUString& rWord, const Locale& rLocale,
+            const css::uno::Sequence<PropertyValue>& rProperties )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (rLocale == Locale() || rWord.isEmpty())
+        return nullptr;
+
+    if (!hasLocale( rLocale ))
+        return nullptr;
+
+    Reference< XSpellAlternatives > xResult;
+    const bool bMisspelled = !isValid( rWord, rLocale, rProperties );
+    if (bMisspelled)
+        xResult = GetProposals( rWord, rLocale );
+    return xResult;
+}
+
+// Forwards the registration to the property helper; refused (false) while
+// disposing or for an empty listener reference.
+sal_Bool SAL_CALL
+    MacSpellChecker::addLinguServiceEventListener(
+            const Reference< XLinguServiceEventListener >& rxLstnr )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxLstnr.is())
+        return false;
+
+    const bool bAdded = GetPropHelper().addLinguServiceEventListener( rxLstnr );
+    return bAdded;
+}
+
+
+// Forwards the removal to the property helper; refused (false) while
+// disposing or for an empty listener reference.
+sal_Bool SAL_CALL
+    MacSpellChecker::removeLinguServiceEventListener(
+            const Reference< XLinguServiceEventListener >& rxLstnr )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxLstnr.is())
+        return false;
+
+    DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" );
+    const bool bRemoved = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
+    return bRemoved;
+}
+
+
+// Returns the fixed, untranslated display name of this service; the locale
+// argument is not used.
+OUString SAL_CALL
+ MacSpellChecker::getServiceDisplayName( const Locale& /*rLocale*/ )
+{
+ MutexGuard aGuard( GetLinguMutex() );
+ return "macOS Spell Checker";
+}
+
+
+// Creates the property helper from the first of exactly two arguments (an
+// XLinguProperties reference). Does nothing if the helper already exists.
+void SAL_CALL
+    MacSpellChecker::initialize( const Sequence< Any >& rArguments )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (xPropHelper.is())
+        return;
+
+    if (rArguments.getLength() != 2)
+    {
+        OSL_FAIL( "wrong number of arguments in sequence" );
+        return;
+    }
+
+    Reference< XLinguProperties > xPropSet;
+    rArguments.getConstArray()[0] >>= xPropSet;
+    //rArguments.getConstArray()[1] >>= xDicList;
+
+    //! Pointer allows for access of the non-UNO functions.
+    //! And the reference to the UNO-functions while increasing
+    //! the ref-count and will implicitly free the memory
+    //! when the object is no longer used.
+    xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xPropSet );
+    xPropHelper->AddAsPropListener();
+}
+
+
+// Marks the object as disposing and notifies/clears the registered event
+// listeners. Repeated calls are ignored.
+void SAL_CALL
+    MacSpellChecker::dispose()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing)
+        return;
+
+    bDisposing = true;
+    EventObject aDisposeEvent( static_cast<XSpellChecker *>(this) );
+    aEvtListeners.disposeAndClear( aDisposeEvent );
+}
+
+
+// Registers an XComponent event listener; ignored while disposing or for an
+// empty reference.
+void SAL_CALL
+    MacSpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxListener.is())
+        return;
+
+    aEvtListeners.addInterface( rxListener );
+}
+
+
+// Unregisters an XComponent event listener; ignored while disposing or for an
+// empty reference.
+void SAL_CALL
+    MacSpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxListener.is())
+        return;
+
+    aEvtListeners.removeInterface( rxListener );
+}
+
+// Service specific part
+// Returns the implementation name; it is the same string that
+// MacOSXSpell.component lists as the implementation name.
+OUString SAL_CALL MacSpellChecker::getImplementationName()
+{
+ return "org.openoffice.lingu.MacOSXSpellChecker";
+}
+
+// Delegates the service-name check to the cppu::supportsService helper.
+sal_Bool SAL_CALL MacSpellChecker::supportsService( const OUString& ServiceName )
+{
+ return cppu::supportsService(this, ServiceName);
+}
+
+// Returns the single supported service name, SN_SPELLCHECKER.
+Sequence< OUString > SAL_CALL MacSpellChecker::getSupportedServiceNames()
+{
+ return { SN_SPELLCHECKER };
+}
+
+// Component constructor function named in MacOSXSpell.component: creates a new
+// MacSpellChecker and returns it acquired. Both arguments are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+lingucomponent_MacSpellChecker_get_implementation(
+ css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
+{
+ return cppu::acquire(new MacSpellChecker());
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/spellcheck/spell/spell.component b/lingucomponent/source/spellcheck/spell/spell.component
new file mode 100644
index 000000000..c284e13fc
--- /dev/null
+++ b/lingucomponent/source/spellcheck/spell/spell.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="org.openoffice.lingu.MySpellSpellChecker"
+ constructor="lingucomponent_SpellChecker_get_implementation" single-instance="true">
+ <service name="com.sun.star.linguistic2.SpellChecker"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
new file mode 100644
index 000000000..95b264157
--- /dev/null
+++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
@@ -0,0 +1,635 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/uno/Reference.h>
+
+#include <com/sun/star/linguistic2/SpellFailure.hpp>
+#include <com/sun/star/linguistic2/XLinguProperties.hpp>
+#include <comphelper/lok.hxx>
+#include <comphelper/processfactory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/weak.hxx>
+#include <com/sun/star/lang/XMultiServiceFactory.hpp>
+#include <tools/debug.hxx>
+#include <osl/mutex.hxx>
+#include <osl/thread.h>
+#include <com/sun/star/ucb/XSimpleFileAccess.hpp>
+
+#include <lingutil.hxx>
+#include <hunspell.hxx>
+#include "sspellimp.hxx"
+
+#include <linguistic/misc.hxx>
+#include <linguistic/spelldta.hxx>
+#include <i18nlangtag/languagetag.hxx>
+#include <svtools/strings.hrc>
+#include <unotools/lingucfg.hxx>
+#include <unotools/resmgr.hxx>
+#include <osl/file.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <rtl/textenc.h>
+#include <sal/log.hxx>
+
+#include <numeric>
+#include <utility>
+#include <vector>
+#include <set>
+#include <string.h>
+
+using namespace utl;
+using namespace osl;
+using namespace com::sun::star;
+using namespace com::sun::star::beans;
+using namespace com::sun::star::lang;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::linguistic2;
+using namespace linguistic;
+
+// XML-header of SPELLML queries
+#if !defined SPELL_XML
+constexpr OUStringLiteral SPELL_XML = u"<?xml?>";
+#endif
+
+// only available in hunspell >= 1.5
+#if !defined MAXWORDLEN
+#define MAXWORDLEN 176
+#endif
+
+// Creates the spell checker in its non-disposing state. The event listener
+// container is set up with the mutex returned by GetLinguMutex(); the
+// property helper is not created here (see GetPropHelper_Impl / initialize).
+SpellChecker::SpellChecker() :
+ m_aEvtListeners(GetLinguMutex()),
+ m_bDisposing(false)
+{
+}
+
+// Stores the dictionary name (location without file extension), its locale
+// and its text encoding. m_pDict is left empty; GetSpellFailure creates the
+// Hunspell instance the first time the dictionary is needed.
+SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc)
+ : m_aDName(std::move(i_DName))
+ , m_aDLoc(std::move(i_DLoc))
+ , m_aDEnc(i_DEnc)
+{
+}
+
+// Unregisters the property helper as property listener, if one was created.
+SpellChecker::~SpellChecker()
+{
+    if (!m_pPropHelper)
+        return;
+
+    m_pPropHelper->RemoveAsPropListener();
+}
+
+// Returns the property helper, creating it from GetLinguProperties() and
+// registering it as property listener when it does not exist yet.
+PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl()
+{
+    if (m_pPropHelper)
+        return *m_pPropHelper;
+
+    Reference< XLinguProperties > xLinguProps = GetLinguProperties();
+    m_pPropHelper.reset(
+        new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xLinguProps ) );
+
+    //! register only after the helper holds its reference
+    m_pPropHelper->AddAsPropListener();
+    return *m_pPropHelper;
+}
+
+// Returns the locales covered by the installed spell-checking dictionaries.
+//
+// As long as m_DictItems is empty the dictionary configuration is scanned:
+// new-style (configuration based) dictionaries are collected per supported
+// format, merged with the old-style (dictionary.lst) ones, and then
+//  - m_aSuppLocales receives every locale whose dictionary's first location
+//    exists and that passes the LibreOfficeKit language allow-list;
+//  - m_DictItems receives one entry per (dictionary, locale) pair, holding the
+//    dictionary location with the file extension stripped.
+// Because the scan is keyed on m_DictItems being empty, it is repeated on
+// every call while no dictionary entry could be registered.
+Sequence< Locale > SAL_CALL SpellChecker::getLocales()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    // this routine should return the locales supported by the installed
+    // dictionaries.
+    if (m_DictItems.empty())
+    {
+        SvtLinguConfig aLinguCfg;
+
+        // get list of extension dictionaries-to-use
+        // (or better speaking: the list of dictionaries using the
+        // new configuration entries).
+        std::vector< SvtLinguConfigDictionaryEntry > aDics;
+        uno::Sequence< OUString > aFormatList;
+        aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers",
+                "org.openoffice.lingu.MySpellSpellChecker", aFormatList );
+        for (auto const& format : std::as_const(aFormatList))
+        {
+            std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
+                    aLinguCfg.GetActiveDictionariesByFormat(format) );
+            aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
+        }
+
+        //!! for compatibility with old dictionaries (the ones not using extensions
+        //!! or new configuration entries, but still using the dictionary.lst file)
+        //!! Get the list of old style spell checking dictionaries to use...
+        std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
+                GetOldStyleDics( "DICT" ) );
+
+        // to prefer dictionaries with configuration entries we will only
+        // use those old style dictionaries that add a language that
+        // is not yet supported by the list of new style dictionaries
+        MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
+
+        if (!aDics.empty())
+        {
+            uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory());
+            uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY);
+            // get supported locales from the dictionaries-to-use...
+            std::set<OUString> aLocaleNamesSet;
+            for (auto const& dict : aDics)
+            {
+                const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
+                uno::Sequence< OUString > aLocations( dict.aLocations );
+                SAL_WARN_IF(
+                    aLocaleNames.hasElements() && !aLocations.hasElements(),
+                    "lingucomponent", "no locations");
+                if (aLocations.hasElements())
+                {
+                    if (xAccess.is() && xAccess->exists(aLocations[0]))
+                    {
+                        for (auto const& locale : aLocaleNames)
+                        {
+                            if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(locale))
+                                continue;
+
+                            aLocaleNamesSet.insert(locale);
+                        }
+                    }
+                    else
+                    {
+                        SAL_WARN(
+                            "lingucomponent",
+                            "missing <" << aLocations[0] << ">");
+                    }
+                }
+            }
+            // ... and add them to the resulting sequence
+            m_aSuppLocales.realloc( aLocaleNamesSet.size() );
+            std::transform(
+                aLocaleNamesSet.begin(), aLocaleNamesSet.end(), m_aSuppLocales.getArray(),
+                [](auto const& localeName) { return LanguageTag::convertToLocale(localeName); });
+
+            //! For each dictionary and each locale we need a separate entry.
+            //! If this results in more than one dictionary per locale then (for now)
+            //! it is undefined which dictionary gets used.
+            //! In the future the implementation should support using several dictionaries
+            //! for one locale.
+            sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0),
+                [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
+                    return nSum + dict.aLocaleNames.getLength(); });
+
+            // add dictionary information
+            m_DictItems.reserve(nDictSize);
+            for (auto const& dict : aDics)
+            {
+                if (dict.aLocaleNames.hasElements() &&
+                    dict.aLocations.hasElements())
+                {
+                    const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
+
+                    // both files have to be in the same directory and the
+                    // file names must only differ in the extension (.aff/.dic).
+                    // Thus we use the first location only and strip the extension part.
+                    // The result is the same for every locale of this dictionary,
+                    // so it is computed once, before the locale loop.
+                    OUString aLocation = dict.aLocations[0];
+                    const sal_Int32 nPos = aLocation.lastIndexOf( '.' );
+                    // lastIndexOf() yields -1 when there is no '.'; a negative
+                    // count must not be passed to copy(), so such a location
+                    // is kept unchanged.
+                    if (nPos >= 0)
+                        aLocation = aLocation.copy( 0, nPos );
+
+                    // currently only one language per dictionary is supported in the actual implementation...
+                    // Thus here we work-around this by adding the same dictionary several times.
+                    // Once for each of its supported locales.
+                    for (auto const& localeName : aLocaleNames)
+                    {
+                        m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW);
+                    }
+                }
+            }
+            DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" );
+        }
+        else
+        {
+            // no dictionary found so register no dictionaries
+            m_aSuppLocales.realloc(0);
+        }
+    }
+
+    return m_aSuppLocales;
+}
+
+// Tells whether rLocale is one of the supported locales. The locale list is
+// populated through getLocales() first when it is still empty.
+sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale)
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (!m_aSuppLocales.hasElements())
+        getLocales();
+
+    for (auto const& rCandidate : std::as_const(m_aSuppLocales))
+    {
+        if (rLocale == rCandidate)
+            return true;
+    }
+    return false;
+}
+
+// Checks rWord against every dictionary registered for rLocale.
+//
+// Returns -1 when the word is not reported as an error: it is longer than
+// MAXWORDLEN, it is empty, no dictionary matches rLocale, a matching
+// dictionary has an unknown text encoding, or some matching dictionary
+// accepts the word. Returns SpellFailure::SPELLING_ERROR when at least one
+// matching dictionary was consulted and none accepted the word.
+//
+// Typographic quotes are replaced by their ASCII counterparts before the
+// lookup. The Hunspell object of a dictionary is created on first use.
+sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale)
+{
+ if (rWord.getLength() > MAXWORDLEN)
+ return -1;
+
+ Hunspell * pMS = nullptr;
+ rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+
+ // initialize a myspell object for each dictionary once
+ // (note: mutex is held higher up in isValid)
+
+ sal_Int16 nRes = -1;
+
+ // first handle smart quotes both single and double
+ OUStringBuffer rBuf(rWord);
+ sal_Int32 n = rBuf.getLength();
+ sal_Unicode c;
+ sal_Int32 extrachar = 0; // set to 1 when a ligature or ZWNJ/ZWJ is seen
+
+ for (sal_Int32 ix=0; ix < n; ix++)
+ {
+ c = rBuf[ix];
+ if ((c == 0x201C) || (c == 0x201D))
+ rBuf[ix] = u'"';
+ else if ((c == 0x2018) || (c == 0x2019))
+ rBuf[ix] = u'\'';
+
+ // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
+ // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
+ // set ICONV and IGNORE aff file options, if needed.)
+ else if ((c == 0x200C) || (c == 0x200D) ||
+ ((c >= 0xFB00) && (c <= 0xFB04)))
+ extrachar = 1;
+ }
+ OUString nWord(rBuf.makeStringAndClear());
+
+ if (n)
+ {
+ for (auto& currDict : m_DictItems)
+ {
+ pMS = nullptr;
+ eEnc = RTL_TEXTENCODING_DONTKNOW;
+
+ if (rLocale == currDict.m_aDLoc)
+ {
+ if (!currDict.m_pDict)
+ {
+ // first use of this dictionary: build the .dic/.aff paths and load it
+ OUString dicpath = currDict.m_aDName + ".dic";
+ OUString affpath = currDict.m_aDName + ".aff";
+ OUString dict;
+ OUString aff;
+ osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
+ osl::FileBase::getSystemPathFromFileURL(affpath,aff);
+#if defined(_WIN32)
+ // workaround for Windows specific problem that the
+ // path length in calls to 'fopen' is limited to
+ // about 120+ characters which will usually be exceeded when
+ // using dictionaries as extensions. (Hunspell expects a UTF-8 encoded
+ // path with the \\?\ long path prefix.)
+ OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8));
+ OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8));
+#else
+ OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
+ OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
+#endif
+
+ currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr());
+#if defined(H_DEPRECATED)
+ currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str());
+#else
+ currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding());
+#endif
+ }
+ pMS = currDict.m_pDict.get();
+ eEnc = currDict.m_aDEnc;
+ }
+
+ if (pMS)
+ {
+ // we don't want to work with a default text encoding since following incorrect
+ // results may occur only for specific text and thus may be hard to notice.
+ // Thus better always make a clean exit here if the text encoding is in question.
+ // Hopefully something not working at all will raise proper attention quickly. ;-)
+ DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
+ if (eEnc == RTL_TEXTENCODING_DONTKNOW)
+ return -1;
+
+ OString aWrd(OU2ENC(nWord,eEnc));
+#if defined(H_DEPRECATED)
+ bool bVal = pMS->spell(std::string(aWrd.getStr()));
+#else
+ bool bVal = pMS->spell(aWrd.getStr()) != 0;
+#endif
+ if (!bVal) {
+ // rejected: for non-UTF-8 dictionaries retry with ligatures expanded
+ // and ZWNJ/ZWJ removed
+ if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
+ OUStringBuffer aBuf(nWord);
+ n = aBuf.getLength();
+ // walk backwards so that replacements, which change the length,
+ // do not shift the positions still to be visited
+ for (sal_Int32 ix=n-1; ix >= 0; ix--)
+ {
+ switch (aBuf[ix]) {
+ case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break;
+ case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break;
+ case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break;
+ case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break;
+ case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break;
+ case 0x200C:
+ case 0x200D: aBuf.remove(ix, 1); break;
+ }
+ }
+ OUString aWord(aBuf.makeStringAndClear());
+ OString bWrd(OU2ENC(aWord, eEnc));
+#if defined(H_DEPRECATED)
+ bVal = pMS->spell(std::string(bWrd.getStr()));
+#else
+ bVal = pMS->spell(bWrd.getStr()) != 0;
+#endif
+ if (bVal) return -1;
+ }
+ // remember the failure, but keep looking: another dictionary for
+ // the same locale may still accept the word
+ nRes = SpellFailure::SPELLING_ERROR;
+ } else {
+ return -1;
+ }
+ pMS = nullptr;
+ }
+ }
+ }
+
+ return nRes;
+}
+
+// Tells whether rWord counts as correctly spelled for rLocale.
+//
+// An empty word, an empty locale and an unsupported locale are all accepted.
+// A word that starts with the SPELLML header and is longer than 10 characters
+// is reported as invalid so that it gets processed as a SPELLML request.
+// Otherwise the dictionaries decide, and a reported failure may still be
+// ignored depending on the spelling options in effect.
+sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
+        const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (rLocale == Locale() || rWord.isEmpty() || !hasLocale( rLocale ))
+        return true;
+
+    const bool bHasXmlHeader = rWord.match(SPELL_XML, 0);
+
+    // SPELLML requests are longer than the bare header
+    if (bHasXmlHeader && rWord.getLength() > 10)
+        return false;
+
+    // The property values to use are the defaults from the linguistic
+    // property set, overridden by the ones supplied in rProperties.
+    PropertyHelper_Spelling& rPropHelper = GetPropHelper();
+    rPropHelper.SetTmpPropVals( rProperties );
+
+    const sal_Int16 nFailure = GetSpellFailure( rWord, rLocale );
+    if (nFailure == -1)
+        return true;
+    if (bHasXmlHeader)
+        return false;
+
+    // postprocess result for errors that should be ignored
+    LanguageType nLang = LinguLocaleToLanguage( rLocale );
+    if (!rPropHelper.IsSpellUpperCase() && IsUpper( rWord, nLang ))
+        return true;
+    if (!rPropHelper.IsSpellWithDigits() && HasDigits( rWord ))
+        return true;
+    return !rPropHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR;
+}
+
+// Collects spelling suggestions for rWord from the dictionaries of rLocale.
+//
+// Returns an empty reference when rWord is empty. Otherwise returns a
+// SpellAlternatives object for rWord with failure type SPELLING_ERROR and the
+// concatenated suggestions of all matching dictionaries (possibly none).
+// Only dictionaries whose Hunspell object already exists are queried; this
+// function does not load dictionaries itself.
+Reference< XSpellAlternatives >
+ SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
+{
+ // Retrieves the return values for the 'spell' function call in case
+ // of a misspelled word.
+ // Especially it may give a list of suggested (correct) words:
+ Reference< XSpellAlternatives > xRes;
+ // note: mutex is held higher up by spell, which covers both
+
+ Hunspell* pMS = nullptr;
+ rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+
+ // first handle smart quotes (single and double)
+ OUStringBuffer rBuf(rWord);
+ sal_Int32 n = rBuf.getLength();
+ sal_Unicode c;
+ for (sal_Int32 ix=0; ix < n; ix++)
+ {
+ c = rBuf[ix];
+ if ((c == 0x201C) || (c == 0x201D))
+ rBuf[ix] = u'"';
+ if ((c == 0x2018) || (c == 0x2019))
+ rBuf[ix] = u'\'';
+ }
+ OUString nWord(rBuf.makeStringAndClear());
+
+ if (n)
+ {
+ LanguageType nLang = LinguLocaleToLanguage( rLocale );
+ int numsug = 0; // number of suggestions gathered so far in aStr
+
+ Sequence< OUString > aStr( 0 );
+ for (const auto& currDict : m_DictItems)
+ {
+ pMS = nullptr;
+ eEnc = RTL_TEXTENCODING_DONTKNOW;
+
+ if (rLocale == currDict.m_aDLoc)
+ {
+ pMS = currDict.m_pDict.get();
+ eEnc = currDict.m_aDEnc;
+ }
+
+ if (pMS)
+ {
+ OString aWrd(OU2ENC(nWord,eEnc));
+#if defined(H_DEPRECATED)
+ // Hunspell API variant returning the suggestions as std::vector<std::string>
+ std::vector<std::string> suglst = pMS->suggest(std::string(aWrd.getStr()));
+ if (!suglst.empty())
+ {
+ aStr.realloc(numsug + suglst.size());
+ OUString *pStr = aStr.getArray();
+ for (size_t ii = 0; ii < suglst.size(); ++ii)
+ {
+ OUString cvtwrd(suglst[ii].c_str(), suglst[ii].size(), eEnc);
+ pStr[numsug + ii] = cvtwrd;
+ }
+ numsug += suglst.size();
+ }
+#else
+ // Hunspell API variant filling a char** list that is released with free_list()
+ char ** suglst = nullptr;
+ int count = pMS->suggest(&suglst, aWrd.getStr());
+ if (count)
+ {
+ aStr.realloc( numsug + count );
+ OUString *pStr = aStr.getArray();
+ for (int ii=0; ii < count; ++ii)
+ {
+ OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc);
+ pStr[numsug + ii] = cvtwrd;
+ }
+ numsug += count;
+ }
+ pMS->free_list(&suglst, count);
+#endif
+ }
+ }
+
+ // now return an empty alternative for no suggestions or the list of alternatives if some found
+ xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr );
+ return xRes;
+ }
+ return xRes;
+}
+
+// Spell-checks rWord for rLocale. Returns an empty reference when there is
+// nothing to report (empty word or locale, unsupported locale, or a word
+// that isValid() accepts); otherwise returns the result of GetProposals().
+Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell(
+        const OUString& rWord, const Locale& rLocale,
+        const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (rLocale == Locale() || rWord.isEmpty() || !hasLocale( rLocale ))
+        return nullptr;
+
+    if (isValid( rWord, rLocale, rProperties ))
+        return nullptr;
+
+    return GetProposals( rWord, rLocale );
+}
+
+// Forwards the listener registration to the property helper. Returns false
+// without forwarding when the object is disposing or rxLstnr is empty.
+sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener(
+        const Reference< XLinguServiceEventListener >& rxLstnr )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (m_bDisposing || !rxLstnr.is())
+        return false;
+
+    const bool bAdded = GetPropHelper().addLinguServiceEventListener( rxLstnr );
+    return bAdded;
+}
+
+// Forwards the listener removal to the property helper. Returns false
+// without forwarding when the object is disposing or rxLstnr is empty.
+sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener(
+        const Reference< XLinguServiceEventListener >& rxLstnr )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (m_bDisposing || !rxLstnr.is())
+        return false;
+
+    const bool bRemoved = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
+    return bRemoved;
+}
+
+// Returns the STR_DESCRIPTION_HUNSPELL string, looked up in the "svt"
+// translation catalogue for the language of rLocale.
+OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale)
+{
+    const LanguageTag aLanguageTag(rLocale);
+    std::locale aResLocale(Translate::Create("svt", aLanguageTag));
+    return Translate::get(STR_DESCRIPTION_HUNSPELL, aResLocale);
+}
+
+// Sets up the property helper from the initialization arguments.
+// Exactly two arguments are expected, of which only the first (the linguistic
+// property set) is used. Nothing happens when the helper already exists.
+void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (m_pPropHelper)
+        return;
+
+    if (rArguments.getLength() != 2)
+    {
+        OSL_FAIL( "wrong number of arguments in sequence" );
+        return;
+    }
+
+    Reference< XLinguProperties > xLinguProps;
+    rArguments.getConstArray()[0] >>= xLinguProps;
+    // the second argument is not evaluated here
+
+    //! The helper is held by pointer so its non-UNO functions stay
+    //! accessible; the unique_ptr frees it when it is no longer used.
+    m_pPropHelper.reset(
+        new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xLinguProps ) );
+    //! register only after the helper holds its reference
+    m_pPropHelper->AddAsPropListener();
+}
+
+// Disposes the spell checker once: marks it as disposing, notifies and clears
+// the event listeners, and unregisters and releases the property helper.
+// Further calls have no effect.
+void SAL_CALL SpellChecker::dispose()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (m_bDisposing)
+        return;
+    m_bDisposing = true;
+
+    EventObject aDisposeEvent( static_cast<XSpellChecker *>(this) );
+    m_aEvtListeners.disposeAndClear( aDisposeEvent );
+
+    if (!m_pPropHelper)
+        return;
+    m_pPropHelper->RemoveAsPropListener();
+    m_pPropHelper.reset();
+}
+
+// Registers rxListener in the event listener container, unless the object is
+// disposing or the reference is empty.
+void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (m_bDisposing || !rxListener.is())
+        return;
+
+    m_aEvtListeners.addInterface( rxListener );
+}
+
+// Removes rxListener from the event listener container, unless the object is
+// disposing or the reference is empty.
+void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (m_bDisposing || !rxListener.is())
+        return;
+
+    m_aEvtListeners.removeInterface( rxListener );
+}
+
+// Service specific part
+// Returns the implementation name; it is the same string that getLocales()
+// passes to GetSupportedDictionaryFormatsFor().
+OUString SAL_CALL SpellChecker::getImplementationName()
+{
+ return "org.openoffice.lingu.MySpellSpellChecker";
+}
+
+// Delegates the service-name check to cppu::supportsService().
+sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName )
+{
+ return cppu::supportsService(this, ServiceName);
+}
+
+// Returns a one-element sequence containing SN_SPELLCHECKER.
+Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames()
+{
+ return { SN_SPELLCHECKER };
+}
+
+// Exported C entry point named as the constructor in spell.component.
+// Creates a new SpellChecker and returns it through cppu::acquire(). Both
+// parameters (component context and argument sequence) are unnamed and unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+lingucomponent_SpellChecker_get_implementation(
+ css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
+{
+ return cppu::acquire(new SpellChecker());
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.hxx b/lingucomponent/source/spellcheck/spell/sspellimp.hxx
new file mode 100644
index 000000000..000f1756f
--- /dev/null
+++ b/lingucomponent/source/spellcheck/spell/sspellimp.hxx
@@ -0,0 +1,120 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX
+
+#include <comphelper/interfacecontainer3.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <com/sun/star/lang/XComponent.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/lang/XServiceDisplayName.hpp>
+#include <com/sun/star/beans/PropertyValue.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XSpellChecker.hpp>
+#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp>
+
+#include <linguistic/lngprophelp.hxx>
+
+#include <memory>
+
+#include <hunspell.hxx>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::beans;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+// Hunspell based implementation of the XSpellChecker interface, together with
+// the event broadcaster, initialization, component, service-info and
+// display-name interfaces listed below.
+class SpellChecker :
+ public cppu::WeakImplHelper
+ <
+ XSpellChecker,
+ XLinguServiceEventBroadcaster,
+ XInitialization,
+ XComponent,
+ XServiceInfo,
+ XServiceDisplayName
+ >
+{
+ // One entry per (dictionary, locale) pair.
+ struct DictItem
+ {
+ OUString m_aDName; // dictionary name (location without file extension)
+ Locale m_aDLoc; // locale this entry is used for
+ std::unique_ptr<Hunspell> m_pDict; // Hunspell object, empty until created
+ rtl_TextEncoding m_aDEnc; // text encoding of the dictionary
+
+ DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc);
+ };
+
+ std::vector<DictItem> m_DictItems; // known dictionary entries
+
+ Sequence< Locale > m_aSuppLocales; // locales returned by getLocales()
+
+ ::comphelper::OInterfaceContainerHelper3<XEventListener> m_aEvtListeners;
+ std::unique_ptr<linguistic::PropertyHelper_Spelling> m_pPropHelper; // empty until first needed
+ bool m_bDisposing; // set once dispose() has run
+
+ SpellChecker(const SpellChecker &) = delete;
+ SpellChecker & operator = (const SpellChecker &) = delete;
+
+ // Creates the property helper if it does not exist yet and returns it.
+ linguistic::PropertyHelper_Spelling& GetPropHelper_Impl();
+ // Returns the existing property helper, or has GetPropHelper_Impl() create it.
+ linguistic::PropertyHelper_Spelling& GetPropHelper()
+ {
+ return m_pPropHelper ? *m_pPropHelper : GetPropHelper_Impl();
+ }
+
+ // Returns -1 when rWord is not reported as an error, otherwise a SpellFailure value.
+ sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale );
+ // Builds the spelling alternatives (suggestions) for rWord.
+ Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale );
+
+public:
+ SpellChecker();
+ virtual ~SpellChecker() override;
+
+ // XSupportedLocales (for XSpellChecker)
+ virtual Sequence< Locale > SAL_CALL getLocales() override;
+ virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override;
+
+ // XSpellChecker
+ virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override;
+ virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override;
+
+ // XLinguServiceEventBroadcaster
+ virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
+ virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
+
+ // XServiceDisplayName
+ virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override;
+
+ // XInitialization
+ virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override;
+
+ // XComponent
+ virtual void SAL_CALL dispose() override;
+ virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override;
+ virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override;
+
+ // XServiceInfo
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override;
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+};
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/thesaurus/libnth/lnth.component b/lingucomponent/source/thesaurus/libnth/lnth.component
new file mode 100644
index 000000000..66e90e2cf
--- /dev/null
+++ b/lingucomponent/source/thesaurus/libnth/lnth.component
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="org.openoffice.lingu.new.Thesaurus"
+ constructor="lingucomponent_Thesaurus_get_implementation" single-instance="true">
+ <service name="com.sun.star.linguistic2.Thesaurus"/>
+ </implementation>
+</component>
diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.cxx b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx
new file mode 100644
index 000000000..aa7d2afa7
--- /dev/null
+++ b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <osl/mutex.hxx>
+
+#include "nthesdta.hxx"
+#include <linguistic/misc.hxx>
+
+using namespace osl;
+using namespace com::sun::star;
+using namespace com::sun::star::beans;
+using namespace com::sun::star::lang;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::linguistic2;
+
+namespace linguistic
+{
+
+// Creates a meaning for rTerm.
+// NOTE(review): aSyn is initialised with a one-element sequence here, so
+// querySynonyms() returns that single default-constructed string until
+// SetSynonyms() is called - confirm this is intended rather than an empty list.
+Meaning::Meaning(
+ const OUString &rTerm) :
+
+ aSyn ( Sequence< OUString >(1) ),
+ aTerm (rTerm)
+
+{
+#if 0
+ // this is for future use by a German thesaurus when one exists
+ bIsGermanPreReform = rHelper.IsGermanPreReform;
+#endif
+}
+
+Meaning::~Meaning()
+{
+}
+
+// Returns the stored term; access is guarded by the linguistic mutex.
+OUString SAL_CALL Meaning::getMeaning()
+{
+ MutexGuard aGuard( GetLinguMutex() );
+ return aTerm;
+}
+
+// Returns the stored synonym list; access is guarded by the linguistic mutex.
+Sequence< OUString > SAL_CALL Meaning::querySynonyms()
+{
+ MutexGuard aGuard( GetLinguMutex() );
+ return aSyn;
+}
+
+// Replaces the stored synonym list with rSyn.
+void Meaning::SetSynonyms( const Sequence< OUString > &rSyn )
+{
+ MutexGuard aGuard( GetLinguMutex() );
+ aSyn = rSyn;
+}
+
+// Replaces the stored term with rTerm.
+void Meaning::SetMeaning( const OUString &rTerm )
+{
+ MutexGuard aGuard( GetLinguMutex() );
+ aTerm = rTerm;
+}
+
+} // namespace linguistic
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.hxx b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx
new file mode 100644
index 000000000..8e6cb7561
--- /dev/null
+++ b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx
@@ -0,0 +1,60 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX
+
+#include <com/sun/star/linguistic2/XMeaning.hpp>
+#include <cppuhelper/implbase.hxx>
+
+namespace linguistic
+{
+
+// XMeaning implementation that holds one term and its list of synonyms.
+class Meaning :
+ public cppu::WeakImplHelper< css::linguistic2::XMeaning >
+{
+ css::uno::Sequence< OUString > aSyn; // list of synonyms, may be empty.
+ OUString aTerm; // the term returned by getMeaning()
+
+#if 0
+ // this is for future use by a German thesaurus
+ sal_Bool bIsGermanPreReform;
+#endif
+
+ Meaning(const Meaning &) = delete;
+ Meaning & operator = (const Meaning &) = delete;
+
+public:
+ explicit Meaning(const OUString &rTerm);
+ virtual ~Meaning() override;
+
+ // XMeaning
+ virtual OUString SAL_CALL getMeaning() override;
+ virtual css::uno::Sequence< OUString > SAL_CALL querySynonyms() override;
+
+ // non-interface specific functions
+ // Both setters overwrite the stored value.
+ void SetSynonyms( const css::uno::Sequence< OUString > &rSyn );
+ void SetMeaning( const OUString &rTerm );
+};
+
+} // namespace linguistic
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
new file mode 100644
index 000000000..ea3e3af8d
--- /dev/null
+++ b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
@@ -0,0 +1,571 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/uno/Reference.h>
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/weak.hxx>
+#include <com/sun/star/linguistic2/LinguServiceManager.hpp>
+#include <com/sun/star/linguistic2/XLinguProperties.hpp>
+#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
+#include <i18nlangtag/languagetag.hxx>
+#include <tools/debug.hxx>
+#include <comphelper/lok.hxx>
+#include <comphelper/processfactory.hxx>
+#include <comphelper/sequence.hxx>
+#include <osl/mutex.hxx>
+#include <osl/thread.h>
+#include <unotools/lingucfg.hxx>
+#include <unotools/resmgr.hxx>
+
+#include <rtl/string.hxx>
+#include <rtl/textenc.h>
+
+#include <svtools/strings.hrc>
+
+#include "nthesimp.hxx"
+#include <linguistic/misc.hxx>
+#include "nthesdta.hxx"
+
+#include <vector>
+#include <numeric>
+#include <set>
+#include <string.h>
+
+// XML-header to query SPELLML support
+constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>";
+
+using namespace osl;
+using namespace com::sun::star;
+using namespace com::sun::star::beans;
+using namespace com::sun::star::lang;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::linguistic2;
+using namespace linguistic;
+
+/// Creates the linguistic service manager from the process component context.
+static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
+{
+    const uno::Reference< XComponentContext > xProcessContext(
+        comphelper::getProcessComponentContext() );
+    return LinguServiceManager::create( xProcessContext );
+}
+
+/// Constructs an empty thesaurus: no property helper yet, not disposing,
+/// and no cached lookup (prevLocale is LANGUAGE_DONTKNOW).
+Thesaurus::Thesaurus()
+    : aEvtListeners( GetLinguMutex() )
+    , pPropHelper( nullptr )
+    , bDisposing( false )
+    , prevLocale( LANGUAGE_DONTKNOW )
+{
+}
+
+/// Drops all loaded thesauri and releases the property helper
+/// (unregisters it as property listener, then frees it).
+Thesaurus::~Thesaurus()
+{
+    mvThesInfo.clear();
+    if (pPropHelper)
+    {
+        pPropHelper->RemoveAsPropListener();
+        // pPropHelper is a raw owning pointer created with new in
+        // initialize()/GetPropHelper_Impl(). dispose() deletes it, so the
+        // destructor must do the same or the helper leaks whenever
+        // dispose() was never called.
+        delete pPropHelper;
+        pPropHelper = nullptr;
+    }
+}
+
+/// Returns the property helper, creating it from the global linguistic
+/// properties on first use.
+PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl()
+{
+    if (pPropHelper)
+        return *pPropHelper;
+
+    Reference< XLinguProperties > xLinguProps = GetLinguProperties();
+    pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xLinguProps );
+    pPropHelper->AddAsPropListener(); //! after a reference is established
+
+    return *pPropHelper;
+}
+
+/**
+ * Returns the locales for which a thesaurus dictionary is installed.
+ *
+ * As long as mvThesInfo is still empty, the configured dictionaries are
+ * collected, aSuppLocales is filled and one mvThesInfo entry is prepared per
+ * dictionary/locale pair. The MyThes instances themselves are not created
+ * here.
+ *
+ * @return the supported locales; empty if no dictionary was found.
+ */
+Sequence< Locale > SAL_CALL Thesaurus::getLocales()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    // this routine should return the locales supported by the installed
+    // dictionaries.
+    if (mvThesInfo.empty())
+    {
+        SvtLinguConfig aLinguCfg;
+
+        // get list of dictionaries-to-use
+        std::vector< SvtLinguConfigDictionaryEntry > aDics;
+        uno::Sequence< OUString > aFormatList;
+        aLinguCfg.GetSupportedDictionaryFormatsFor( "Thesauri",
+                "org.openoffice.lingu.new.Thesaurus", aFormatList );
+        for (const auto& rFormat : std::as_const(aFormatList))
+        {
+            std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
+                    aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
+            aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
+        }
+
+        //!! for compatibility with old dictionaries (the ones not using extensions
+        //!! or new configuration entries, but still using the dictionary.lst file)
+        //!! Get the list of old style thesaurus dictionaries to use...
+        std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
+                GetOldStyleDics( "THES" ) );
+
+        // to prefer dictionaries with configuration entries we will only
+        // use those old style dictionaries that add a language that
+        // is not yet supported by the list of new style dictionaries
+        MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
+
+        if (!aDics.empty())
+        {
+            // get supported locales from the dictionaries-to-use...
+            std::set<OUString> aLocaleNamesSet;
+            for (auto const& dict : aDics)
+            {
+                for (const auto& rLocaleName : dict.aLocaleNames)
+                {
+                    if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName))
+                        continue;
+
+                    aLocaleNamesSet.insert( rLocaleName );
+                }
+            }
+            // ... and add them to the resulting sequence
+            std::vector<Locale> aLocalesVec;
+            aLocalesVec.reserve(aLocaleNamesSet.size());
+
+            std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
+                [](const OUString& localeName) -> Locale { return LanguageTag::convertToLocale(localeName); });
+
+            aSuppLocales = comphelper::containerToSequence(aLocalesVec);
+
+            //! For each dictionary and each locale we need a separate entry.
+            //! If this results in more than one dictionary per locale then (for now)
+            //! it is undefined which dictionary gets used.
+            //! In the future the implementation should support using several dictionaries
+            //! for one locale.
+            sal_Int32 numthes = std::accumulate(aDics.begin(), aDics.end(), 0,
+                [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
+                    return nSum + dict.aLocaleNames.getLength(); });
+
+            // add dictionary information
+            mvThesInfo.resize(numthes);
+
+            sal_Int32 k = 0;
+            for (auto const& dict : aDics)
+            {
+                if (dict.aLocaleNames.hasElements() &&
+                    dict.aLocations.hasElements())
+                {
+                    // currently only one language per dictionary is supported in the actual implementation...
+                    // Thus here we work-around this by adding the same dictionary several times.
+                    // Once for each of its supported locales.
+                    for (const auto& rLocaleName : dict.aLocaleNames)
+                    {
+                        LanguageTag aLanguageTag(rLocaleName);
+                        mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW;
+                        mvThesInfo[k].aLocale = aLanguageTag.getLocale();
+                        mvThesInfo[k].aCharSetInfo.reset( new CharClass( std::move(aLanguageTag) ) );
+                        // also both files have to be in the same directory and the
+                        // file names must only differ in the extension (.dat/.idx).
+                        // Thus we use the first location only and strip the extension part.
+                        OUString aLocation = dict.aLocations[0];
+                        sal_Int32 nPos = aLocation.lastIndexOf( '.' );
+                        // lastIndexOf yields -1 when there is no '.'; copy(0, -1)
+                        // would be an invalid call, so keep the location as is then.
+                        if (nPos >= 0)
+                            aLocation = aLocation.copy( 0, nPos );
+                        mvThesInfo[k].aName = aLocation;
+
+                        ++k;
+                    }
+                }
+            }
+            DBG_ASSERT( k == numthes, "index mismatch?" );
+        }
+        else
+        {
+            /* no dictionary found so register no dictionaries */
+            mvThesInfo.clear();
+            aSuppLocales.realloc(0);
+        }
+    }
+
+    return aSuppLocales;
+}
+
+/// Tells whether rLocale is among the supported locales; the locale list is
+/// (re)built via getLocales() while it is still empty.
+sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    const bool bListEmpty = !aSuppLocales.hasElements();
+    if (bListEmpty)
+        getLocales();
+
+    const sal_Int32 nFoundAt = comphelper::findValue(aSuppLocales, rLocale);
+    return nFoundAt != -1;
+}
+
+Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings( // look up qTerm in the first thesaurus matching rLocale
+ const OUString& qTerm, const Locale& rLocale, // term to look up and its language
+ const css::uno::Sequence< css::beans::PropertyValue >& rProperties) // temporary property values, also forwarded to the spell checker
+{
+ MutexGuard aGuard( GetLinguMutex() );
+ // result holders: aMeanings is resized to the number of entries found, noMeanings is the empty result
+ uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
+ uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
+ uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
+ uno::Reference< XSpellChecker1 > xSpell; // only set once the stemming fallback is needed
+ // working copies of the term; they get modified by the stemming fallback below
+ OUString aRTerm(qTerm); // term actually looked up (replaced by its stem on retry)
+ OUString aPTerm(qTerm); // word sent to the 'analyze' query on the retry pass
+ CapType ct = CapType::UNKNOWN; // capitalization of the queried term, applied to the synonyms
+ sal_Int32 stem = 0; // 0 on the first pass, 1 once the stemming fallback has started
+ sal_Int32 stem2 = 0; // set to 1 when the 'analyze' query returned a result
+
+ LanguageType nLanguage = LinguLocaleToLanguage( rLocale );
+
+ if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty())
+ return noMeanings;
+
+ if (!hasLocale( rLocale ))
+#ifdef LINGU_EXCEPTIONS
+ throw( IllegalArgumentException() );
+#else
+ return noMeanings;
+#endif
+ // same term and language as the last successful lookup: answer from the cache
+ if (prevTerm == qTerm && prevLocale == nLanguage)
+ return prevMeanings;
+
+ mentry * pmean = nullptr; // entries returned by MyThes::Lookup, released via CleanUpAfterLookup
+ sal_Int32 nmean = 0; // number of entries in pmean
+
+ PropertyHelper_Thesaurus &rHelper = GetPropHelper();
+ rHelper.SetTmpPropVals( rProperties );
+
+ MyThes * pTH = nullptr;
+ rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+ CharClass * pCC = nullptr;
+
+ // find the first thesaurus that matches the locale
+ for (size_t i =0; i < mvThesInfo.size(); i++)
+ {
+ if (rLocale == mvThesInfo[i].aLocale)
+ {
+ // open up and initialize this thesaurus if need be
+ if (!mvThesInfo[i].aThes)
+ {
+ OUString datpath = mvThesInfo[i].aName + ".dat";
+ OUString idxpath = mvThesInfo[i].aName + ".idx";
+ OUString ndat;
+ OUString nidx;
+ osl::FileBase::getSystemPathFromFileURL(datpath,ndat); // return value is not checked
+ osl::FileBase::getSystemPathFromFileURL(idxpath,nidx); // return value is not checked
+
+#if defined(_WIN32)
+ // MyThes expects UTF-8 encoded paths with \\?\ long path prefix.
+ OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8));
+ OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8));
+#else
+ OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding()));
+ OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding()));
+#endif
+
+ mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) );
+ mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding());
+ }
+ pTH = mvThesInfo[i].aThes.get();
+ eEnc = mvThesInfo[i].aEncoding;
+ pCC = mvThesInfo[i].aCharSetInfo.get();
+
+ if (pTH)
+ break;
+ }
+ }
+
+ // we don't want to work with a default text encoding since following incorrect
+ // results may occur only for specific text and thus may be hard to notice.
+ // Thus better always make a clean exit here if the text encoding is in question.
+ // Hopefully something not working at all will raise proper attention quickly. ;-)
+ DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
+ if (eEnc == RTL_TEXTENCODING_DONTKNOW)
+ return noMeanings;
+
+ while (pTH) // runs at most twice: once with the term as given, once with its stem
+ {
+ // convert word to all lower case for searching
+ if (!stem)
+ ct = capitalType(aRTerm, pCC); // capitalization is taken from the original term only
+ OUString nTerm(makeLowerCase(aRTerm, pCC));
+ OString aTmp( OU2ENC(nTerm, eEnc) );
+ nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
+
+ if (nmean)
+ aMeanings.realloc( nmean );
+
+ mentry * pe = pmean; // cursor over the returned entries
+ OUString codeTerm = qTerm; // replaced by the first 'analyze' result on the retry pass
+ Reference< XSpellAlternatives > xTmpRes2;
+
+ if (stem) // retry pass: xSpell was set at the end of the first pass
+ {
+ xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" +
+ aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
+ if (xTmpRes2.is())
+ {
+ Sequence<OUString>seq = xTmpRes2->getAlternatives();
+ if (seq.hasElements())
+ {
+ codeTerm = seq[0];
+ stem2 = 1;
+ }
+ }
+ }
+
+ for (int j = 0; j < nmean; j++) // one Meaning per thesaurus entry
+ {
+ int count = pe->count;
+ if (count) // entries without synonyms leave aMeanings[j] as an empty reference
+ {
+ Sequence< OUString > aStr( count );
+ OUString *pStr = aStr.getArray();
+
+ for (int i=0; i < count; i++)
+ {
+ OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc );
+ sal_Int32 catpos = sTerm.indexOf('(');
+ OUString catst; // category suffix, re-appended after casing
+ if (catpos > 2)
+ {
+ // remove category name for affixation and casing
+ catst = OUString::Concat(" ") + sTerm.subView(catpos);
+ sTerm = sTerm.copy(0, catpos);
+ sTerm = sTerm.trim();
+ }
+ // generate synonyms with affixes
+ if (stem && stem2)
+ {
+ Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" +
+ sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties );
+ if (xTmpRes.is())
+ {
+ Sequence<OUString>seq = xTmpRes->getAlternatives();
+ if (seq.hasElements())
+ sTerm = seq[0];
+ }
+ }
+ // apply the capitalization of the queried term to the synonym
+ CapType ct1 = capitalType(sTerm, pCC);
+ if (CapType::MIXED == ct1)
+ ct = ct1; // note: this also affects all following synonyms
+ OUString cTerm;
+ switch (ct)
+ {
+ case CapType::ALLCAP:
+ cTerm = makeUpperCase(sTerm, pCC);
+ break;
+ case CapType::INITCAP:
+ cTerm = makeInitCap(sTerm, pCC);
+ break;
+ default:
+ cTerm = sTerm;
+ break;
+ }
+ OUString aAlt( cTerm + catst);
+ pStr[i] = aAlt;
+ }
+ rtl::Reference<Meaning> pMn = new Meaning(aRTerm);
+ OUString dTerm(pe->defn,strlen(pe->defn),eEnc );
+ pMn->SetMeaning(dTerm);
+ pMn->SetSynonyms(aStr);
+ Reference<XMeaning>* pMeaning = aMeanings.getArray();
+ pMeaning[j] = pMn;
+ }
+ pe++;
+ }
+ pTH->CleanUpAfterLookup(&pmean,nmean);
+
+ if (nmean)
+ {
+ prevTerm = qTerm; // remember the result for the cache check above
+ prevMeanings = aMeanings;
+ prevLocale = nLanguage;
+ return aMeanings;
+ }
+ // nothing found: give up if this already was the stemmed retry
+ if (stem || !xLngSvcMgr.is())
+ return noMeanings;
+ stem = 1;
+ // stemming needs a spell checker that accepts the SPELLML query header
+ xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
+ if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties ))
+ return noMeanings;
+ Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" +
+ aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
+ if (xTmpRes.is())
+ {
+ Sequence<OUString>seq = xTmpRes->getAlternatives();
+ if (seq.hasElements())
+ {
+ aRTerm = seq[0]; // XXX Use only the first stem
+ continue;
+ }
+ }
+
+ // stem the last word of the synonym (for categories after affixation)
+ aRTerm = aRTerm.trim();
+ sal_Int32 pos = aRTerm.lastIndexOf(' ');
+ if (!pos) // NOTE(review): only true for pos == 0; a term without any space presumably yields -1 and falls through - confirm intent
+ return noMeanings;
+ xTmpRes = xSpell->spell( OUString::Concat("<?xml?><query type='stem'><word>") +
+ aRTerm.subView(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
+ if (xTmpRes.is())
+ {
+ Sequence<OUString>seq = xTmpRes->getAlternatives();
+ if (seq.hasElements())
+ {
+ aPTerm = aRTerm.copy(pos + 1); // last word, analyzed on the retry pass
+ aRTerm = aRTerm.subView(0, pos + 1) + seq[0]; // leading words + stem of the last word
+#if 0
+ for (int i = 0; i < seq.getLength(); i++)
+ {
+ OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8);
+ fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
+ }
+#endif
+ continue;
+ }
+ }
+ break;
+ }
+ return noMeanings;
+}
+
+/// Returns the STR_DESCRIPTION_MYTHES string from the "svt" resources,
+/// translated for rLocale.
+OUString SAL_CALL Thesaurus::getServiceDisplayName(const Locale& rLocale)
+{
+    const LanguageTag aRequestedTag(rLocale);
+    const std::locale aResLocale(Translate::Create("svt", aRequestedTag));
+    return Translate::get(STR_DESCRIPTION_MYTHES, aResLocale);
+}
+
+/// XInitialization: creates the property helper from the XLinguProperties
+/// passed as first argument. Does nothing if a helper already exists.
+void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (pPropHelper)
+        return;
+
+    // Accept one of two args so we can be compatible with the call site in GetAvailLocales()
+    // linguistic module
+    const sal_Int32 nArgCount = rArguments.getLength();
+    if (nArgCount != 1 && nArgCount != 2)
+    {
+        OSL_FAIL( "wrong number of arguments in sequence" );
+        return;
+    }
+
+    Reference< XLinguProperties > xLinguProps;
+    rArguments.getConstArray()[0] >>= xLinguProps;
+    assert(xLinguProps);
+
+    //! The raw pointer gives access to the helper's non-UNO functions.
+    pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xLinguProps );
+    pPropHelper->AddAsPropListener(); //! after a reference is established
+}
+
+/// Lower-cases aTerm with pCC; returns aTerm unchanged when pCC is null.
+OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC)
+{
+    if (!pCC)
+        return aTerm;
+
+    return pCC->lowercase(aTerm);
+}
+
+/// Upper-cases aTerm with pCC; returns aTerm unchanged when pCC is null.
+OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC)
+{
+    if (!pCC)
+        return aTerm;
+
+    return pCC->uppercase(aTerm);
+}
+
+/// Returns aTerm with its first code unit upper-cased and the rest
+/// lower-cased; aTerm is returned unchanged when it is empty or pCC is null.
+OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC)
+{
+    const sal_Int32 nLength = aTerm.getLength();
+    if (!pCC || !nLength)
+        return aTerm;
+
+    const OUString aFirst = aTerm.copy(0,1);
+    OUString aResult = pCC->uppercase(aFirst, 0, 1);
+    if (nLength > 1)
+        aResult += pCC->lowercase(aTerm, 1, nLength - 1);
+    return aResult;
+}
+
+/// XComponent: on the first call notifies and clears the event listeners and
+/// releases the property helper; later calls do nothing.
+void SAL_CALL Thesaurus::dispose()
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing)
+        return;
+    bDisposing = true;
+
+    EventObject aDisposeEvent( static_cast<XThesaurus *>(this) );
+    aEvtListeners.disposeAndClear( aDisposeEvent );
+
+    if (!pPropHelper)
+        return;
+
+    pPropHelper->RemoveAsPropListener();
+    delete pPropHelper;
+    pPropHelper = nullptr;
+}
+
+/// XComponent: registers rxListener unless it is empty or disposal has
+/// already started.
+void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxListener.is())
+        return;
+
+    aEvtListeners.addInterface( rxListener );
+}
+
+/// XComponent: unregisters rxListener unless it is empty or disposal has
+/// already started.
+void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener )
+{
+    MutexGuard aGuard( GetLinguMutex() );
+
+    if (bDisposing || !rxListener.is())
+        return;
+
+    aEvtListeners.removeInterface( rxListener );
+}
+
+// Service specific part
+OUString SAL_CALL Thesaurus::getImplementationName() // XServiceInfo: fixed implementation name of this component
+{
+ return "org.openoffice.lingu.new.Thesaurus"; // same name getLocales() passes to GetSupportedDictionaryFormatsFor()
+}
+
+sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName ) // XServiceInfo: is ServiceName a supported service?
+{
+ return cppu::supportsService(this, ServiceName); // delegates to the cppu helper
+}
+
+Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames() // XServiceInfo: list of supported service names
+{
+ return { SN_THESAURUS }; // the single service name SN_THESAURUS
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* // exported C entry point creating the component
+lingucomponent_Thesaurus_get_implementation(
+ css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) // both parameters are unused
+{
+ return cppu::acquire(new Thesaurus()); // hands out a new, acquired Thesaurus instance
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
new file mode 100644
index 000000000..04eab0688
--- /dev/null
+++ b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX
+#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX
+
+#include <comphelper/interfacecontainer3.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <com/sun/star/uno/Reference.h>
+#include <com/sun/star/uno/Sequence.h>
+#include <com/sun/star/lang/XComponent.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/lang/XServiceDisplayName.hpp>
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/beans/PropertyValues.hpp>
+
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XMeaning.hpp>
+#include <com/sun/star/linguistic2/XThesaurus.hpp>
+
+#include <unotools/charclass.hxx>
+
+#include <lingutil.hxx>
+#include <linguistic/misc.hxx>
+#include <linguistic/lngprophelp.hxx>
+
+#include <osl/file.hxx>
+#include <mythes.hxx>
+#include <memory>
+#include <vector>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::beans;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+namespace com::sun::star::beans { class XPropertySet; }
+
+class Thesaurus : // UNO thesaurus component backed by MyThes dictionaries
+ public cppu::WeakImplHelper
+ <
+ XThesaurus,
+ XInitialization,
+ XComponent,
+ XServiceInfo,
+ XServiceDisplayName
+ >
+{
+ Sequence< Locale > aSuppLocales; // supported locales, filled by getLocales()
+
+ ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners; // listeners notified in dispose()
+ linguistic::PropertyHelper_Thesaurus* pPropHelper; // raw owning pointer, created in initialize()/GetPropHelper_Impl()
+ bool bDisposing; // set once dispose() has started
+ struct ThesInfo // per dictionary/locale entry
+ {
+ std::unique_ptr<CharClass> aCharSetInfo; // used for case conversion of terms
+ std::unique_ptr<MyThes> aThes; // created on first lookup for this entry
+ rtl_TextEncoding aEncoding; // taken from the thesaurus file when aThes is created
+ Locale aLocale;
+ OUString aName; // dictionary location with the extension stripped
+ };
+ std::vector<ThesInfo> mvThesInfo;
+
+ // cache for the Thesaurus dialog (last successful queryMeanings() result)
+ Sequence < Reference < css::linguistic2::XMeaning > > prevMeanings;
+ OUString prevTerm;
+ LanguageType prevLocale;
+ // instances are not copyable
+ Thesaurus(const Thesaurus &) = delete;
+ Thesaurus & operator = (const Thesaurus &) = delete;
+
+ linguistic::PropertyHelper_Thesaurus& GetPropHelper_Impl(); // creates the helper if it does not exist yet
+ linguistic::PropertyHelper_Thesaurus& GetPropHelper() // returns the existing helper or creates one
+ {
+ return pPropHelper ? *pPropHelper : GetPropHelper_Impl();
+ }
+
+public:
+ Thesaurus();
+ virtual ~Thesaurus() override;
+
+ // XSupportedLocales (for XThesaurus)
+ virtual Sequence< Locale > SAL_CALL getLocales() override;
+ virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override;
+
+ // XThesaurus
+ virtual Sequence< Reference < css::linguistic2::XMeaning > > SAL_CALL queryMeanings( const OUString& rTerm, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override;
+
+ // XServiceDisplayName
+ virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override;
+
+ // XInitialization
+ virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override;
+
+ // XComponent
+ virtual void SAL_CALL dispose() override;
+ virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override;
+ virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override;
+
+ // XServiceInfo
+ virtual OUString SAL_CALL getImplementationName() override;
+ virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override;
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+
+private: // case helpers; each returns the term unchanged when the CharClass pointer is null
+ static OUString makeLowerCase(const OUString&, CharClass const *);
+ static OUString makeUpperCase(const OUString&, CharClass const *);
+ static OUString makeInitCap(const OUString&, CharClass const *);
+};
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */