diff options
Diffstat (limited to '')
42 files changed, 5831 insertions, 0 deletions
diff --git a/lingucomponent/IwyuFilter_lingucomponent.yaml b/lingucomponent/IwyuFilter_lingucomponent.yaml new file mode 100644 index 000000000..ebea8246c --- /dev/null +++ b/lingucomponent/IwyuFilter_lingucomponent.yaml @@ -0,0 +1,20 @@ +--- +assumeFilename: lingucomponent/source/spellcheck/spell/sspellimp.cxx +excludelist: + lingucomponent/source/lingutil/lingutil.hxx: + # Needed on WIN + - rtl/string.hxx + lingucomponent/source/languageguessing/guesslang.cxx: + # Needed for code protected by EXTTEXTCAT_VERSION_MAJOR + - libexttextcat/textcat.h + - tools/debug.hxx + - sal/macros.h + lingucomponent/source/numbertext/numbertext.cxx: + # Needed on WIN + - o3tl/char16_t2wchar_t.hxx + lingucomponent/source/spellcheck/spell/sspellimp.cxx: + # Needed for SpellFailure::SPELLING_ERROR + - com/sun/star/linguistic2/SpellFailure.hpp + lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx: + # Actually used + - com/sun/star/linguistic2/XLinguProperties.hpp diff --git a/lingucomponent/Library_LanguageTool.mk b/lingucomponent/Library_LanguageTool.mk new file mode 100644 index 000000000..98ff7cf7e --- /dev/null +++ b/lingucomponent/Library_LanguageTool.mk @@ -0,0 +1,48 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,LanguageTool)) + +$(eval $(call gb_Library_set_componentfile,LanguageTool,lingucomponent/source/spellcheck/languagetool/LanguageTool,services)) + +$(eval $(call gb_Library_set_include,LanguageTool,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,LanguageTool)) + +$(eval $(call gb_Library_use_libraries,LanguageTool,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + svt \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,LanguageTool,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,LanguageTool,\ + boost_headers \ + icuuc \ + curl \ +)) + +$(eval $(call gb_Library_add_exception_objects,LanguageTool,\ + lingucomponent/source/spellcheck/languagetool/languagetoolimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_MacOSXSpell.mk b/lingucomponent/Library_MacOSXSpell.mk new file mode 100644 index 000000000..df77686d1 --- /dev/null +++ b/lingucomponent/Library_MacOSXSpell.mk @@ -0,0 +1,47 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,MacOSXSpell)) + +$(eval $(call gb_Library_set_componentfile,MacOSXSpell,lingucomponent/source/spellcheck/macosxspell/MacOSXSpell,services)) + +$(eval $(call gb_Library_set_include,MacOSXSpell,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,MacOSXSpell)) + +$(eval $(call gb_Library_use_libraries,MacOSXSpell,\ + comphelper \ + cppu \ + cppuhelper \ + lng \ + sal \ + i18nlangtag \ + svl \ + tl \ + ucbhelper \ + utl \ +)) + +$(eval $(call gb_Library_use_system_darwin_frameworks,MacOSXSpell,\ + Cocoa \ +)) + +$(eval $(call gb_Library_use_externals,MacOSXSpell,\ + boost_headers \ +)) + +$(eval $(call gb_Library_add_objcxxobjects,MacOSXSpell,\ + lingucomponent/source/spellcheck/macosxspell/macspellimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_guesslang.mk b/lingucomponent/Library_guesslang.mk new file mode 100644 index 000000000..a700ed545 --- /dev/null +++ b/lingucomponent/Library_guesslang.mk @@ -0,0 +1,41 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,guesslang)) + +$(eval $(call gb_Library_set_componentfile,guesslang,lingucomponent/source/languageguessing/guesslang,services)) + +ifneq ($(ENABLE_WASM_STRIP_GUESSLANG),TRUE) +$(eval $(call gb_Library_use_externals,guesslang,\ + libexttextcat \ +)) +endif + +$(eval $(call gb_Library_use_externals,guesslang,\ + boost_headers \ +)) + +$(eval $(call gb_Library_use_sdk_api,guesslang)) + +$(eval $(call gb_Library_use_libraries,guesslang,\ + cppu \ + cppuhelper \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_add_exception_objects,guesslang,\ + lingucomponent/source/languageguessing/guess \ + lingucomponent/source/languageguessing/guesslang \ + lingucomponent/source/languageguessing/simpleguesser \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_hyphen.mk b/lingucomponent/Library_hyphen.mk new file mode 100644 index 000000000..dd172e080 --- /dev/null +++ b/lingucomponent/Library_hyphen.mk @@ -0,0 +1,47 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,hyphen)) + +$(eval $(call gb_Library_set_componentfile,hyphen,lingucomponent/source/hyphenator/hyphen/hyphen,services)) + +$(eval $(call gb_Library_set_include,hyphen,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,hyphen)) + +$(eval $(call gb_Library_use_libraries,hyphen,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,hyphen,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,hyphen,\ + boost_headers \ + hunspell \ + hyphen \ +)) + +$(eval $(call gb_Library_add_exception_objects,hyphen,\ + lingucomponent/source/hyphenator/hyphen/hyphenimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_lnth.mk b/lingucomponent/Library_lnth.mk new file mode 100644 index 000000000..00d16790e --- /dev/null +++ b/lingucomponent/Library_lnth.mk @@ -0,0 +1,48 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,lnth)) + +$(eval $(call gb_Library_set_componentfile,lnth,lingucomponent/source/thesaurus/libnth/lnth,services)) + +$(eval $(call gb_Library_set_include,lnth,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,lnth)) + +$(eval $(call gb_Library_use_libraries,lnth,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,lnth,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,lnth,\ + boost_headers \ + hunspell \ + mythes \ +)) + +$(eval $(call gb_Library_add_exception_objects,lnth,\ + lingucomponent/source/thesaurus/libnth/nthesdta \ + lingucomponent/source/thesaurus/libnth/nthesimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_numbertext.mk b/lingucomponent/Library_numbertext.mk new file mode 100644 index 000000000..6cb6ba10a --- /dev/null +++ b/lingucomponent/Library_numbertext.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,numbertext)) + +$(eval $(call gb_Library_set_componentfile,numbertext,lingucomponent/source/numbertext/numbertext,services)) + +$(eval $(call gb_Library_use_externals,numbertext,\ + libnumbertext \ +)) + +$(eval $(call gb_Library_use_sdk_api,numbertext)) + +$(eval $(call gb_Library_use_libraries,numbertext,\ + cppu \ + cppuhelper \ + i18nlangtag \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_add_exception_objects,numbertext,\ + lingucomponent/source/numbertext/numbertext \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_spell.mk b/lingucomponent/Library_spell.mk new file mode 100644 index 000000000..c00ea8d90 --- /dev/null +++ b/lingucomponent/Library_spell.mk @@ -0,0 +1,47 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Library_Library,spell)) + +$(eval $(call gb_Library_set_componentfile,spell,lingucomponent/source/spellcheck/spell/spell,services)) + +$(eval $(call gb_Library_set_include,spell,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,spell)) + +$(eval $(call gb_Library_use_libraries,spell,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,spell,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,spell,\ + boost_headers \ + hunspell \ + icuuc \ +)) + +$(eval $(call gb_Library_add_exception_objects,spell,\ + lingucomponent/source/spellcheck/spell/sspellimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Makefile b/lingucomponent/Makefile new file mode 100644 index 000000000..0997e6284 --- /dev/null +++ b/lingucomponent/Makefile @@ -0,0 +1,14 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Module_lingucomponent.mk b/lingucomponent/Module_lingucomponent.mk new file mode 100644 index 000000000..13f1a829d --- /dev/null +++ b/lingucomponent/Module_lingucomponent.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Module_Module,lingucomponent)) + +ifneq ($(ENABLE_WASM_STRIP_GUESSLANG),TRUE) +$(eval $(call gb_Module_add_targets,lingucomponent,\ + Library_guesslang \ +)) +endif + +ifneq ($(ENABLE_WASM_STRIP_HUNSPELL),TRUE) +$(eval $(call gb_Module_add_targets,lingucomponent,\ + Library_hyphen \ + Library_lnth \ + Library_spell \ + StaticLibrary_ulingu \ +)) +endif + +$(eval $(call gb_Module_add_targets,lingucomponent,\ + $(if $(filter iOS MACOSX,$(OS)),Library_MacOSXSpell) \ + Library_numbertext \ + Library_LanguageTool \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/README.md b/lingucomponent/README.md new file mode 100644 index 000000000..6db4086ac --- /dev/null +++ b/lingucomponent/README.md @@ -0,0 +1,3 @@ +# Linguistics Components + +`lingucomponent` contains spellcheck, hyphenator, thesaurus, etc. diff --git a/lingucomponent/StaticLibrary_ulingu.mk b/lingucomponent/StaticLibrary_ulingu.mk new file mode 100644 index 000000000..1bee43eeb --- /dev/null +++ b/lingucomponent/StaticLibrary_ulingu.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_StaticLibrary_StaticLibrary,ulingu)) + +$(eval $(call gb_StaticLibrary_use_api,ulingu,\ + udkapi \ + offapi \ +)) + +ifneq ($(SYSTEM_DICTS),) +$(eval $(call gb_StaticLibrary_add_defs,ulingu,\ + -DSYSTEM_DICTS -DDICT_SYSTEM_DIR=\"$(DICT_SYSTEM_DIR)\" -DHYPH_SYSTEM_DIR=\"$(HYPH_SYSTEM_DIR)\" -DTHES_SYSTEM_DIR=\"$(THES_SYSTEM_DIR)\" \ +)) + +endif + +$(eval $(call gb_StaticLibrary_use_externals,ulingu,\ + boost_headers \ + hunspell \ +)) + +$(eval $(call gb_StaticLibrary_add_exception_objects,ulingu,\ + lingucomponent/source/lingutil/lingutil \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/config/Linguistic-lingucomponent-hyphenator.xcu b/lingucomponent/config/Linguistic-lingucomponent-hyphenator.xcu new file mode 100644 index 000000000..c42f16313 --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-hyphenator.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . +--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="Hyphenators"> + <node oor:name="org.openoffice.lingu.LibHnjHyphenator" oor:op="fuse"> + <prop oor:name="SupportedDictionaryFormats" oor:type="oor:string-list"> + <value>DICT_HYPH</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/config/Linguistic-lingucomponent-spellchecker.xcu b/lingucomponent/config/Linguistic-lingucomponent-spellchecker.xcu new file mode 100644 index 000000000..8f35b44e5 --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-spellchecker.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . +--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="SpellCheckers"> + <node oor:name="org.openoffice.lingu.MySpellSpellChecker" oor:op="fuse"> + <prop oor:name="SupportedDictionaryFormats" oor:type="oor:string-list"> + <value>DICT_SPELL</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/config/Linguistic-lingucomponent-thesaurus.xcu b/lingucomponent/config/Linguistic-lingucomponent-thesaurus.xcu new file mode 100644 index 000000000..e228e6556 --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-thesaurus.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . +--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="Thesauri"> + <node oor:name="org.openoffice.lingu.new.Thesaurus" oor:op="fuse"> + <prop oor:name="SupportedDictionaryFormats" oor:type="oor:string-list"> + <value>DICT_THES</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/source/hyphenator/hyphen/hyphen.component b/lingucomponent/source/hyphenator/hyphen/hyphen.component new file mode 100644 index 000000000..b9bc8b1f3 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphen.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.LibHnjHyphenator" + constructor="lingucomponent_Hyphenator_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.Hyphenator"/> + </implementation> +</component> diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx new file mode 100644 index 000000000..8ac156ef8 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx @@ -0,0 +1,805 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> + +#include <comphelper/sequence.hxx> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> + +#include <hyphen.h> +#include "hyphenimp.hxx" + +#include <linguistic/hyphdta.hxx> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/log.hxx> + +#include <linguistic/misc.hxx> +#include <svtools/strings.hrc> +#include <unotools/charclass.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> +#include <osl/file.hxx> + +#include <stdio.h> +#include <string.h> + +#include <cassert> +#include <numeric> +#include <vector> +#include <set> +#include <memory> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +Hyphenator::Hyphenator() : + aEvtListeners ( GetLinguMutex() ) +{ + bDisposing = false; +} + +Hyphenator::~Hyphenator() +{ + for (auto & rInfo : mvDicts) + { + if (rInfo.aPtr) + hnj_hyphen_free(rInfo.aPtr); + } + + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper.reset( new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Hyphenator::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvDicts.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators", + "org.openoffice.lingu.LibHnjHyphenator", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... + std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "HYPH" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvDicts.resize(numdict); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvDicts[k].aPtr = nullptr; + mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; + mvDicts[k].aLoc = aLanguageTag.getLocale(); + mvDicts[k].apCC.reset( new CharClass( std::move(aLanguageTag) ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvDicts[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numdict, "index mismatch?" ); + } + else + { + // no dictionary found so register no dictionaries + mvDicts.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +namespace { +bool LoadDictionary(HDInfo& rDict) +{ + OUString DictFN = rDict.aName + ".dic"; + OUString dictpath; + + osl::FileBase::getSystemPathFromFileURL(DictFN, dictpath); + +#if defined(_WIN32) + // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix. + OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8)); +#else + OString sTmp(OU2ENC(dictpath, osl_getThreadTextEncoding())); +#endif + HyphenDict *dict = nullptr; + if ((dict = hnj_hyphen_load(sTmp.getStr())) == nullptr) + { + SAL_WARN( + "lingucomponent", + "Couldn't find file " << dictpath); + return false; + } + rDict.aPtr = dict; + rDict.eEnc = getTextEncodingFromCharset(dict->cset); + return true; +} +} + +Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nMaxLeading, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps(); + + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + Reference< XHyphenatedWord > xRes; + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + int nHyphenationPos = -1; + int nHyphenationPosAlt = -1; + int nHyphenationPosAltHyph = -1; + + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + HyphenDict *dict = mvDicts[k].aPtr; + eEnc = mvDicts[k].eEnc; + CharClass * pCC = mvDicts[k].apCC.get(); + + // Don't hyphenate uppercase words if requested + if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC)) + { + return nullptr; + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + CapType ct = capitalType(aWord, pCC); + + // first convert any smart quotes or apostrophes to normal ones + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + int wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen + 1]); + std::unique_ptr<char[]> hyphens(new char[wordlen + 5]); + + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // now strip off any ending periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + // whoops something did not work + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return nullptr; + } + } + + // now backfill hyphens[] for any removed trailing periods + for (int c = n; c < wordlen; c++) hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + + sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); + + for (sal_Int32 i = 0; i < n; i++) + { + int leftrep = 0; + bool hit = (n >= minLen); + if (!rep || !rep[i]) + { + hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (i >= (minLead-1) ); + hit = hit && ((n - i - 1) >= minTrail); + } + else + { + // calculate change character length before hyphenation point signed with '=' + for (char * c = rep[i]; *c && (*c != '='); c++) + { + if (eEnc == RTL_TEXTENCODING_UTF8) + { + if (static_cast<unsigned char>(*c) >> 6 != 2) + leftrep++; + } + else + leftrep++; + } + hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); + hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); + hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + } + if (hit) + { + nHyphenationPos = i; + if (rep && rep[i]) + { + nHyphenationPosAlt = i - pos[i]; + nHyphenationPosAltHyph = i + leftrep - pos[i]; + } + } + } + + if (nHyphenationPos == -1) + { + xRes = nullptr; + } + else + { + if (rep && rep[nHyphenationPos]) + { + // remove equal sign + char * s = rep[nHyphenationPos]; + int eq = 0; + for (; *s; s++) + { + if (*s == '=') eq = 1; + if (eq) *s = *(s + 1); + } + OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyph; + switch (ct) + { + case CapType::ALLCAP: + { + repHyph = makeUpperCase(repHyphlow, pCC); + break; + } + case CapType::INITCAP: + { + if (nHyphenationPosAlt == -1) + repHyph = makeInitCap(repHyphlow, pCC); + else + repHyph = repHyphlow; + break; + } + default: + { + repHyph = repHyphlow; + break; + } + } + + // handle shortening + sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ? + nHyphenationPosAltHyph : nHyphenationPos); + // discretionary hyphenation + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos, + aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + static_cast<sal_Int16>(nHyphenationPosAltHyph)); + } + else + { + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), + static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos)); + } + } + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return xRes; + } + return nullptr; +} + +Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( + const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nIndex, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: + for (int extrachar = 1; extrachar <= 2; extrachar++) + { + Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) + return xRes; + } + return nullptr; +} + +Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord, + const css::lang::Locale& aLocale, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + + // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as + // well as "hyphenate" + if (aWord.getLength() < minLen) + { + return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ), + aWord, Sequence< sal_Int16 >() ); + } + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + HyphenDict *dict = nullptr; + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + dict = mvDicts[k].aPtr; + rtl_TextEncoding eEnc = mvDicts[k].eEnc; + CharClass* pCC = mvDicts[k].apCC.get(); + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + sal_Int32 wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen+1]); + std::unique_ptr<char[]> hyphens(new char[wordlen+5]); + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // first remove any trailing periods + sal_Int32 n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return nullptr; + } + } + // now backfill hyphens[] for any removed periods + for (sal_Int32 c = n; c < wordlen; c++) + hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + + sal_Int32 nHyphCount = 0; + + for ( sal_Int32 i = 0; i < encWord.getLength(); i++) + { + if (hyphens[i]&1) + nHyphCount++; + } + + Sequence< sal_Int16 > aHyphPos(nHyphCount); + sal_Int16 *pPos = aHyphPos.getArray(); + OUStringBuffer hyphenatedWordBuffer; + nHyphCount = 0; + + for (sal_Int32 i = 0; i < nWord.getLength(); i++) + { + hyphenatedWordBuffer.append(aWord[i]); + // hyphenation position + if (hyphens[i]&1) + { + // linguistic::PossibleHyphens is stuck with + // css::uno::Sequence<sal_Int16> because of + // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so + // any further positions need to be ignored: + assert(i >= SAL_MIN_INT16); + if (i > SAL_MAX_INT16) + { + SAL_WARN( + "lingucomponent", + "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord + << "\""); + continue; + } + pPos[nHyphCount] = i; + hyphenatedWordBuffer.append('='); + nHyphCount++; + } + } + + OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); + + Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens( + aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos); + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return xRes; + } + + return nullptr; +} + +OUString Hyphenator::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) ); + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_LIBHYPHEN, loc); +} + +void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL Hyphenator::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XHyphenator *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + pPropHelper.reset(); + } + } +} + +void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Hyphenator::getImplementationName() +{ + return "org.openoffice.lingu.LibHnjHyphenator"; +} + +sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames() +{ + return { SN_HYPHENATOR }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_Hyphenator_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new Hyphenator()); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx new file mode 100644 index 000000000..45ebca112 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx @@ -0,0 +1,126 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XHyphenator.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <unotools/charclass.hxx> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> +#include <stdio.h> + +#include <hyphen.h> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +struct HDInfo { + HyphenDict * aPtr; + OUString aName; + Locale aLoc; + rtl_TextEncoding eEnc; + std::unique_ptr<CharClass> apCC; +}; + +class Hyphenator : + public cppu::WeakImplHelper + < + XHyphenator, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + std::vector< HDInfo > mvDicts; + + ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Hyphenation> pPropHelper; + bool bDisposing; + + Hyphenator(const Hyphenator &) = delete; + Hyphenator & operator = (const Hyphenator &) = delete; + + linguistic::PropertyHelper_Hyphenation& GetPropHelper_Impl(); + linguistic::PropertyHelper_Hyphenation& GetPropHelper() + { + return pPropHelper ? *pPropHelper : GetPropHelper_Impl(); + } + +public: + Hyphenator(); + + virtual ~Hyphenator() override; + + // XSupportedLocales (for XHyphenator) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XHyphenator + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL hyphenate( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nMaxLeading, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL queryAlternativeSpelling( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nIndex, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XPossibleHyphens > SAL_CALL createPossibleHyphens( const OUString& aWord, const css::lang::Locale& aLocale, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.cxx b/lingucomponent/source/languageguessing/guess.cxx new file mode 100644 index 000000000..a7cbeccab --- /dev/null +++ b/lingucomponent/source/languageguessing/guess.cxx @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cassert> +#include <string.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#else +#include <textcat.h> +#endif + +#include "guess.hxx" + +/* Old textcat.h versions defined bad spelled constants. */ +#ifndef TEXTCAT_RESULT_UNKNOWN_STR +#define TEXTCAT_RESULT_UNKNOWN_STR _TEXTCAT_RESULT_UNKOWN +#endif + +#ifndef TEXTCAT_RESULT_SHORT_STR +#define TEXTCAT_RESULT_SHORT_STR _TEXTCAT_RESULT_SHORT +#endif + +Guess::Guess() + : language_str(DEFAULT_LANGUAGE) + , country_str(DEFAULT_COUNTRY) +{ +} + +/* +* this use a char * string to build the guess object +* a string like those is made as : [language-country-encoding]... +* +*/ +Guess::Guess(const char * guess_str) + : language_str(DEFAULT_LANGUAGE) + , country_str(DEFAULT_COUNTRY) +{ + //if the guess is not like "UNKNOWN" or "SHORT", go into the brackets + if(strcmp(guess_str + 1, TEXTCAT_RESULT_UNKNOWN_STR) == 0 + || strcmp(guess_str + 1, TEXTCAT_RESULT_SHORT_STR) == 0) + return; + + // From how this ctor is called from SimpleGuesser::GuessLanguage and + // SimpleGuesser::GetManagedLanguages in + // lingucomponent/source/languageguessing/simpleguesser.cxx, guess_str must start with "[": + assert(guess_str[0] == GUESS_SEPARATOR_OPEN); + auto const start = guess_str + 1; + // Only look at the prefix of guess_str, delimited by the next "]" or "[" or end-of-string; + // split it into at most three segments separated by "-" (where excess occurrences of "-" + // would become part of the third segment), like "en-US-utf8"; the first segment denotes the + // language; if there are three segments, the second denotes the country and the third the + // encoding; otherwise, the second segment, if any (e.g., in "haw-utf8"), denotes the + // encoding: + char const * dash1 = nullptr; + char const * dash2 = nullptr; + auto p = start; + for (;; ++p) { + auto const c = *p; + if (c == '\0' || c == GUESS_SEPARATOR_OPEN || c == GUESS_SEPARATOR_CLOSE) { + break; + } + if (c == GUESS_SEPARATOR_SEP) { + if (dash1 == nullptr) { + dash1 = p; + } else { + dash2 = p; + // The encoding is ignored, so we can stop as soon as we found the second "-": + break; + } + } + } + auto const langLen = (dash1 == nullptr ? p : dash1) - start; + if (langLen != 0) { // if not we use the default value + language_str.assign(start, langLen); + } + if (dash2 != nullptr) { + country_str.assign(dash1 + 1, dash2 - (dash1 + 1)); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.hxx b/lingucomponent/source/languageguessing/guess.hxx new file mode 100644 index 000000000..627033d3a --- /dev/null +++ b/lingucomponent/source/languageguessing/guess.hxx @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX + +#define GUESS_SEPARATOR_OPEN '[' +#define GUESS_SEPARATOR_CLOSE ']' +#define GUESS_SEPARATOR_SEP '-' +#define DEFAULT_LANGUAGE "" +#define DEFAULT_COUNTRY "" +#define DEFAULT_ENCODING "" + +#include <string> + +class Guess final { + public: + + /** + * Default init + */ + Guess(); + + /** + * Init from a string like [en-UK-utf8] and the rank + */ + Guess(const char * guess_str); + + const std::string& GetLanguage() const { return language_str;} + const std::string& GetCountry() const { return country_str;} + + private: + std::string language_str; + std::string country_str; +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guesslang.component b/lingucomponent/source/languageguessing/guesslang.component new file mode 100644 index 000000000..75f6e7ce2 --- /dev/null +++ b/lingucomponent/source/languageguessing/guesslang.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.lingu2.LanguageGuessing" + constructor="lingucomponent_LangGuess_get_implementation"> + <service name="com.sun.star.linguistic2.LanguageGuessing"/> + </implementation> +</component> diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx new file mode 100644 index 000000000..d6d5803a5 --- /dev/null +++ b/lingucomponent/source/languageguessing/guesslang.cxx @@ -0,0 +1,321 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <iostream> +#include <mutex> +#include <string_view> + +#include <osl/file.hxx> +#include <tools/debug.hxx> + +#include <sal/config.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include "simpleguesser.hxx" +#include "guess.hxx" + +#include <com/sun/star/lang/IllegalArgumentException.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XLanguageGuessing.hpp> +#include <unotools/pathoptions.hxx> +#include <osl/thread.h> + +#include <sal/macros.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#else +#include <textcat.h> +#endif + +using namespace ::std; +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +static std::mutex & GetLangGuessMutex() +{ + static std::mutex aMutex; + return aMutex; +} + +namespace { + +class LangGuess_Impl : + public ::cppu::WeakImplHelper< + XLanguageGuessing, + XServiceInfo > +{ + SimpleGuesser m_aGuesser; + bool m_bInitialized; + + virtual ~LangGuess_Impl() override {} + void EnsureInitialized(); + +public: + LangGuess_Impl(); + LangGuess_Impl(const LangGuess_Impl&) = delete; + LangGuess_Impl& operator=(const LangGuess_Impl&) = delete; + + // XServiceInfo implementation + virtual OUString SAL_CALL getImplementationName( ) override; + virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override; + + // XLanguageGuessing implementation + virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override; + virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; + virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override; + + // implementation specific + /// @throws RuntimeException + void SetFingerPrintsDB( std::u16string_view fileName ); +}; + +} + +LangGuess_Impl::LangGuess_Impl() : + m_bInitialized( false ) +{ +} + +void LangGuess_Impl::EnsureInitialized() +{ + if (m_bInitialized) + return; + + // set this to true at the very start to prevent loops because of + // implicitly called functions below + m_bInitialized = true; + + // set default fingerprint path to where those get installed + OUString aPhysPath; + OUString aURL( SvtPathOptions().GetFingerprintPath() ); + osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath ); +#ifdef _WIN32 + aPhysPath += "\\"; +#else + aPhysPath += "/"; +#endif + + SetFingerPrintsDB( aPhysPath ); + +#if !defined(EXTTEXTCAT_VERSION_MAJOR) + + // disable currently not functional languages... + struct LangCountry + { + const char *pLang; + const char *pCountry; + }; + LangCountry aDisable[] = + { + // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0 + // which is the first with EXTTEXTCAT_VERSION_MAJOR defined + {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, + {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""}, + {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""} + }; + sal_Int32 nNum = SAL_N_ELEMENTS(aDisable); + Sequence< Locale > aDisableSeq( nNum ); + Locale *pDisableSeq = aDisableSeq.getArray(); + for (sal_Int32 i = 0; i < nNum; ++i) + { + Locale aLocale; + aLocale.Language = OUString::createFromAscii( aDisable[i].pLang ); + aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry ); + pDisableSeq[i] = aLocale; + } + disableLanguages( aDisableSeq ); + DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" ); +#endif +} + +Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage( + const OUString& rText, + ::sal_Int32 nStartPos, + ::sal_Int32 nLen ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength()) + throw lang::IllegalArgumentException(); + + OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) ); + Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr()); + lang::Locale aRes; + aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() ); + aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() ); + return aRes; +} + +#define DEFAULT_CONF_FILE_NAME "fpdb.conf" + +void LangGuess_Impl::SetFingerPrintsDB( + std::u16string_view filePath ) +{ + //! text encoding for file name / path needs to be in the same encoding the OS uses + OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() ); + OString conf_file_path = path + DEFAULT_CONF_FILE_NAME; + + m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr()); +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + vector<Guess> gs = m_aGuesser.GetAllManagedLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + vector<Guess> gs = m_aGuesser.GetAvailableLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + vector<Guess> gs = m_aGuesser.GetUnavailableLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +void SAL_CALL LangGuess_Impl::disableLanguages( + const uno::Sequence< Locale >& rLanguages ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + for (const Locale& rLanguage : rLanguages) + { + string language; + + OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); + OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); + + language += l.getStr(); + language += "-"; + language += c.getStr(); + m_aGuesser.DisableLanguage(language); + } +} + +void SAL_CALL LangGuess_Impl::enableLanguages( + const uno::Sequence< Locale >& rLanguages ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + for (const Locale& rLanguage : rLanguages) + { + string language; + + OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); + OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); + + language += l.getStr(); + language += "-"; + language += c.getStr(); + m_aGuesser.EnableLanguage(language); + } +} + +OUString SAL_CALL LangGuess_Impl::getImplementationName( ) +{ + return "com.sun.star.lingu2.LanguageGuessing"; +} + +sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( ) +{ + return { "com.sun.star.linguistic2.LanguageGuessing" }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_LangGuess_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new LangGuess_Impl()); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx new file mode 100644 index 000000000..7210b1f45 --- /dev/null +++ b/lingucomponent/source/languageguessing/simpleguesser.cxx @@ -0,0 +1,221 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + /** + * + * + * + * + * TODO + * - Add exception throwing when h == NULL + * - Not init h when implicit constructor is launched + */ + +#include <string.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#include <libexttextcat/common.h> +#include <libexttextcat/constants.h> +#include <libexttextcat/fingerprint.h> +#else +#include <textcat.h> +#include <common.h> +#include <constants.h> +#include <fingerprint.h> +#endif + +#include <sal/types.h> + +#include<rtl/character.hxx> +#include "simpleguesser.hxx" + +static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){ + size_t i; + int ret = 0; + + size_t min = s1.length(); + if (min > s2.length()) + min = s2.length(); + + for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){ + ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i])) + - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i])); + if(s1[i] == '.' || s2[i] == '.') {ret = 0;} //. is a neutral character + } + return ret; + } + +namespace { + +/** + * This following structure is from textcat.c + */ +typedef struct textcat_t{ + + void **fprint; + char *fprint_disable; + uint4 size; + uint4 maxsize; + + char output[MAXOUTPUTSIZE]; + +} textcat_t; +// end of the 3 structs + +} + +SimpleGuesser::SimpleGuesser() +{ + h = nullptr; +} + +SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){ + // Check for self-assignment! + if (this == &sg) // Same object? + return *this; // Yes, so skip assignment, and just return *this. + + if(h){textcat_Done(h);} + h = sg.h; + return *this; +} + +SimpleGuesser::~SimpleGuesser() +{ + if(h){textcat_Done(h);} +} + +/*! + \fn SimpleGuesser::GuessLanguage(char* text) + */ +std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text) +{ + std::vector<Guess> guesses; + + if (!h) + return guesses; + + int len = strlen(text); + + if (len > MAX_STRING_LENGTH_TO_ANALYSE) + len = MAX_STRING_LENGTH_TO_ANALYSE; + + const char *guess_list = textcat_Classify(h, text, len); + + if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0) + return guesses; + + int current_pointer = 0; + + while(guess_list[current_pointer] != '\0') + { + while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0') + current_pointer++; + if(guess_list[current_pointer] != '\0') + { + Guess g(guess_list + current_pointer); + + guesses.push_back(g); + + current_pointer++; + } + } + + return guesses; +} + +Guess SimpleGuesser::GuessPrimaryLanguage(const char* text) +{ + std::vector<Guess> ret = GuessLanguage(text); + return ret.empty() ? Guess() : ret[0]; +} +/** + * Is used to know which language is available, unavailable or both + * when mask = 0xF0, return only Available + * when mask = 0x0F, return only Unavailable + * when mask = 0xFF, return both Available and Unavailable + */ +std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask) +{ + textcat_t *tables = static_cast<textcat_t*>(h); + + std::vector<Guess> lang; + if(!h){return lang;} + + for (size_t i=0; i<tables->size; ++i) + { + if (tables->fprint_disable[i] & mask) + { + std::string langStr = "["; + langStr += fp_Name(tables->fprint[i]); + Guess g(langStr.c_str()); + lang.push_back(g); + } + } + + return lang; +} + +std::vector<Guess> SimpleGuesser::GetAvailableLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) ); +} + +std::vector<Guess> SimpleGuesser::GetUnavailableLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0x0F )); +} + +std::vector<Guess> SimpleGuesser::GetAllManagedLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0xFF )); +} + +void SimpleGuesser::XableLanguage(const std::string& lang, char mask) +{ + textcat_t *tables = static_cast<textcat_t*>(h); + + if(!h){return;} + + for (size_t i=0; i<tables->size; i++) + { + std::string language(fp_Name(tables->fprint[i])); + if (startsAsciiCaseInsensitive(language,lang) == 0) + tables->fprint_disable[i] = mask; + } +} + +void SimpleGuesser::EnableLanguage(const std::string& lang) +{ + XableLanguage(lang, sal::static_int_cast< char >( 0xF0 )); +} + +void SimpleGuesser::DisableLanguage(const std::string& lang) +{ + XableLanguage(lang, sal::static_int_cast< char >( 0x0F )); +} + +void SimpleGuesser::SetDBPath(const char* path, const char* prefix) +{ + if (h) + textcat_Done(h); + h = special_textcat_Init(path, prefix); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx new file mode 100644 index 000000000..aec544285 --- /dev/null +++ b/lingucomponent/source/languageguessing/simpleguesser.hxx @@ -0,0 +1,108 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX + +#include <string> +#include <vector> +#include "guess.hxx" + +#define MAX_STRING_LENGTH_TO_ANALYSE 200 + +class SimpleGuesser final +{ +public: + /**inits the object with conf file "./conf.txt"*/ + SimpleGuesser(); + + /** + * @param SimpleGuesser& sg the other guesser + */ + SimpleGuesser& operator=(const SimpleGuesser& sg); + + /** + * destroy the object + */ + ~SimpleGuesser(); + + /** + * Analyze a text and return the most probable languages of the text + * @param char* text is the text to analyze + * @return the list of guess + */ + std::vector<Guess> GuessLanguage(const char* text); + + /** + * Analyze a text and return the most probable language of the text + * @param char* text is the text to analyze + * @return the guess (containing language) + */ + Guess GuessPrimaryLanguage(const char* text); + + /** + * List all available languages (possibly to be in guesses) + * @return the list of languages + */ + std::vector<Guess> GetAvailableLanguages(); + + /** + * List all languages (possibly in guesses or not) + * @return the list of languages + */ + std::vector<Guess> GetAllManagedLanguages(); + + /** + * List all Unavailable languages (disable for any reason) + * @return the list of languages + */ + std::vector<Guess> GetUnavailableLanguages(); + + /** + * Mark a language enabled + * @param string lang the language to enable (build like language-COUNTRY-encoding) + */ + void EnableLanguage(const std::string& lang); + + /** + * Mark a language disabled + * @param string lang the language to disable (build like language-COUNTRY-encoding) + */ + void DisableLanguage(const std::string& lang); + + /** + * Load a new DB of fingerprints + * @param const char* thePathOfConfFile self explaining + * @param const char* prefix is the path where the directory which contains fingerprint files is stored + */ + void SetDBPath(const char* thePathOfConfFile, const char* prefix); + +private: + //Where typical fingerprints (n-gram tables) are stored + void* h; + + //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both + std::vector<Guess> GetManagedLanguages(const char mask); + + //Like getManagedLanguages, this function enable or disable a language and it depends of the mask + void XableLanguage(const std::string& lang, char mask); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/lingutil/lingutil.cxx b/lingucomponent/source/lingutil/lingutil.cxx new file mode 100644 index 000000000..f0ab84026 --- /dev/null +++ b/lingucomponent/source/lingutil/lingutil.cxx @@ -0,0 +1,304 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#if defined(_WIN32) +#if !defined WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#endif + +#include <osl/thread.h> +#include <osl/file.hxx> +#include <osl/process.h> +#include <tools/debug.hxx> +#include <tools/urlobj.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <i18nlangtag/mslangid.hxx> +#include <unotools/bootstrap.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/pathoptions.hxx> +#include <rtl/bootstrap.hxx> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <rtl/tencinfo.h> +#include <linguistic/misc.hxx> + +#include <set> +#include <vector> +#include <string.h> + +#include "lingutil.hxx" + +#include <sal/macros.h> + +using namespace ::com::sun::star; + +#if defined(_WIN32) +OString Win_AddLongPathPrefix( const OString &rPathName ) +{ + constexpr OStringLiteral WIN32_LONG_PATH_PREFIX = "\\\\?\\"; + if (!rPathName.match(WIN32_LONG_PATH_PREFIX)) return WIN32_LONG_PATH_PREFIX + rPathName; + return rPathName; +} +#endif //defined(_WIN32) + +#if defined SYSTEM_DICTS || defined IOS +// find old style dictionaries in system directories +static void GetOldStyleDicsInDir( + OUString const & aSystemDir, OUString const & aFormatName, + OUString const & aSystemSuffix, OUString const & aSystemPrefix, + std::set< OUString >& aDicLangInUse, + std::vector< SvtLinguConfigDictionaryEntry >& aRes ) +{ + osl::Directory aSystemDicts(aSystemDir); + if (aSystemDicts.open() != osl::FileBase::E_None) + return; + + osl::DirectoryItem aItem; + osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL); + while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None) + { + aItem.getFileStatus(aFileStatus); + OUString sPath = aFileStatus.getFileURL(); + if (sPath.endsWith(aSystemSuffix)) + { + sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1; + if (!sPath.match(aSystemPrefix, nStartIndex)) + continue; + OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.getLength(), + sPath.getLength() - aSystemSuffix.getLength() - + nStartIndex - aSystemPrefix.getLength()); + if (sChunk.isEmpty()) + continue; + + // We prefer (now) to use language tags. + // Avoid feeding in the older LANG_REGION scheme to the BCP47 + // ctor as that triggers use of liblangtag and initializes its + // database which we do not want during startup. Convert + // instead. + sChunk = sChunk.replace( '_', '-'); + + // There's a known exception to the rule, the dreaded + // hu_HU_u8.dic of the myspell-hu package, see + // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic + // This was ignored because unknown in the old implementation, + // truncate to the known locale and either insert because hu_HU + // wasn't encountered yet, or skip because it was. It doesn't + // really matter because the proper new-style hu_HU dictionary + // will take precedence anyway if installed with a Hungarian + // languagepack. Again, this is only to not pull in all + // liblangtag and stuff during startup, the result would be + // !isValidBcp47() and the dictionary ignored. + if (sChunk == "hu-HU-u8") + sChunk = "hu-HU"; + + LanguageTag aLangTag(sChunk, true); + if (!aLangTag.isValidBcp47()) + continue; + + // Thus we first get the language of the dictionary + const OUString& aLocaleName(aLangTag.getBcp47()); + + if (aDicLangInUse.insert(aLocaleName).second) + { + // add the dictionary to the resulting vector + SvtLinguConfigDictionaryEntry aDicEntry; + aDicEntry.aLocations = { sPath }; + aDicEntry.aLocaleNames = { aLocaleName }; + aDicEntry.aFormatName = aFormatName; + aRes.push_back( aDicEntry ); + } + } + } +} +#endif + +// build list of old style dictionaries (not as extensions) to use. +// User installed dictionaries (the ones residing in the user paths) +// will get precedence over system installed ones for the same language. +std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType ) +{ + std::vector< SvtLinguConfigDictionaryEntry > aRes; + + if (!pDicType) + return aRes; + + OUString aFormatName; + OUString aDicExtension; +#if defined SYSTEM_DICTS || defined IOS + OUString aSystemDir; + OUString aSystemPrefix; + OUString aSystemSuffix; +#endif + if (strcmp( pDicType, "DICT" ) == 0) + { + aFormatName = "DICT_SPELL"; + aDicExtension = ".dic"; +#ifdef SYSTEM_DICTS + aSystemDir = DICT_SYSTEM_DIR; + aSystemSuffix = aDicExtension; +#elif defined IOS + aSystemDir = "$BRAND_BASE_DIR/share/spell"; + rtl::Bootstrap::expandMacros(aSystemDir); + aSystemSuffix = ".dic"; +#endif + } + else if (strcmp( pDicType, "HYPH" ) == 0) + { + aFormatName = "DICT_HYPH"; + aDicExtension = ".dic"; +#ifdef SYSTEM_DICTS + aSystemDir = HYPH_SYSTEM_DIR; + aSystemPrefix = "hyph_"; + aSystemSuffix = aDicExtension; +#endif + } + else if (strcmp( pDicType, "THES" ) == 0) + { + aFormatName = "DICT_THES"; + aDicExtension = ".dat"; +#ifdef SYSTEM_DICTS + aSystemDir = THES_SYSTEM_DIR; + aSystemPrefix = "th_"; + aSystemSuffix = "_v2.dat"; +#elif defined IOS + aSystemDir = "$BRAND_BASE_DIR/share/thes"; + rtl::Bootstrap::expandMacros(aSystemDir); + aSystemPrefix = "th_"; + aSystemSuffix = "_v2.dat"; +#endif + } + + if (aFormatName.isEmpty() || aDicExtension.isEmpty()) + return aRes; + +#if defined SYSTEM_DICTS || defined IOS + // set of languages to remember the language where it is already + // decided to make use of the dictionary. + std::set< OUString > aDicLangInUse; + +#ifndef IOS + // follow the hunspell tool's example and check DICPATH for preferred dictionaries + rtl_uString * pSearchPath = nullptr; + osl_getEnvironment(OUString("DICPATH").pData, &pSearchPath); + + if (pSearchPath) + { + OUString aSearchPath(pSearchPath); + rtl_uString_release(pSearchPath); + + sal_Int32 nIndex = 0; + do + { + OUString aSystem( aSearchPath.getToken(0, ':', nIndex) ); + OUString aCWD; + OUString aRelative; + OUString aAbsolute; + + if (!utl::Bootstrap::getProcessWorkingDir(aCWD)) + continue; + if (osl::FileBase::getFileURLFromSystemPath(aSystem, aRelative) + != osl::FileBase::E_None) + continue; + if (osl::FileBase::getAbsoluteFileURL(aCWD, aRelative, aAbsolute) + != osl::FileBase::E_None) + continue; + + // GetOldStyleDicsInDir will make sure the dictionary is the right + // type based on its prefix, that way hyphen, mythes and regular + // dictionaries can live in one directory + GetOldStyleDicsInDir(aAbsolute, aFormatName, aSystemSuffix, + aSystemPrefix, aDicLangInUse, aRes); + } + while (nIndex != -1); + } +#endif + + // load system directories last so that DICPATH prevails + GetOldStyleDicsInDir(aSystemDir, aFormatName, aSystemSuffix, aSystemPrefix, + aDicLangInUse, aRes); +#endif + + return aRes; +} + +void MergeNewStyleDicsAndOldStyleDics( + std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, + const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ) +{ + // get list of languages supported by new style dictionaries + std::set< OUString > aNewStyleLanguages; + for (auto const& newStyleDic : rNewStyleDics) + { + const uno::Sequence< OUString > aLocaleNames(newStyleDic.aLocaleNames); + sal_Int32 nLocaleNames = aLocaleNames.getLength(); + for (sal_Int32 k = 0; k < nLocaleNames; ++k) + { + aNewStyleLanguages.insert( aLocaleNames[k] ); + } + } + + // now check all old style dictionaries if they will add a not yet + // added language. If so add them to the resulting vector + for (auto const& oldStyleDic : rOldStyleDics) + { + sal_Int32 nOldStyleDics = oldStyleDic.aLocaleNames.getLength(); + + // old style dics should only have one language listed... + DBG_ASSERT( nOldStyleDics, "old style dictionary with more than one language found!"); + if (nOldStyleDics > 0) + { + if (linguistic::LinguIsUnspecified( oldStyleDic.aLocaleNames[0])) + { + OSL_FAIL( "old style dictionary with invalid language found!" ); + continue; + } + + // language not yet added? + if (aNewStyleLanguages.find( oldStyleDic.aLocaleNames[0] ) == aNewStyleLanguages.end()) + rNewStyleDics.push_back(oldStyleDic); + } + else + { + OSL_FAIL( "old style dictionary with no language found!" ); + } + } +} + +rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset) +{ + // default result: used to indicate that we failed to get the proper encoding + rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW; + + if (pCharset) + { + eRet = rtl_getTextEncodingFromMimeCharset(pCharset); + if (eRet == RTL_TEXTENCODING_DONTKNOW) + eRet = rtl_getTextEncodingFromUnixCharset(pCharset); + if (eRet == RTL_TEXTENCODING_DONTKNOW) + { + if (strcmp("ISCII-DEVANAGARI", pCharset) == 0) + eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI; + } + } + return eRet; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/lingutil/lingutil.hxx b/lingucomponent/source/lingutil/lingutil.hxx new file mode 100644 index 000000000..4c4fe15ec --- /dev/null +++ b/lingucomponent/source/lingutil/lingutil.hxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX + +#include <rtl/string.hxx> + +#include <vector> + +#define OU2ENC(rtlOUString, rtlEncoding) \ + OString((rtlOUString).getStr(), (rtlOUString).getLength(), \ + rtlEncoding, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK).getStr() + +struct SvtLinguConfigDictionaryEntry; + +#if defined(_WIN32) + +// to be use to get a path name with long path prefix +// under Windows for Hunspell, Hyphen and MyThes libraries +OString Win_AddLongPathPrefix( const OString &rPathName ); +#endif + + +// temporary function, to be removed when new style dictionaries +// using configuration entries are fully implemented and provided +std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char * pDicType ); +void MergeNewStyleDicsAndOldStyleDics( std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ); + +//Find an encoding from a charset string, using +//rtl_getTextEncodingFromMimeCharset and falling back to +//rtl_getTextEncodingFromUnixCharset with the addition of +//ISCII-DEVANAGARI. On failure will return final fallback of +//RTL_TEXTENCODING_ISO_8859_1 +rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/numbertext/numbertext.component b/lingucomponent/source/numbertext/numbertext.component new file mode 100644 index 000000000..c3277533b --- /dev/null +++ b/lingucomponent/source/numbertext/numbertext.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.lingu2.NumberText" + constructor="lingucomponent_NumberText_get_implementation"> + <service name="com.sun.star.linguistic2.NumberText"/> + </implementation> +</component> diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx new file mode 100644 index 000000000..79c8e6810 --- /dev/null +++ b/lingucomponent/source/numbertext/numbertext.cxx @@ -0,0 +1,168 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <mutex> + +#include <osl/file.hxx> +#include <tools/debug.hxx> +#include <o3tl/char16_t2wchar_t.hxx> + +#include <sal/config.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include <i18nlangtag/languagetag.hxx> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XNumberText.hpp> +#include <unotools/pathoptions.hxx> +#include <osl/thread.h> + +#include <Numbertext.hxx> + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +static std::mutex& GetNumberTextMutex() +{ + static std::mutex aMutex; + return aMutex; +} + +namespace +{ +class NumberText_Impl : public ::cppu::WeakImplHelper<XNumberText, XServiceInfo> +{ + Numbertext m_aNumberText; + bool m_bInitialized; + + virtual ~NumberText_Impl() override {} + void EnsureInitialized(); + +public: + NumberText_Impl(); + NumberText_Impl(const NumberText_Impl&) = delete; + NumberText_Impl& operator=(const NumberText_Impl&) = delete; + + // XServiceInfo implementation + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; + virtual Sequence<OUString> SAL_CALL getSupportedServiceNames() override; + + // XNumberText implementation + virtual OUString SAL_CALL getNumberText(const OUString& aText, + const ::css::lang::Locale& rLocale) override; + virtual css::uno::Sequence<css::lang::Locale> SAL_CALL getAvailableLanguages() override; +}; +} + +NumberText_Impl::NumberText_Impl() + : m_bInitialized(false) +{ +} + +void NumberText_Impl::EnsureInitialized() +{ + if (m_bInitialized) + return; + + // set this to true at the very start to prevent loops because of + // implicitly called functions below + m_bInitialized = true; + + // set default numbertext path to where those get installed + OUString aPhysPath; + OUString aURL(SvtPathOptions().GetNumbertextPath()); + osl::FileBase::getSystemPathFromFileURL(aURL, aPhysPath); +#ifdef _WIN32 + aPhysPath += "\\"; + const rtl_TextEncoding eEnc = RTL_TEXTENCODING_UTF8; +#else + aPhysPath += "/"; + const rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); +#endif + OString path = OUStringToOString(aPhysPath, eEnc); + m_aNumberText.set_prefix(path.getStr()); +} + +OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Locale& rLocale) +{ + std::scoped_lock aGuard(GetNumberTextMutex()); + EnsureInitialized(); + // libnumbertext supports Language + Country tags (separated by "_" or "-") + LanguageTag aLanguageTag(rLocale); + OUString aCode(aLanguageTag.getLanguage()); + OUString aCountry(aLanguageTag.getCountry()); + OUString aScript(aLanguageTag.getScript()); + if (!aScript.isEmpty()) + aCode += "-" + aScript; + if (!aCountry.isEmpty()) + aCode += "-" + aCountry; + OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US)); +#if defined(_WIN32) + std::wstring sResult(o3tl::toW(rText.getStr())); +#else + OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8)); + std::wstring sResult = Numbertext::string2wstring(aInput.getStr()); +#endif + bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr()); + DBG_ASSERT(result, "numbertext: false"); +#if defined(_WIN32) + OUString aResult(o3tl::toU(sResult.c_str())); +#else + OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str()); +#endif + return aResult; +} + +uno::Sequence<Locale> SAL_CALL NumberText_Impl::getAvailableLanguages() +{ + std::scoped_lock aGuard(GetNumberTextMutex()); + // TODO + Sequence<css::lang::Locale> aRes; + return aRes; +} + +OUString SAL_CALL NumberText_Impl::getImplementationName() +{ + return "com.sun.star.lingu2.NumberText"; +} + +sal_Bool SAL_CALL NumberText_Impl::supportsService(const OUString& ServiceName) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL NumberText_Impl::getSupportedServiceNames() +{ + return { "com.sun.star.linguistic2.NumberText" }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_NumberText_get_implementation(css::uno::XComponentContext*, + css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new NumberText_Impl()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/languagetool/LanguageTool.component b/lingucomponent/source/spellcheck/languagetool/LanguageTool.component new file mode 100644 index 000000000..9f7eb3d08 --- /dev/null +++ b/lingucomponent/source/spellcheck/languagetool/LanguageTool.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.LanguageToolGrammarChecker" + constructor="lingucomponent_LanguageToolGrammarChecker_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.Proofreader"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx new file mode 100644 index 000000000..06b4fcb64 --- /dev/null +++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx @@ -0,0 +1,407 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include "languagetoolimp.hxx" + +#include <i18nlangtag/languagetag.hxx> +#include <svtools/strings.hrc> +#include <unotools/resmgr.hxx> + +#include <vector> +#include <set> +#include <string.h> + +#include <curl/curl.h> +#include <boost/property_tree/ptree.hpp> +#include <boost/property_tree/json_parser.hpp> +#include <algorithm> +#include <string_view> +#include <sal/log.hxx> +#include <svtools/languagetoolcfg.hxx> +#include <tools/color.hxx> +#include <tools/long.hxx> +#include <com/sun/star/uno/Any.hxx> + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +#define COL_ORANGE Color(0xD1, 0x68, 0x20) + +namespace +{ +PropertyValue lcl_MakePropertyValue(const OUString& rName, uno::Any& rValue) +{ + return PropertyValue(rName, -1, rValue, css::beans::PropertyState_DIRECT_VALUE); +} + +Sequence<PropertyValue> lcl_GetLineColorPropertyFromErrorId(const std::string& rErrorId) +{ + uno::Any aColor; + if (rErrorId == "TYPOS") + { + aColor <<= COL_LIGHTRED; + } + else if (rErrorId == "STYLE") + { + aColor <<= COL_LIGHTBLUE; + } + else + { + // Same color is used for other errorId's such as GRAMMAR, TYPOGRAPHY.. + aColor <<= COL_ORANGE; + } + Sequence<PropertyValue> aProperties{ lcl_MakePropertyValue("LineColor", aColor) }; + return aProperties; +} +} + +LanguageToolGrammarChecker::LanguageToolGrammarChecker() + : mCachedResults(MAX_CACHE_SIZE) +{ +} + +LanguageToolGrammarChecker::~LanguageToolGrammarChecker() {} + +sal_Bool SAL_CALL LanguageToolGrammarChecker::isSpellChecker() { return false; } + +sal_Bool SAL_CALL LanguageToolGrammarChecker::hasLocale(const Locale& rLocale) +{ + bool bRes = false; + if (!m_aSuppLocales.hasElements()) + getLocales(); + + for (auto const& suppLocale : std::as_const(m_aSuppLocales)) + { + if (rLocale == suppLocale) + { + bRes = true; + break; + } + } + + return bRes; +} + +Sequence<Locale> SAL_CALL LanguageToolGrammarChecker::getLocales() +{ + if (m_aSuppLocales.hasElements()) + return m_aSuppLocales; + SvxLanguageToolOptions& rLanguageOpts = SvxLanguageToolOptions::Get(); + OString localeUrl = OUStringToOString(rLanguageOpts.getLocaleListURL(), RTL_TEXTENCODING_UTF8); + if (localeUrl.isEmpty()) + { + return m_aSuppLocales; + } + tools::Long statusCode = 0; + std::string response = makeHttpRequest(localeUrl, HTTP_METHOD::HTTP_GET, OString(), statusCode); + if (statusCode != 200) + { + return m_aSuppLocales; + } + if (response.empty()) + { + return m_aSuppLocales; + } + boost::property_tree::ptree root; + std::stringstream aStream(response); + boost::property_tree::read_json(aStream, root); + + size_t length = root.size(); + m_aSuppLocales.realloc(length); + auto pArray = m_aSuppLocales.getArray(); + int i = 0; + for (auto it = root.begin(); it != root.end(); it++, i++) + { + boost::property_tree::ptree& localeItem = it->second; + const std::string longCode = localeItem.get<std::string>("longCode"); + Locale aLocale = LanguageTag::convertToLocale( + OUString(longCode.c_str(), longCode.length(), RTL_TEXTENCODING_UTF8)); + pArray[i] = aLocale; + } + return m_aSuppLocales; +} + +// Callback to get the response data from server. +static size_t WriteCallback(void* ptr, size_t size, size_t nmemb, void* userp) +{ + if (!userp) + return 0; + + std::string* response = static_cast<std::string*>(userp); + size_t real_size = size * nmemb; + response->append(static_cast<char*>(ptr), real_size); + return real_size; +} + +ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading( + const OUString& aDocumentIdentifier, const OUString& aText, const Locale& aLocale, + sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition, + const Sequence<PropertyValue>& aProperties) +{ + // ProofreadingResult declared here instead of parseHttpJSONResponse because of the early exists. + ProofreadingResult xRes; + xRes.aDocumentIdentifier = aDocumentIdentifier; + xRes.aText = aText; + xRes.aLocale = aLocale; + xRes.nStartOfSentencePosition = nStartOfSentencePosition; + xRes.nBehindEndOfSentencePosition = nSuggestedBehindEndOfSentencePosition; + xRes.aProperties = Sequence<PropertyValue>(); + xRes.xProofreader = this; + xRes.aErrors = Sequence<SingleProofreadingError>(); + + if (aText.isEmpty()) + { + return xRes; + } + + if (nStartOfSentencePosition != 0) + { + return xRes; + } + + xRes.nStartOfNextSentencePosition = aText.getLength(); + + SvxLanguageToolOptions& rLanguageOpts = SvxLanguageToolOptions::Get(); + if (rLanguageOpts.getEnabled() == false) + { + return xRes; + } + + OString checkerURL = OUStringToOString(rLanguageOpts.getCheckerURL(), RTL_TEXTENCODING_UTF8); + if (checkerURL.isEmpty()) + { + return xRes; + } + + if (aProperties.getLength() > 0 && aProperties[0].Name == "Update") + { + // locale changed + xRes.aText = ""; + return xRes; + } + + sal_Int32 spaceIndex = std::min(xRes.nStartOfNextSentencePosition, aText.getLength() - 1); + while (spaceIndex < aText.getLength() && aText[spaceIndex] == ' ') + { + xRes.nStartOfNextSentencePosition += 1; + spaceIndex = xRes.nStartOfNextSentencePosition; + } + if (xRes.nStartOfNextSentencePosition == nSuggestedBehindEndOfSentencePosition + && spaceIndex < aText.getLength()) + { + xRes.nStartOfNextSentencePosition + = std::min(nSuggestedBehindEndOfSentencePosition + 1, aText.getLength()); + } + xRes.nBehindEndOfSentencePosition + = std::min(xRes.nStartOfNextSentencePosition, aText.getLength()); + + auto cachedResult = mCachedResults.find(aText); + if (cachedResult != mCachedResults.end()) + { + xRes.aErrors = cachedResult->second; + return xRes; + } + + tools::Long http_code = 0; + OUString langTag(aLocale.Language + "-" + aLocale.Country); + OString postData(OUStringToOString( + OUStringConcatenation("text=" + aText + "&language=" + langTag), RTL_TEXTENCODING_UTF8)); + const std::string response_body + = makeHttpRequest(checkerURL, HTTP_METHOD::HTTP_POST, postData, http_code); + + if (http_code != 200) + { + return xRes; + } + + if (response_body.length() <= 0) + { + return xRes; + } + + parseProofreadingJSONResponse(xRes, response_body); + // cache the result + mCachedResults.insert( + std::pair<OUString, Sequence<SingleProofreadingError>>(aText, xRes.aErrors)); + return xRes; +} + +/* + rResult is both input and output + aJSONBody is the response body from the HTTP Request to LanguageTool API +*/ +void LanguageToolGrammarChecker::parseProofreadingJSONResponse(ProofreadingResult& rResult, + std::string_view aJSONBody) +{ + boost::property_tree::ptree root; + std::stringstream aStream(aJSONBody.data()); + boost::property_tree::read_json(aStream, root); + boost::property_tree::ptree& matches = root.get_child("matches"); + size_t matchSize = matches.size(); + + if (matchSize <= 0) + { + return; + } + Sequence<SingleProofreadingError> aErrors(matchSize); + auto pErrors = aErrors.getArray(); + size_t i = 0; + for (auto it1 = matches.begin(); it1 != matches.end(); it1++, i++) + { + const boost::property_tree::ptree& match = it1->second; + int offset = match.get<int>("offset"); + int length = match.get<int>("length"); + const std::string shortMessage = match.get<std::string>("message"); + const std::string message = match.get<std::string>("shortMessage"); + + // Parse the error category for Line Color + const boost::property_tree::ptree& rule = match.get_child("rule"); + const boost::property_tree::ptree& ruleCategory = rule.get_child("category"); + const std::string errorCategoryId = ruleCategory.get<std::string>("id"); + + OUString aShortComment(shortMessage.c_str(), shortMessage.length(), RTL_TEXTENCODING_UTF8); + OUString aFullComment(message.c_str(), message.length(), RTL_TEXTENCODING_UTF8); + + pErrors[i].nErrorStart = offset; + pErrors[i].nErrorLength = length; + pErrors[i].nErrorType = PROOFREADING_ERROR; + pErrors[i].aShortComment = aShortComment; + pErrors[i].aFullComment = aFullComment; + pErrors[i].aProperties = lcl_GetLineColorPropertyFromErrorId(errorCategoryId); + ; + const boost::property_tree::ptree& replacements = match.get_child("replacements"); + int suggestionSize = replacements.size(); + + if (suggestionSize <= 0) + { + continue; + } + pErrors[i].aSuggestions.realloc(std::min(suggestionSize, MAX_SUGGESTIONS_SIZE)); + auto pSuggestions = pErrors[i].aSuggestions.getArray(); + // Limit suggestions to avoid crash on context menu popup: + // (soffice:17251): Gdk-CRITICAL **: 17:00:21.277: ../../../../../gdk/wayland/gdkdisplay-wayland.c:1399: Unable to create Cairo image + // surface: invalid value (typically too big) for the size of the input (surface, pattern, etc.) + int j = 0; + for (auto it2 = replacements.begin(); it2 != replacements.end() && j < MAX_SUGGESTIONS_SIZE; + it2++, j++) + { + const boost::property_tree::ptree& replacement = it2->second; + std::string replacementStr = replacement.get<std::string>("value"); + pSuggestions[j] + = OUString(replacementStr.c_str(), replacementStr.length(), RTL_TEXTENCODING_UTF8); + } + } + rResult.aErrors = aErrors; +} + +std::string LanguageToolGrammarChecker::makeHttpRequest(std::string_view aURL, HTTP_METHOD method, + const OString& aPostData, + tools::Long& nStatusCode) +{ + std::unique_ptr<CURL, std::function<void(CURL*)>> curl(curl_easy_init(), + [](CURL* p) { curl_easy_cleanup(p); }); + if (!curl) + return {}; // empty string + + bool isPremium = false; + SvxLanguageToolOptions& rLanguageOpts = SvxLanguageToolOptions::Get(); + OString apiKey = OUStringToOString(rLanguageOpts.getApiKey(), RTL_TEXTENCODING_UTF8); + OString username = OUStringToOString(rLanguageOpts.getUsername(), RTL_TEXTENCODING_UTF8); + OString premiumPostData; + if (!apiKey.isEmpty() && !username.isEmpty()) + { + isPremium = true; + } + + std::string response_body; + curl_easy_setopt(curl.get(), CURLOPT_URL, aURL.data()); + + curl_easy_setopt(curl.get(), CURLOPT_FAILONERROR, 1L); + // curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L); + + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, static_cast<void*>(&response_body)); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYHOST, false); + curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, CURL_TIMEOUT); + + if (method == HTTP_METHOD::HTTP_POST) + { + curl_easy_setopt(curl.get(), CURLOPT_POST, 1L); + if (isPremium == false) + { + curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, aPostData.getStr()); + } + else + { + premiumPostData = aPostData + "&username=" + username + "&apiKey=" + apiKey; + curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, premiumPostData.getStr()); + } + } + + /*CURLcode cc = */ + curl_easy_perform(curl.get()); + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &nStatusCode); + return response_body; +} + +void SAL_CALL LanguageToolGrammarChecker::ignoreRule(const OUString& /*aRuleIdentifier*/, + const Locale& /*aLocale*/ +) +{ +} +void SAL_CALL LanguageToolGrammarChecker::resetIgnoreRules() {} + +OUString SAL_CALL LanguageToolGrammarChecker::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_LANGUAGETOOL, loc); +} + +OUString SAL_CALL LanguageToolGrammarChecker::getImplementationName() +{ + return "org.openoffice.lingu.LanguageToolGrammarChecker"; +} + +sal_Bool SAL_CALL LanguageToolGrammarChecker::supportsService(const OUString& ServiceName) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL LanguageToolGrammarChecker::getSupportedServiceNames() +{ + return { SN_GRAMMARCHECKER }; +} + +void SAL_CALL LanguageToolGrammarChecker::initialize(const Sequence<Any>& /*rArguments*/) {} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_LanguageToolGrammarChecker_get_implementation( + css::uno::XComponentContext*, css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(static_cast<cppu::OWeakObject*>(new LanguageToolGrammarChecker())); +}
\ No newline at end of file diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx new file mode 100644 index 000000000..e0dadfeca --- /dev/null +++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.hxx @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#pragma once +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/lang/XServiceName.hpp> +#include <com/sun/star/linguistic2/XProofreader.hpp> +#include <com/sun/star/linguistic2/ProofreadingResult.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> +#include <linguistic/misc.hxx> +#include <string_view> +#include <o3tl/lru_map.hxx> +#include <tools/long.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +// Magical numbers +#define MAX_CACHE_SIZE 10 +#define MAX_SUGGESTIONS_SIZE 10 +#define PROOFREADING_ERROR 2 +#define CURL_TIMEOUT 10L + +enum class HTTP_METHOD +{ + HTTP_GET, + HTTP_POST +}; + +class LanguageToolGrammarChecker + : public cppu::WeakImplHelper<XProofreader, XInitialization, XServiceInfo, XServiceDisplayName> +{ + Sequence<Locale> m_aSuppLocales; + o3tl::lru_map<OUString, Sequence<SingleProofreadingError>> mCachedResults; + LanguageToolGrammarChecker(const LanguageToolGrammarChecker&) = delete; + LanguageToolGrammarChecker& operator=(const LanguageToolGrammarChecker&) = delete; + static void parseProofreadingJSONResponse(ProofreadingResult& rResult, + std::string_view aJSONBody); + static std::string makeHttpRequest(std::string_view aURL, HTTP_METHOD method, + const OString& aPostData, tools::Long& nStatusCode); + +public: + LanguageToolGrammarChecker(); + virtual ~LanguageToolGrammarChecker() override; + + // XSupportedLocales + virtual Sequence<Locale> SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale(const Locale& rLocale) override; + + // XProofReader + virtual sal_Bool SAL_CALL isSpellChecker() override; + virtual ProofreadingResult SAL_CALL doProofreading( + const OUString& aDocumentIdentifier, const OUString& aText, const Locale& aLocale, + sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition, + const Sequence<PropertyValue>& aProperties) override; + + virtual void SAL_CALL ignoreRule(const OUString& aRuleIdentifier, + const Locale& aLocale) override; + virtual void SAL_CALL resetIgnoreRules() override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName(const Locale& rLocale) override; + + // XInitialization + virtual void SAL_CALL initialize(const Sequence<Any>& rArguments) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService(const OUString& rServiceName) override; + virtual Sequence<OUString> SAL_CALL getSupportedServiceNames() override; +};
\ No newline at end of file diff --git a/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component b/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component new file mode 100644 index 000000000..b1fe7d612 --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/MacOSXSpell.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.MacOSXSpellChecker" + constructor="lingucomponent_MacSpellChecker_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.SpellChecker"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx new file mode 100644 index 000000000..776c474d2 --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx @@ -0,0 +1,123 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_MACOSXSPELL_MACSPELLIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_MACOSXSPELL_MACSPELLIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> + +#include <premac.h> +#ifdef MACOSX +#import <Cocoa/Cocoa.h> +#else +#include <UIKit/UIKit.h> +#endif +#include <postmac.h> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XSpellChecker.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +class MacSpellChecker : + public cppu::WeakImplHelper + < + XSpellChecker, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + rtl_TextEncoding * aDEncs; + Locale * aDLocs; + OUString * aDNames; + sal_Int32 numdict; +#ifdef MACOSX + int macTag; // unique tag for this doc +#else + UITextChecker * pChecker; +#endif + ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners; + rtl::Reference< linguistic::PropertyHelper_Spell > xPropHelper; + bool bDisposing; + + MacSpellChecker(const MacSpellChecker &) = delete; + MacSpellChecker & operator = (const MacSpellChecker &) = delete; + + linguistic::PropertyHelper_Spell & GetPropHelper_Impl(); + linguistic::PropertyHelper_Spell & GetPropHelper() + { + return xPropHelper.is() ? *xPropHelper : GetPropHelper_Impl(); + } + + sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale ); + Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale ); + +public: + MacSpellChecker(); + virtual ~MacSpellChecker() override; + + // XSupportedLocales (for XSpellChecker) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XSpellChecker + virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override; + virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm new file mode 100644 index 000000000..c20871b1e --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm @@ -0,0 +1,666 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> + +#include <com/sun/star/linguistic2/SpellFailure.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/registry/XRegistryKey.hpp> +#include <com/sun/star/lang/XSingleServiceFactory.hpp> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> + +#include "macspellimp.hxx" + +#include <linguistic/spelldta.hxx> +#include <unotools/pathoptions.hxx> +#include <unotools/useroptions.hxx> +#include <osl/file.hxx> +#include <rtl/ref.hxx> +#include <rtl/ustrbuf.hxx> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +MacSpellChecker::MacSpellChecker() : + aEvtListeners( GetLinguMutex() ) +{ + aDEncs = nullptr; + aDLocs = nullptr; + aDNames = nullptr; + bDisposing = false; + numdict = 0; +#ifndef IOS + NSApplicationLoad(); + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + macTag = [NSSpellChecker uniqueSpellDocumentTag]; + [pool release]; +#else + pChecker = [[UITextChecker alloc] init]; +#endif +} + + +MacSpellChecker::~MacSpellChecker() +{ + numdict = 0; + if (aDEncs) delete[] aDEncs; + aDEncs = nullptr; + if (aDLocs) delete[] aDLocs; + aDLocs = nullptr; + if (aDNames) delete[] aDNames; + aDNames = nullptr; + if (xPropHelper.is()) + xPropHelper->RemoveAsPropListener(); +} + + +PropertyHelper_Spell & MacSpellChecker::GetPropHelper_Impl() +{ + if (!xPropHelper.is()) + { + Reference< XLinguProperties > xPropSet( GetLinguProperties() ); + + xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xPropSet ); + xPropHelper->AddAsPropListener(); + } + return *xPropHelper; +} + + +Sequence< Locale > SAL_CALL MacSpellChecker::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. So here we need to parse both the user edited + // dictionary list and the shared dictionary list + // to see what dictionaries the admin/user has installed + + int numshr; // number of shared dictionary entries + rtl_TextEncoding aEnc = RTL_TEXTENCODING_UTF8; + + std::vector<NSString *> postspdict; + + if (!numdict) { + + // invoke a dictionary manager to get the user dictionary list + // TODO How on macOS? + + // invoke a second dictionary manager to get the shared dictionary list +#ifdef MACOSX + NSArray *aSpellCheckLanguages = [[NSSpellChecker sharedSpellChecker] availableLanguages]; +#else + NSArray *aSpellCheckLanguages = [UITextChecker availableLanguages]; +#endif + + for (NSUInteger i = 0; i < [aSpellCheckLanguages count]; i++) + { + NSString* pLangStr = static_cast<NSString*>([aSpellCheckLanguages objectAtIndex:i]); + + // Fix up generic languages (without territory code) and odd combinations that LO + // doesn't handle. + if ([pLangStr isEqualToString:@"da"]) + { + postspdict.push_back( @"da_DK" ); + } + else if ([pLangStr isEqualToString:@"de"]) + { + // Not de_CH and de_LI, though. They need separate dictionaries. + const std::vector<NSString*> aDE + { @"AT", @"BE", @"DE", @"LU" }; + for (auto c: aDE) + { + pLangStr = [@"de_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#ifdef IOS + // iOS says it has specifically de_DE. Let's assume it is good enough for German as + // written in Austria, Belgium, and Luxembourg, too. (Not for German in Switzerland and + // Liechtenstein. For those you need to bundle the myspell dictionary.) + else if ([pLangStr isEqualToString:@"de_DE"]) + { + const std::vector<NSString*> aDE + { @"AT", @"BE", @"DE", @"LU" }; + for (auto c: aDE) + { + pLangStr = [@"de_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"en"]) + { + // System has en_AU, en_CA, en_GB, and en_IN. Add the rest. + const std::vector<NSString*> aEN + { @"BW", @"BZ", @"GH", @"GM", @"IE", @"JM", @"MU", @"MW", @"MY", @"NA", + @"NZ", @"PH", @"TT", @"US", @"ZA", @"ZW" }; + for (auto c: aEN) + { + pLangStr = [@"en_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"en_JP"] + || [pLangStr isEqualToString:@"en_SG"]) + { + // Just skip, LO doesn't have those yet in this context. + } + else if ([pLangStr isEqualToString:@"es"]) + { + const std::vector<NSString*> aES + { @"AR", @"BO", @"CL", @"CO", @"CR", @"CU", @"DO", @"EC", @"ES", @"GT", + @"HN", @"MX", @"NI", @"PA", @"PE", @"PR", @"PY", @"SV", @"UY", @"VE" }; + for (auto c: aES) + { + pLangStr = [@"es_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"fi"]) + { + postspdict.push_back( @"fi_FI" ); + } + else if ([pLangStr isEqualToString:@"fr"]) + { + const std::vector<NSString*> aFR + { @"BE", @"BF", @"BJ", @"CA", @"CH", @"CI", @"FR", @"LU", @"MC", @"ML", + @"MU", @"NE", @"SN", @"TG" }; + for (auto c: aFR) + { + pLangStr = [@"fr_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"fr_FR"]) + { + const std::vector<NSString*> aFR + { @"BE", @"BF", @"BJ", @"CA", @"CH", @"CI", @"FR", @"LU", @"MC", @"ML", + @"MU", @"NE", @"SN", @"TG" }; + for (auto c: aFR) + { + pLangStr = [@"fr_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"it"]) + { + postspdict.push_back( @"it_CH" ); + postspdict.push_back( @"it_IT" ); + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"it_IT"]) + { + const std::vector<NSString*> aIT + { @"CH", @"IT" }; + for (auto c: aIT) + { + pLangStr = [@"it_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"ko"]) + { + postspdict.push_back( @"ko_KR" ); + } + else if ([pLangStr isEqualToString:@"nl"]) + { + postspdict.push_back( @"nl_BE" ); + postspdict.push_back( @"nl_NL" ); + } + else if ([pLangStr isEqualToString:@"nb"]) + { + postspdict.push_back( @"nb_NO" ); + } + else if ([pLangStr isEqualToString:@"pl"]) + { + postspdict.push_back( @"pl_PL" ); + } + else if ([pLangStr isEqualToString:@"ru"]) + { + postspdict.push_back( @"ru_RU" ); + } + else if ([pLangStr isEqualToString:@"sv"]) + { + postspdict.push_back( @"sv_FI" ); + postspdict.push_back( @"sv_SE" ); + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"sv_SE"]) + { + postspdict.push_back( @"sv_FI" ); + postspdict.push_back( @"sv_SE" ); + } +#endif + else if ([pLangStr isEqualToString:@"tr"]) + { + postspdict.push_back( @"tr_TR" ); + } + else + postspdict.push_back( pLangStr ); + } + // System has pt_BR and pt_PT, add pt_AO. + postspdict.push_back( @"pt_AO" ); + + numshr = postspdict.size(); + + // we really should merge these and remove duplicates but since + // users can name their dictionaries anything they want it would + // be impossible to know if a real duplication exists unless we + // add some unique key to each myspell dictionary + numdict = numshr; + + if (numdict) { + aDLocs = new Locale [numdict]; + aDEncs = new rtl_TextEncoding [numdict]; + aDNames = new OUString [numdict]; + aSuppLocales.realloc(numdict); + Locale * pLocale = aSuppLocales.getArray(); + int numlocs = 0; + int newloc; + int i,j; + int k = 0; + + //first add the user dictionaries + //TODO for MAC? + + // now add the shared dictionaries + for (i = 0; i < numshr; i++) { + NSDictionary *aLocDict = [ NSLocale componentsFromLocaleIdentifier:postspdict[i] ]; + NSString* aLang = [ aLocDict objectForKey:NSLocaleLanguageCode ]; + NSString* aCountry = [ aLocDict objectForKey:NSLocaleCountryCode ]; + OUString lang([aLang cStringUsingEncoding: NSUTF8StringEncoding], [aLang length], aEnc); + OUString country([ aCountry cStringUsingEncoding: NSUTF8StringEncoding], [aCountry length], aEnc); + Locale nLoc( lang, country, OUString() ); + newloc = 1; + //eliminate duplicates (is this needed for MacOS?) + for (j = 0; j < numlocs; j++) { + if (nLoc == pLocale[j]) newloc = 0; + } + if (newloc) { + pLocale[numlocs] = nLoc; + numlocs++; + } + aDLocs[k] = nLoc; + aDEncs[k] = 0; + k++; + } + + aSuppLocales.realloc(numlocs); + + } else { + /* no dictionary.lst found so register no dictionaries */ + numdict = 0; + aDEncs = nullptr; + aDLocs = nullptr; + aDNames = nullptr; + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + + + +sal_Bool SAL_CALL MacSpellChecker::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!aSuppLocales.getLength()) + getLocales(); + + sal_Int32 nLen = aSuppLocales.getLength(); + for (sal_Int32 i = 0; i < nLen; ++i) + { + const Locale *pLocale = aSuppLocales.getConstArray(); + if (rLocale == pLocale[i]) + { + bRes = true; + break; + } + } + return bRes; +} + + +sal_Int16 MacSpellChecker::GetSpellFailure( const OUString &rWord, const Locale &rLocale ) +{ + // initialize a myspell object for each dictionary once + // (note: mutex is held higher up in isValid) + + + sal_Int16 nRes = -1; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(nWord.getStr()) length: nWord.getLength()]autorelease]; + NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease]; + if(rLocale.Country.getLength()>0) + { + NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) length: rLocale.Country.getLength()]autorelease]; + NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry]; + aLang = [aLang stringByAppendingString:aTaggedCountry]; + } + +#ifdef MACOSX + NSInteger aCount; + NSRange range = [[NSSpellChecker sharedSpellChecker] checkSpellingOfString:aNSStr startingAt:0 language:aLang wrap:false inSpellDocumentWithTag:macTag wordCount:&aCount]; +#else + NSRange range = [pChecker rangeOfMisspelledWordInString:aNSStr range:NSMakeRange(0, [aNSStr length]) startingAt:0 wrap:NO language:aLang]; +#endif + int rVal = 0; + if(range.length>0) + { + rVal = -1; + } + else + { + rVal = 1; + } + [pool release]; + if (rVal != 1) + { + nRes = SpellFailure::SPELLING_ERROR; + } else { + return -1; + } + } + return nRes; +} + + + +sal_Bool SAL_CALL + MacSpellChecker::isValid( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence<PropertyValue>& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || !rWord.getLength()) + return true; + + if (!hasLocale( rLocale )) + return true; + + // Get property values to be used. + // These are be the default values set in the SN_LINGU_PROPERTIES + // PropertySet which are overridden by the supplied ones from the + // last argument. + // You'll probably like to use a simpler solution than the provided + // one using the PropertyHelper_Spell. + + PropertyHelper_Spell &rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + sal_Int16 nFailure = GetSpellFailure( rWord, rLocale ); + if (nFailure != -1) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + // postprocess result for errors that should be ignored + if ( (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) + || (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) + || (!rHelper.IsSpellCapitalization() + && nFailure == SpellFailure::CAPTION_ERROR) + ) + nFailure = -1; + } + + return (nFailure == -1); +} + +Reference< XSpellAlternatives > + MacSpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) +{ + // Retrieves the return values for the 'spell' function call in case + // of a misspelled word. + // Especially it may give a list of suggested (correct) words: + + Reference< XSpellAlternatives > xRes; + // note: mutex is held by higher up by spell which covers both + + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + int count; + Sequence< OUString > aStr( 0 ); + + // first handle smart quotes (single and double) + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(nWord.getStr()) length: nWord.getLength()]autorelease]; + NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease]; + if(rLocale.Country.getLength()>0) + { + NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) length: rLocale.Country.getLength()]autorelease]; + NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry]; + aLang = [aLang stringByAppendingString:aTaggedCountry]; + } +#ifdef MACOSX + [[NSSpellChecker sharedSpellChecker] setLanguage:aLang]; + NSArray *guesses = [[NSSpellChecker sharedSpellChecker] guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang inSpellDocumentWithTag:0]; + (void) this; // avoid loplugin:staticmethods, the !MACOSX case uses 'this' +#else + NSArray *guesses = [pChecker guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang]; +#endif + count = [guesses count]; + if (count) + { + aStr.realloc( count ); + OUString *pStr = aStr.getArray(); + for (int ii=0; ii < count; ii++) + { + // if needed add: if (suglst[ii] == NULL) continue; + NSString* guess = [guesses objectAtIndex:ii]; + OUString cvtwrd(reinterpret_cast<const sal_Unicode*>([guess cStringUsingEncoding:NSUnicodeStringEncoding]), static_cast<sal_Int32>([guess length])); + pStr[ii] = cvtwrd; + } + } + [pool release]; + } + + // now return an empty alternative for no suggestions or the list of alternatives if some found + rtl::Reference<SpellAlternatives> pAlt = new SpellAlternatives; + pAlt->SetWordLanguage( rWord, nLang ); + pAlt->SetFailureType( SpellFailure::SPELLING_ERROR ); + pAlt->SetAlternatives( aStr ); + xRes = pAlt; + return xRes; + +} + +Reference< XSpellAlternatives > SAL_CALL + MacSpellChecker::spell( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence<PropertyValue>& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || !rWord.getLength()) + return nullptr; + + if (!hasLocale( rLocale )) + return nullptr; + + Reference< XSpellAlternatives > xAlt; + if (!isValid( rWord, rLocale, rProperties )) + { + xAlt = GetProposals( rWord, rLocale ); + } + return xAlt; +} + +sal_Bool SAL_CALL + MacSpellChecker::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + + +sal_Bool SAL_CALL + MacSpellChecker::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" ); + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + + +OUString SAL_CALL + MacSpellChecker::getServiceDisplayName( const Locale& /*rLocale*/ ) +{ + MutexGuard aGuard( GetLinguMutex() ); + return "macOS Spell Checker"; +} + + +void SAL_CALL + MacSpellChecker::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!xPropHelper.is()) + { + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + //rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xPropSet ); + xPropHelper->AddAsPropListener(); + } + else + OSL_FAIL( "wrong number of arguments in sequence" ); + + } +} + + +void SAL_CALL + MacSpellChecker::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XSpellChecker *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + } +} + + +void SAL_CALL + MacSpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + + +void SAL_CALL + MacSpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL MacSpellChecker::getImplementationName() +{ + return "org.openoffice.lingu.MacOSXSpellChecker"; +} + +sal_Bool SAL_CALL MacSpellChecker::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL MacSpellChecker::getSupportedServiceNames() +{ + return { SN_SPELLCHECKER }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_MacSpellChecker_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new MacSpellChecker()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/spell/spell.component b/lingucomponent/source/spellcheck/spell/spell.component new file mode 100644 index 000000000..c284e13fc --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/spell.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.MySpellSpellChecker" + constructor="lingucomponent_SpellChecker_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.SpellChecker"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx b/lingucomponent/source/spellcheck/spell/sspellimp.cxx new file mode 100644 index 000000000..95b264157 --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx @@ -0,0 +1,635 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> + +#include <com/sun/star/linguistic2/SpellFailure.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <comphelper/lok.hxx> +#include <comphelper/processfactory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/lang/XMultiServiceFactory.hpp> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> +#include <com/sun/star/ucb/XSimpleFileAccess.hpp> + +#include <lingutil.hxx> +#include <hunspell.hxx> +#include "sspellimp.hxx" + +#include <linguistic/misc.hxx> +#include <linguistic/spelldta.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <svtools/strings.hrc> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> +#include <osl/file.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/log.hxx> + +#include <numeric> +#include <utility> +#include <vector> +#include <set> +#include <string.h> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +// XML-header of SPELLML queries +#if !defined SPELL_XML +constexpr OUStringLiteral SPELL_XML = u"<?xml?>"; +#endif + +// only available in hunspell >= 1.5 +#if !defined MAXWORDLEN +#define MAXWORDLEN 176 +#endif + +SpellChecker::SpellChecker() : + m_aEvtListeners(GetLinguMutex()), + m_bDisposing(false) +{ +} + +SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc) + : m_aDName(std::move(i_DName)) + , m_aDLoc(std::move(i_DLoc)) + , m_aDEnc(i_DEnc) +{ +} + +SpellChecker::~SpellChecker() +{ + if (m_pPropHelper) + { + m_pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl() +{ + if (!m_pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) ); + m_pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *m_pPropHelper; +} + +Sequence< Locale > SAL_CALL SpellChecker::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (m_DictItems.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of extension dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers", + "org.openoffice.lingu.MySpellSpellChecker", aFormatList ); + for (auto const& format : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat(format) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... + std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "DICT" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory()); + uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY); + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); + uno::Sequence< OUString > aLocations( dict.aLocations ); + SAL_WARN_IF( + aLocaleNames.hasElements() && !aLocations.hasElements(), + "lingucomponent", "no locations"); + if (aLocations.hasElements()) + { + if (xAccess.is() && xAccess->exists(aLocations[0])) + { + for (auto const& locale : aLocaleNames) + { + if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(locale)) + continue; + + aLocaleNamesSet.insert(locale); + } + } + else + { + SAL_WARN( + "lingucomponent", + "missing <" << aLocations[0] << ">"); + } + } + } + // ... and add them to the resulting sequence + m_aSuppLocales.realloc( aLocaleNamesSet.size() ); + std::transform( + aLocaleNamesSet.begin(), aLocaleNamesSet.end(), m_aSuppLocales.getArray(), + [](auto const& localeName) { return LanguageTag::convertToLocale(localeName); }); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0), + [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + m_DictItems.reserve(nDictSize); + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); + + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (auto const& localeName : aLocaleNames) + { + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + + m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW); + } + } + } + DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" ); + } + else + { + // no dictionary found so register no dictionaries + m_aSuppLocales.realloc(0); + } + } + + return m_aSuppLocales; +} + +sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_aSuppLocales.hasElements()) + getLocales(); + + for (auto const& suppLocale : std::as_const(m_aSuppLocales)) + { + if (rLocale == suppLocale) + { + bRes = true; + break; + } + } + return bRes; +} + +sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale) +{ + if (rWord.getLength() > MAXWORDLEN) + return -1; + + Hunspell * pMS = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + // initialize a myspell object for each dictionary once + // (note: mutex is held higher up in isValid) + + sal_Int16 nRes = -1; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + sal_Int32 extrachar = 0; + + for (sal_Int32 ix=0; ix < n; ix++) + { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) + rBuf[ix] = u'"'; + else if ((c == 0x2018) || (c == 0x2019)) + rBuf[ix] = u'\''; + + // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only + // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries + // set ICONV and IGNORE aff file options, if needed.) + else if ((c == 0x200C) || (c == 0x200D) || + ((c >= 0xFB00) && (c <= 0xFB04))) + extrachar = 1; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + for (auto& currDict : m_DictItems) + { + pMS = nullptr; + eEnc = RTL_TEXTENCODING_DONTKNOW; + + if (rLocale == currDict.m_aDLoc) + { + if (!currDict.m_pDict) + { + OUString dicpath = currDict.m_aDName + ".dic"; + OUString affpath = currDict.m_aDName + ".aff"; + OUString dict; + OUString aff; + osl::FileBase::getSystemPathFromFileURL(dicpath,dict); + osl::FileBase::getSystemPathFromFileURL(affpath,aff); +#if defined(_WIN32) + // workaround for Windows specific problem that the + // path length in calls to 'fopen' is limited to somewhat + // about 120+ characters which will usually be exceed when + // using dictionaries as extensions. (Hunspell waits UTF-8 encoded + // path with \\?\ long path prefix.) + OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8)); + OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8)); +#else + OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding())); + OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding())); +#endif + + currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr()); +#if defined(H_DEPRECATED) + currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str()); +#else + currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding()); +#endif + } + pMS = currDict.m_pDict.get(); + eEnc = currDict.m_aDEnc; + } + + if (pMS) + { + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return -1; + + OString aWrd(OU2ENC(nWord,eEnc)); +#if defined(H_DEPRECATED) + bool bVal = pMS->spell(std::string(aWrd.getStr())); +#else + bool bVal = pMS->spell(aWrd.getStr()) != 0; +#endif + if (!bVal) { + if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) { + OUStringBuffer aBuf(nWord); + n = aBuf.getLength(); + for (sal_Int32 ix=n-1; ix >= 0; ix--) + { + switch (aBuf[ix]) { + case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break; + case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break; + case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break; + case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break; + case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break; + case 0x200C: + case 0x200D: aBuf.remove(ix, 1); break; + } + } + OUString aWord(aBuf.makeStringAndClear()); + OString bWrd(OU2ENC(aWord, eEnc)); +#if defined(H_DEPRECATED) + bVal = pMS->spell(std::string(bWrd.getStr())); +#else + bVal = pMS->spell(bWrd.getStr()) != 0; +#endif + if (bVal) return -1; + } + nRes = SpellFailure::SPELLING_ERROR; + } else { + return -1; + } + pMS = nullptr; + } + } + } + + return nRes; +} + +sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || rWord.isEmpty()) + return true; + + if (!hasLocale( rLocale )) + return true; + + // return sal_False to process SPELLML requests (they are longer than the header) + if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false; + + // Get property values to be used. + // These are be the default values set in the SN_LINGU_PROPERTIES + // PropertySet which are overridden by the supplied ones from the + // last argument. + // You'll probably like to use a simpler solution than the provided + // one using the PropertyHelper_Spell. + PropertyHelper_Spelling& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + sal_Int16 nFailure = GetSpellFailure( rWord, rLocale ); + if (nFailure != -1 && !rWord.match(SPELL_XML, 0)) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + // postprocess result for errors that should be ignored + const bool bIgnoreError = + (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) || + (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) || + (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR); + if (bIgnoreError) + nFailure = -1; + } + + return (nFailure == -1); +} + +Reference< XSpellAlternatives > + SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) +{ + // Retrieves the return values for the 'spell' function call in case + // of a misspelled word. + // Especially it may give a list of suggested (correct) words: + Reference< XSpellAlternatives > xRes; + // note: mutex is held by higher up by spell which covers both + + Hunspell* pMS = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + // first handle smart quotes (single and double) + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) + { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) + rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + int numsug = 0; + + Sequence< OUString > aStr( 0 ); + for (const auto& currDict : m_DictItems) + { + pMS = nullptr; + eEnc = RTL_TEXTENCODING_DONTKNOW; + + if (rLocale == currDict.m_aDLoc) + { + pMS = currDict.m_pDict.get(); + eEnc = currDict.m_aDEnc; + } + + if (pMS) + { + OString aWrd(OU2ENC(nWord,eEnc)); +#if defined(H_DEPRECATED) + std::vector<std::string> suglst = pMS->suggest(std::string(aWrd.getStr())); + if (!suglst.empty()) + { + aStr.realloc(numsug + suglst.size()); + OUString *pStr = aStr.getArray(); + for (size_t ii = 0; ii < suglst.size(); ++ii) + { + OUString cvtwrd(suglst[ii].c_str(), suglst[ii].size(), eEnc); + pStr[numsug + ii] = cvtwrd; + } + numsug += suglst.size(); + } +#else + char ** suglst = nullptr; + int count = pMS->suggest(&suglst, aWrd.getStr()); + if (count) + { + aStr.realloc( numsug + count ); + OUString *pStr = aStr.getArray(); + for (int ii=0; ii < count; ++ii) + { + OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc); + pStr[numsug + ii] = cvtwrd; + } + numsug += count; + } + pMS->free_list(&suglst, count); +#endif + } + } + + // now return an empty alternative for no suggestions or the list of alternatives if some found + xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr ); + return xRes; + } + return xRes; +} + +Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell( + const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || rWord.isEmpty()) + return nullptr; + + if (!hasLocale( rLocale )) + return nullptr; + + Reference< XSpellAlternatives > xAlt; + if (!isValid( rWord, rLocale, rProperties )) + { + xAlt = GetProposals( rWord, rLocale ); + } + return xAlt; +} + +sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_HUNSPELL, loc); +} + +void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (m_pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) ); + m_pPropHelper->AddAsPropListener(); //! after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL SpellChecker::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing) + { + m_bDisposing = true; + EventObject aEvtObj( static_cast<XSpellChecker *>(this) ); + m_aEvtListeners.disposeAndClear( aEvtObj ); + if (m_pPropHelper) + { + m_pPropHelper->RemoveAsPropListener(); + m_pPropHelper.reset(); + } + } +} + +void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing && rxListener.is()) + m_aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing && rxListener.is()) + m_aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL SpellChecker::getImplementationName() +{ + return "org.openoffice.lingu.MySpellSpellChecker"; +} + +sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames() +{ + return { SN_SPELLCHECKER }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_SpellChecker_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new SpellChecker()); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.hxx b/lingucomponent/source/spellcheck/spell/sspellimp.hxx new file mode 100644 index 000000000..000f1756f --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/sspellimp.hxx @@ -0,0 +1,120 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/PropertyValue.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XSpellChecker.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <linguistic/lngprophelp.hxx> + +#include <memory> + +#include <hunspell.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +class SpellChecker : + public cppu::WeakImplHelper + < + XSpellChecker, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + struct DictItem + { + OUString m_aDName; + Locale m_aDLoc; + std::unique_ptr<Hunspell> m_pDict; + rtl_TextEncoding m_aDEnc; + + DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc); + }; + + std::vector<DictItem> m_DictItems; + + Sequence< Locale > m_aSuppLocales; + + ::comphelper::OInterfaceContainerHelper3<XEventListener> m_aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Spelling> m_pPropHelper; + bool m_bDisposing; + + SpellChecker(const SpellChecker &) = delete; + SpellChecker & operator = (const SpellChecker &) = delete; + + linguistic::PropertyHelper_Spelling& GetPropHelper_Impl(); + linguistic::PropertyHelper_Spelling& GetPropHelper() + { + return m_pPropHelper ? *m_pPropHelper : GetPropHelper_Impl(); + } + + sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale ); + Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale ); + +public: + SpellChecker(); + virtual ~SpellChecker() override; + + // XSupportedLocales (for XSpellChecker) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XSpellChecker + virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/lnth.component b/lingucomponent/source/thesaurus/libnth/lnth.component new file mode 100644 index 000000000..66e90e2cf --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/lnth.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.new.Thesaurus" + constructor="lingucomponent_Thesaurus_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.Thesaurus"/> + </implementation> +</component> diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.cxx b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx new file mode 100644 index 000000000..aa7d2afa7 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx @@ -0,0 +1,78 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <osl/mutex.hxx> + +#include "nthesdta.hxx" +#include <linguistic/misc.hxx> + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; + +namespace linguistic +{ + +Meaning::Meaning( + const OUString &rTerm) : + + aSyn ( Sequence< OUString >(1) ), + aTerm (rTerm) + +{ +#if 0 + // this is for future use by a german thesaurus when one exists + bIsGermanPreReform = rHelper.IsGermanPreReform; +#endif +} + +Meaning::~Meaning() +{ +} + +OUString SAL_CALL Meaning::getMeaning() +{ + MutexGuard aGuard( GetLinguMutex() ); + return aTerm; +} + +Sequence< OUString > SAL_CALL Meaning::querySynonyms() +{ + MutexGuard aGuard( GetLinguMutex() ); + return aSyn; +} + +void Meaning::SetSynonyms( const Sequence< OUString > &rSyn ) +{ + MutexGuard aGuard( GetLinguMutex() ); + aSyn = rSyn; +} + +void Meaning::SetMeaning( const OUString &rTerm ) +{ + MutexGuard aGuard( GetLinguMutex() ); + aTerm = rTerm; +} + +} // namespace linguistic + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.hxx b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx new file mode 100644 index 000000000..8e6cb7561 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX + +#include <com/sun/star/linguistic2/XMeaning.hpp> +#include <cppuhelper/implbase.hxx> + +namespace linguistic +{ + +class Meaning : + public cppu::WeakImplHelper< css::linguistic2::XMeaning > +{ + css::uno::Sequence< OUString > aSyn; // list of synonyms, may be empty. + OUString aTerm; + +#if 0 + // this is for future use by a German thesaurus + sal_Bool bIsGermanPreReform; +#endif + + Meaning(const Meaning &) = delete; + Meaning & operator = (const Meaning &) = delete; + +public: + explicit Meaning(const OUString &rTerm); + virtual ~Meaning() override; + + // XMeaning + virtual OUString SAL_CALL getMeaning() override; + virtual css::uno::Sequence< OUString > SAL_CALL querySynonyms() override; + + // non-interface specific functions + void SetSynonyms( const css::uno::Sequence< OUString > &rSyn ); + void SetMeaning( const OUString &rTerm ); +}; + +} // namespace linguistic + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx new file mode 100644 index 000000000..ea3e3af8d --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx @@ -0,0 +1,571 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/linguistic2/LinguServiceManager.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <com/sun/star/linguistic2/XSpellChecker1.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <comphelper/lok.hxx> +#include <comphelper/processfactory.hxx> +#include <comphelper/sequence.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> + +#include <rtl/string.hxx> +#include <rtl/textenc.h> + +#include <svtools/strings.hrc> + +#include "nthesimp.hxx" +#include <linguistic/misc.hxx> +#include "nthesdta.hxx" + +#include <vector> +#include <numeric> +#include <set> +#include <string.h> + +// XML-header to query SPELLML support +constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>"; + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl() +{ + uno::Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() ); + uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ; + return xRes; +} + +Thesaurus::Thesaurus() : + aEvtListeners ( GetLinguMutex() ), pPropHelper(nullptr), bDisposing(false), + prevLocale(LANGUAGE_DONTKNOW) +{ +} + +Thesaurus::~Thesaurus() +{ + mvThesInfo.clear(); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Thesaurus::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvThesInfo.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Thesauri", + "org.openoffice.lingu.new.Thesaurus", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... + std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "THES" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName)) + continue; + + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) -> Locale { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numthes = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvThesInfo.resize(numthes); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW; + mvThesInfo[k].aLocale = aLanguageTag.getLocale(); + mvThesInfo[k].aCharSetInfo.reset( new CharClass( std::move(aLanguageTag) ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvThesInfo[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numthes, "index mismatch?" ); + } + else + { + /* no dictionary found so register no dictionaries */ + mvThesInfo.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings( + const OUString& qTerm, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties) +{ + MutexGuard aGuard( GetLinguMutex() ); + + uno::Sequence< Reference< XMeaning > > aMeanings( 1 ); + uno::Sequence< Reference< XMeaning > > noMeanings( 0 ); + uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() ); + uno::Reference< XSpellChecker1 > xSpell; + + OUString aRTerm(qTerm); + OUString aPTerm(qTerm); + CapType ct = CapType::UNKNOWN; + sal_Int32 stem = 0; + sal_Int32 stem2 = 0; + + LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); + + if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty()) + return noMeanings; + + if (!hasLocale( rLocale )) +#ifdef LINGU_EXCEPTIONS + throw( IllegalArgumentException() ); +#else + return noMeanings; +#endif + + if (prevTerm == qTerm && prevLocale == nLanguage) + return prevMeanings; + + mentry * pmean = nullptr; + sal_Int32 nmean = 0; + + PropertyHelper_Thesaurus &rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + MyThes * pTH = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass * pCC = nullptr; + + // find the first thesaurus that matches the locale + for (size_t i =0; i < mvThesInfo.size(); i++) + { + if (rLocale == mvThesInfo[i].aLocale) + { + // open up and initialize this thesaurus if need be + if (!mvThesInfo[i].aThes) + { + OUString datpath = mvThesInfo[i].aName + ".dat"; + OUString idxpath = mvThesInfo[i].aName + ".idx"; + OUString ndat; + OUString nidx; + osl::FileBase::getSystemPathFromFileURL(datpath,ndat); + osl::FileBase::getSystemPathFromFileURL(idxpath,nidx); + +#if defined(_WIN32) + // MyThes waits UTF-8 encoded paths with \\?\ long path prefix. + OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8)); + OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8)); +#else + OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding())); + OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding())); +#endif + + mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) ); + mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding()); + } + pTH = mvThesInfo[i].aThes.get(); + eEnc = mvThesInfo[i].aEncoding; + pCC = mvThesInfo[i].aCharSetInfo.get(); + + if (pTH) + break; + } + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return noMeanings; + + while (pTH) + { + // convert word to all lower case for searching + if (!stem) + ct = capitalType(aRTerm, pCC); + OUString nTerm(makeLowerCase(aRTerm, pCC)); + OString aTmp( OU2ENC(nTerm, eEnc) ); + nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean); + + if (nmean) + aMeanings.realloc( nmean ); + + mentry * pe = pmean; + OUString codeTerm = qTerm; + Reference< XSpellAlternatives > xTmpRes2; + + if (stem) + { + xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" + + aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes2.is()) + { + Sequence<OUString>seq = xTmpRes2->getAlternatives(); + if (seq.hasElements()) + { + codeTerm = seq[0]; + stem2 = 1; + } + } + } + + for (int j = 0; j < nmean; j++) + { + int count = pe->count; + if (count) + { + Sequence< OUString > aStr( count ); + OUString *pStr = aStr.getArray(); + + for (int i=0; i < count; i++) + { + OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc ); + sal_Int32 catpos = sTerm.indexOf('('); + OUString catst; + if (catpos > 2) + { + // remove category name for affixation and casing + catst = OUString::Concat(" ") + sTerm.subView(catpos); + sTerm = sTerm.copy(0, catpos); + sTerm = sTerm.trim(); + } + // generate synonyms with affixes + if (stem && stem2) + { + Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" + + sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + sTerm = seq[0]; + } + } + + CapType ct1 = capitalType(sTerm, pCC); + if (CapType::MIXED == ct1) + ct = ct1; + OUString cTerm; + switch (ct) + { + case CapType::ALLCAP: + cTerm = makeUpperCase(sTerm, pCC); + break; + case CapType::INITCAP: + cTerm = makeInitCap(sTerm, pCC); + break; + default: + cTerm = sTerm; + break; + } + OUString aAlt( cTerm + catst); + pStr[i] = aAlt; + } + rtl::Reference<Meaning> pMn = new Meaning(aRTerm); + OUString dTerm(pe->defn,strlen(pe->defn),eEnc ); + pMn->SetMeaning(dTerm); + pMn->SetSynonyms(aStr); + Reference<XMeaning>* pMeaning = aMeanings.getArray(); + pMeaning[j] = pMn; + } + pe++; + } + pTH->CleanUpAfterLookup(&pmean,nmean); + + if (nmean) + { + prevTerm = qTerm; + prevMeanings = aMeanings; + prevLocale = nLanguage; + return aMeanings; + } + + if (stem || !xLngSvcMgr.is()) + return noMeanings; + stem = 1; + + xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY ); + if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties )) + return noMeanings; + Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" + + aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + { + aRTerm = seq[0]; // XXX Use only the first stem + continue; + } + } + + // stem the last word of the synonym (for categories after affixation) + aRTerm = aRTerm.trim(); + sal_Int32 pos = aRTerm.lastIndexOf(' '); + if (!pos) + return noMeanings; + xTmpRes = xSpell->spell( OUString::Concat("<?xml?><query type='stem'><word>") + + aRTerm.subView(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + { + aPTerm = aRTerm.copy(pos + 1); + aRTerm = aRTerm.subView(0, pos + 1) + seq[0]; +#if 0 + for (int i = 0; i < seq.getLength(); i++) + { + OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8); + fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer); + } +#endif + continue; + } + } + break; + } + return noMeanings; +} + +OUString SAL_CALL Thesaurus::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_MYTHES, loc); +} + +void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + // Accept one of two args so we can be compatible with the call site in GetAvailLocales() + // linguistic module + if (1 == nLen || 2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + assert(xPropSet); + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + else + OSL_FAIL( "wrong number of arguments in sequence" ); +} + +OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + { + return ( pCC->uppercase(bTemp, 0, 1) + + pCC->lowercase(aTerm,1,(tlen-1)) ); + } + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +void SAL_CALL Thesaurus::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XThesaurus *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + delete pPropHelper; + pPropHelper = nullptr; + } + } +} + +void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Thesaurus::getImplementationName() +{ + return "org.openoffice.lingu.new.Thesaurus"; +} + +sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames() +{ + return { SN_THESAURUS }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_Thesaurus_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new Thesaurus()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx new file mode 100644 index 000000000..04eab0688 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx @@ -0,0 +1,129 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/uno/Reference.h> +#include <com/sun/star/uno/Sequence.h> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XMeaning.hpp> +#include <com/sun/star/linguistic2/XThesaurus.hpp> + +#include <unotools/charclass.hxx> + +#include <lingutil.hxx> +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <osl/file.hxx> +#include <mythes.hxx> +#include <memory> +#include <vector> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +namespace com::sun::star::beans { class XPropertySet; } + +class Thesaurus : + public cppu::WeakImplHelper + < + XThesaurus, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + + ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners; + linguistic::PropertyHelper_Thesaurus* pPropHelper; + bool bDisposing; + struct ThesInfo + { + std::unique_ptr<CharClass> aCharSetInfo; + std::unique_ptr<MyThes> aThes; + rtl_TextEncoding aEncoding; + Locale aLocale; + OUString aName; + }; + std::vector<ThesInfo> mvThesInfo; + + // cache for the Thesaurus dialog + Sequence < Reference < css::linguistic2::XMeaning > > prevMeanings; + OUString prevTerm; + LanguageType prevLocale; + + Thesaurus(const Thesaurus &) = delete; + Thesaurus & operator = (const Thesaurus &) = delete; + + linguistic::PropertyHelper_Thesaurus& GetPropHelper_Impl(); + linguistic::PropertyHelper_Thesaurus& GetPropHelper() + { + return pPropHelper ? *pPropHelper : GetPropHelper_Impl(); + } + +public: + Thesaurus(); + virtual ~Thesaurus() override; + + // XSupportedLocales (for XThesaurus) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XThesaurus + virtual Sequence< Reference < css::linguistic2::XMeaning > > SAL_CALL queryMeanings( const OUString& rTerm, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |