diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /lingucomponent | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lingucomponent')
43 files changed, 6000 insertions, 0 deletions
diff --git a/lingucomponent/IwyuFilter_lingucomponent.yaml b/lingucomponent/IwyuFilter_lingucomponent.yaml new file mode 100644 index 0000000000..ebea8246cc --- /dev/null +++ b/lingucomponent/IwyuFilter_lingucomponent.yaml @@ -0,0 +1,20 @@ +--- +assumeFilename: lingucomponent/source/spellcheck/spell/sspellimp.cxx +excludelist: + lingucomponent/source/lingutil/lingutil.hxx: + # Needed on WIN + - rtl/string.hxx + lingucomponent/source/languageguessing/guesslang.cxx: + # Needed for code protected by EXTTEXTCAT_VERSION_MAJOR + - libexttextcat/textcat.h + - tools/debug.hxx + - sal/macros.h + lingucomponent/source/numbertext/numbertext.cxx: + # Needed on WIN + - o3tl/char16_t2wchar_t.hxx + lingucomponent/source/spellcheck/spell/sspellimp.cxx: + # Needed for SpellFailure::SPELLING_ERROR + - com/sun/star/linguistic2/SpellFailure.hpp + lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx: + # Actually used + - com/sun/star/linguistic2/XLinguProperties.hpp diff --git a/lingucomponent/Library_LanguageTool.mk b/lingucomponent/Library_LanguageTool.mk new file mode 100644 index 0000000000..91907fe2b5 --- /dev/null +++ b/lingucomponent/Library_LanguageTool.mk @@ -0,0 +1,52 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,LanguageTool)) + +$(eval $(call gb_Library_set_componentfile,LanguageTool,lingucomponent/source/spellcheck/languagetool/LanguageTool,services)) + +$(eval $(call gb_Library_set_include,LanguageTool,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,LanguageTool)) + +$(eval $(call gb_Library_use_libraries,LanguageTool,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + svt \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,LanguageTool,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,LanguageTool,\ + boost_headers \ + icuuc \ + curl \ +)) + +$(eval $(call gb_Library_use_custom_headers,LanguageTool,\ + officecfg/registry \ +)) + +$(eval $(call gb_Library_add_exception_objects,LanguageTool,\ + lingucomponent/source/spellcheck/languagetool/languagetoolimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_MacOSXSpell.mk b/lingucomponent/Library_MacOSXSpell.mk new file mode 100644 index 0000000000..df77686d1c --- /dev/null +++ b/lingucomponent/Library_MacOSXSpell.mk @@ -0,0 +1,47 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,MacOSXSpell)) + +$(eval $(call gb_Library_set_componentfile,MacOSXSpell,lingucomponent/source/spellcheck/macosxspell/MacOSXSpell,services)) + +$(eval $(call gb_Library_set_include,MacOSXSpell,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,MacOSXSpell)) + +$(eval $(call gb_Library_use_libraries,MacOSXSpell,\ + comphelper \ + cppu \ + cppuhelper \ + lng \ + sal \ + i18nlangtag \ + svl \ + tl \ + ucbhelper \ + utl \ +)) + +$(eval $(call gb_Library_use_system_darwin_frameworks,MacOSXSpell,\ + Cocoa \ +)) + +$(eval $(call gb_Library_use_externals,MacOSXSpell,\ + boost_headers \ +)) + +$(eval $(call gb_Library_add_objcxxobjects,MacOSXSpell,\ + lingucomponent/source/spellcheck/macosxspell/macspellimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_guesslang.mk b/lingucomponent/Library_guesslang.mk new file mode 100644 index 0000000000..a700ed545b --- /dev/null +++ b/lingucomponent/Library_guesslang.mk @@ -0,0 +1,41 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,guesslang)) + +$(eval $(call gb_Library_set_componentfile,guesslang,lingucomponent/source/languageguessing/guesslang,services)) + +ifneq ($(ENABLE_WASM_STRIP_GUESSLANG),TRUE) +$(eval $(call gb_Library_use_externals,guesslang,\ + libexttextcat \ +)) +endif + +$(eval $(call gb_Library_use_externals,guesslang,\ + boost_headers \ +)) + +$(eval $(call gb_Library_use_sdk_api,guesslang)) + +$(eval $(call gb_Library_use_libraries,guesslang,\ + cppu \ + cppuhelper \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_add_exception_objects,guesslang,\ + lingucomponent/source/languageguessing/guess \ + lingucomponent/source/languageguessing/guesslang \ + lingucomponent/source/languageguessing/simpleguesser \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_hyphen.mk b/lingucomponent/Library_hyphen.mk new file mode 100644 index 0000000000..dd172e080e --- /dev/null +++ b/lingucomponent/Library_hyphen.mk @@ -0,0 +1,47 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,hyphen)) + +$(eval $(call gb_Library_set_componentfile,hyphen,lingucomponent/source/hyphenator/hyphen/hyphen,services)) + +$(eval $(call gb_Library_set_include,hyphen,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,hyphen)) + +$(eval $(call gb_Library_use_libraries,hyphen,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,hyphen,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,hyphen,\ + boost_headers \ + hunspell \ + hyphen \ +)) + +$(eval $(call gb_Library_add_exception_objects,hyphen,\ + lingucomponent/source/hyphenator/hyphen/hyphenimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_lnth.mk b/lingucomponent/Library_lnth.mk new file mode 100644 index 0000000000..00d16790e2 --- /dev/null +++ b/lingucomponent/Library_lnth.mk @@ -0,0 +1,48 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,lnth)) + +$(eval $(call gb_Library_set_componentfile,lnth,lingucomponent/source/thesaurus/libnth/lnth,services)) + +$(eval $(call gb_Library_set_include,lnth,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,lnth)) + +$(eval $(call gb_Library_use_libraries,lnth,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,lnth,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,lnth,\ + boost_headers \ + hunspell \ + mythes \ +)) + +$(eval $(call gb_Library_add_exception_objects,lnth,\ + lingucomponent/source/thesaurus/libnth/nthesdta \ + lingucomponent/source/thesaurus/libnth/nthesimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_numbertext.mk b/lingucomponent/Library_numbertext.mk new file mode 100644 index 0000000000..6cb6ba10a5 --- /dev/null +++ b/lingucomponent/Library_numbertext.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,numbertext)) + +$(eval $(call gb_Library_set_componentfile,numbertext,lingucomponent/source/numbertext/numbertext,services)) + +$(eval $(call gb_Library_use_externals,numbertext,\ + libnumbertext \ +)) + +$(eval $(call gb_Library_use_sdk_api,numbertext)) + +$(eval $(call gb_Library_use_libraries,numbertext,\ + cppu \ + cppuhelper \ + i18nlangtag \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_add_exception_objects,numbertext,\ + lingucomponent/source/numbertext/numbertext \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Library_spell.mk b/lingucomponent/Library_spell.mk new file mode 100644 index 0000000000..c00ea8d906 --- /dev/null +++ b/lingucomponent/Library_spell.mk @@ -0,0 +1,47 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,spell)) + +$(eval $(call gb_Library_set_componentfile,spell,lingucomponent/source/spellcheck/spell/spell,services)) + +$(eval $(call gb_Library_set_include,spell,\ + $$(INCLUDE) \ + -I$(SRCDIR)/lingucomponent/source/lingutil \ +)) + +$(eval $(call gb_Library_use_sdk_api,spell)) + +$(eval $(call gb_Library_use_libraries,spell,\ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + lng \ + sal \ + tl \ + utl \ +)) + +$(eval $(call gb_Library_use_static_libraries,spell,\ + ulingu \ +)) + +$(eval $(call gb_Library_use_externals,spell,\ + boost_headers \ + hunspell \ + icuuc \ +)) + +$(eval $(call gb_Library_add_exception_objects,spell,\ + lingucomponent/source/spellcheck/spell/sspellimp \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Makefile b/lingucomponent/Makefile new file mode 100644 index 0000000000..0997e62848 --- /dev/null +++ b/lingucomponent/Makefile @@ -0,0 +1,14 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/Module_lingucomponent.mk b/lingucomponent/Module_lingucomponent.mk new file mode 100644 index 0000000000..ea31defcf5 --- /dev/null +++ b/lingucomponent/Module_lingucomponent.mk @@ -0,0 +1,41 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Module_Module,lingucomponent)) + +ifneq ($(ENABLE_WASM_STRIP_GUESSLANG),TRUE) +$(eval $(call gb_Module_add_targets,lingucomponent,\ + Library_guesslang \ +)) +endif + +ifneq ($(ENABLE_WASM_STRIP_HUNSPELL),TRUE) +$(eval $(call gb_Module_add_targets,lingucomponent,\ + Library_hyphen \ + Library_lnth \ + Library_spell \ + StaticLibrary_ulingu \ +)) +endif + +ifeq ($(ENABLE_CURL),TRUE) +ifneq ($(ENABLE_WASM_STRIP_LANGUAGETOOL),TRUE) +$(eval $(call gb_Module_add_targets,lingucomponent,\ + Library_LanguageTool \ +)) +endif +endif + +$(eval $(call gb_Module_add_targets,lingucomponent,\ + $(if $(filter iOS MACOSX,$(OS)),Library_MacOSXSpell) \ + Library_numbertext \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/README.md b/lingucomponent/README.md new file mode 100644 index 0000000000..6db4086ac7 --- /dev/null +++ b/lingucomponent/README.md @@ -0,0 +1,3 @@ +# Linguistics Components + +`lingucomponent` contains spellcheck, hyphenator, thesaurus, etc. diff --git a/lingucomponent/StaticLibrary_ulingu.mk b/lingucomponent/StaticLibrary_ulingu.mk new file mode 100644 index 0000000000..1bee43eeb1 --- /dev/null +++ b/lingucomponent/StaticLibrary_ulingu.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_StaticLibrary_StaticLibrary,ulingu)) + +$(eval $(call gb_StaticLibrary_use_api,ulingu,\ + udkapi \ + offapi \ +)) + +ifneq ($(SYSTEM_DICTS),) +$(eval $(call gb_StaticLibrary_add_defs,ulingu,\ + -DSYSTEM_DICTS -DDICT_SYSTEM_DIR=\"$(DICT_SYSTEM_DIR)\" -DHYPH_SYSTEM_DIR=\"$(HYPH_SYSTEM_DIR)\" -DTHES_SYSTEM_DIR=\"$(THES_SYSTEM_DIR)\" \ +)) + +endif + +$(eval $(call gb_StaticLibrary_use_externals,ulingu,\ + boost_headers \ + hunspell \ +)) + +$(eval $(call gb_StaticLibrary_add_exception_objects,ulingu,\ + lingucomponent/source/lingutil/lingutil \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/lingucomponent/config/Linguistic-lingucomponent-grammarchecker.xcu b/lingucomponent/config/Linguistic-lingucomponent-grammarchecker.xcu new file mode 100644 index 0000000000..6fa455d969 --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-grammarchecker.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="GrammarCheckers"> + <node oor:name="org.openoffice.lingu.LanguageToolGrammarChecker" oor:op="fuse"> + <prop oor:name="Locales" oor:type="oor:string-list"> + <value>ar ast-ES be-BY br-FR ca-ES ca-ES-valencia zh-CN da-DK nl nl-BE en en-AU en-CA en-GB en-NZ en-ZA en-US fr gl-ES de de-AT de-DE de-CH el-GR ga-IE it ja-JP km-KH nb no fa pl-PL pt pt-AO pt-BR pt-MZ pt-PT ro-RO ru-RU de-DE-x-simple-language sk-SK sl-SI es es-AR sv tl-PH ta-IN uk-UA</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/config/Linguistic-lingucomponent-hyphenator.xcu b/lingucomponent/config/Linguistic-lingucomponent-hyphenator.xcu new file mode 100644 index 0000000000..c42f16313e --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-hyphenator.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . +--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="Hyphenators"> + <node oor:name="org.openoffice.lingu.LibHnjHyphenator" oor:op="fuse"> + <prop oor:name="SupportedDictionaryFormats" oor:type="oor:string-list"> + <value>DICT_HYPH</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/config/Linguistic-lingucomponent-spellchecker.xcu b/lingucomponent/config/Linguistic-lingucomponent-spellchecker.xcu new file mode 100644 index 0000000000..8f35b44e5f --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-spellchecker.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="SpellCheckers"> + <node oor:name="org.openoffice.lingu.MySpellSpellChecker" oor:op="fuse"> + <prop oor:name="SupportedDictionaryFormats" oor:type="oor:string-list"> + <value>DICT_SPELL</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/config/Linguistic-lingucomponent-thesaurus.xcu b/lingucomponent/config/Linguistic-lingucomponent-thesaurus.xcu new file mode 100644 index 0000000000..e228e65561 --- /dev/null +++ b/lingucomponent/config/Linguistic-lingucomponent-thesaurus.xcu @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+--> +<oor:component-data oor:name="Linguistic" oor:package="org.openoffice.Office" xmlns:install="http://openoffice.org/2004/installation" xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <node oor:name="ServiceManager"> + <node oor:name="Thesauri"> + <node oor:name="org.openoffice.lingu.new.Thesaurus" oor:op="fuse"> + <prop oor:name="SupportedDictionaryFormats" oor:type="oor:string-list"> + <value>DICT_THES</value> + </prop> + </node> + </node> + </node> +</oor:component-data> + diff --git a/lingucomponent/source/hyphenator/hyphen/hyphen.component b/lingucomponent/source/hyphenator/hyphen/hyphen.component new file mode 100644 index 0000000000..b9bc8b1f36 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphen.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.LibHnjHyphenator" + constructor="lingucomponent_Hyphenator_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.Hyphenator"/> + </implementation> +</component> diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx new file mode 100644 index 0000000000..8ac156ef8c --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx @@ -0,0 +1,805 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <com/sun/star/uno/Reference.h> + +#include <comphelper/sequence.hxx> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> + +#include <hyphen.h> +#include "hyphenimp.hxx" + +#include <linguistic/hyphdta.hxx> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/log.hxx> + +#include <linguistic/misc.hxx> +#include <svtools/strings.hrc> +#include <unotools/charclass.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> +#include <osl/file.hxx> + +#include <stdio.h> +#include <string.h> + +#include <cassert> +#include <numeric> +#include <vector> +#include <set> +#include <memory> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +Hyphenator::Hyphenator() : + aEvtListeners ( GetLinguMutex() ) +{ + bDisposing = false; +} + +Hyphenator::~Hyphenator() +{ + for (auto & rInfo : mvDicts) + { + if (rInfo.aPtr) + hnj_hyphen_free(rInfo.aPtr); + } + + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper.reset( new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! 
after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Hyphenator::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvDicts.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators", + "org.openoffice.lingu.LibHnjHyphenator", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style hyphenation dictionaries to use... + std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "HYPH" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... 
and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale then (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvDicts.resize(numdict); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvDicts[k].aPtr = nullptr; + mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; + mvDicts[k].aLoc = aLanguageTag.getLocale(); + mvDicts[k].apCC.reset( new CharClass( std::move(aLanguageTag) ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvDicts[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numdict, "index mismatch?" 
); + } + else + { + // no dictionary found so register no dictionaries + mvDicts.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +namespace { +bool LoadDictionary(HDInfo& rDict) +{ + OUString DictFN = rDict.aName + ".dic"; + OUString dictpath; + + osl::FileBase::getSystemPathFromFileURL(DictFN, dictpath); + +#if defined(_WIN32) + // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix. + OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8)); +#else + OString sTmp(OU2ENC(dictpath, osl_getThreadTextEncoding())); +#endif + HyphenDict *dict = nullptr; + if ((dict = hnj_hyphen_load(sTmp.getStr())) == nullptr) + { + SAL_WARN( + "lingucomponent", + "Couldn't find file " << dictpath); + return false; + } + rDict.aPtr = dict; + rDict.eEnc = getTextEncodingFromCharset(dict->cset); + return true; +} +} + +Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nMaxLeading, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps(); + + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + Reference< XHyphenatedWord > xRes; + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + int nHyphenationPos = -1; + int nHyphenationPosAlt = -1; + int 
nHyphenationPosAltHyph = -1; + + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + HyphenDict *dict = mvDicts[k].aPtr; + eEnc = mvDicts[k].eEnc; + CharClass * pCC = mvDicts[k].apCC.get(); + + // Don't hyphenate uppercase words if requested + if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC)) + { + return nullptr; + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + CapType ct = capitalType(aWord, pCC); + + // first convert any smart quotes or apostrophes to normal ones + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + int wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen + 1]); + std::unique_ptr<char[]> hyphens(new char[wordlen + 5]); + + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word 
into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // now strip off any ending periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + // whoops something did not work + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return nullptr; + } + } + + // now backfill hyphens[] for any removed trailing periods + for (int c = n; c < wordlen; c++) hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + + sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); + + for (sal_Int32 i = 0; i < n; i++) + { + int leftrep = 0; + bool hit = (n >= minLen); + if (!rep || !rep[i]) + { + hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (i >= (minLead-1) ); + hit = hit && ((n - i - 1) >= minTrail); + } + else + { + // calculate change character length before hyphenation point signed with '=' + for (char * c = rep[i]; *c && (*c != '='); c++) + { + if (eEnc == RTL_TEXTENCODING_UTF8) + { + if (static_cast<unsigned char>(*c) >> 6 != 2) + leftrep++; + } + else + leftrep++; + } + hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); + hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); + hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + } + if (hit) + { + nHyphenationPos = i; + if (rep && rep[i]) + { + nHyphenationPosAlt = i - pos[i]; + nHyphenationPosAltHyph = i + leftrep - pos[i]; + } + } + } + + if (nHyphenationPos == -1) + { + xRes 
= nullptr; + } + else + { + if (rep && rep[nHyphenationPos]) + { + // remove equal sign + char * s = rep[nHyphenationPos]; + int eq = 0; + for (; *s; s++) + { + if (*s == '=') eq = 1; + if (eq) *s = *(s + 1); + } + OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyph; + switch (ct) + { + case CapType::ALLCAP: + { + repHyph = makeUpperCase(repHyphlow, pCC); + break; + } + case CapType::INITCAP: + { + if (nHyphenationPosAlt == -1) + repHyph = makeInitCap(repHyphlow, pCC); + else + repHyph = repHyphlow; + break; + } + default: + { + repHyph = repHyphlow; + break; + } + } + + // handle shortening + sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ? + nHyphenationPosAltHyph : nHyphenationPos); + // discretionary hyphenation + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos, + aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + static_cast<sal_Int16>(nHyphenationPosAltHyph)); + } + else + { + xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), + static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos)); + } + } + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + return xRes; + } + return nullptr; +} + +Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( + const OUString& aWord, + const css::lang::Locale& aLocale, + sal_Int16 nIndex, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: + for (int extrachar = 1; extrachar <= 2; extrachar++) + { + Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties); + if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == 
nIndex) + return xRes; + } + return nullptr; +} + +Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord, + const css::lang::Locale& aLocale, + const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) +{ + PropertyHelper_Hyphenation& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); + sal_Int16 minLen = rHelper.GetMinWordLength(); + + // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as + // well as "hyphenate" + if (aWord.getLength() < minLen) + { + return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ), + aWord, Sequence< sal_Int16 >() ); + } + + int k = -1; + for (size_t j = 0; j < mvDicts.size(); ++j) + { + if (aLocale == mvDicts[j].aLoc) + k = j; + } + + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + HyphenDict *dict = nullptr; + // if this dictionary has not been loaded yet do that + if (!mvDicts[k].aPtr) + { + if (!LoadDictionary(mvDicts[k])) + return nullptr; + } + + // otherwise hyphenate the word with that dictionary + dict = mvDicts[k].aPtr; + rtl_TextEncoding eEnc = mvDicts[k].eEnc; + CharClass* pCC = mvDicts[k].apCC.get(); + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! 
(maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { + ch = rBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + sal_Int32 wordlen = encWord.getLength(); + std::unique_ptr<char[]> lcword(new char[wordlen+1]); + std::unique_ptr<char[]> hyphens(new char[wordlen+5]); + char ** rep = nullptr; // replacements of discretionary hyphenation + int * pos = nullptr; // array of [hyphenation point] minus [deletion position] + int * cut = nullptr; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword.get(),encWord.getStr()); + + // first remove any trailing periods + sal_Int32 n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), + std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); + if (bFailed) + { + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return nullptr; + } + } + // now backfill hyphens[] for any removed periods + for (sal_Int32 c = n; c < wordlen; c++) + hyphens[c] = '0'; + hyphens[wordlen] = 
'\0'; + + sal_Int32 nHyphCount = 0; + + for ( sal_Int32 i = 0; i < encWord.getLength(); i++) + { + if (hyphens[i]&1) + nHyphCount++; + } + + Sequence< sal_Int16 > aHyphPos(nHyphCount); + sal_Int16 *pPos = aHyphPos.getArray(); + OUStringBuffer hyphenatedWordBuffer; + nHyphCount = 0; + + for (sal_Int32 i = 0; i < nWord.getLength(); i++) + { + hyphenatedWordBuffer.append(aWord[i]); + // hyphenation position + if (hyphens[i]&1) + { + // linguistic::PossibleHyphens is stuck with + // css::uno::Sequence<sal_Int16> because of + // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so + // any further positions need to be ignored: + assert(i >= SAL_MIN_INT16); + if (i > SAL_MAX_INT16) + { + SAL_WARN( + "lingucomponent", + "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord + << "\""); + continue; + } + pPos[nHyphCount] = i; + hyphenatedWordBuffer.append('='); + nHyphCount++; + } + } + + OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); + + Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens( + aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos); + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return xRes; + } + + return nullptr; +} + +OUString Hyphenator::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Hyphenator::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) ); + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +sal_Bool SAL_CALL 
Hyphenator::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_LIBHYPHEN, loc); +} + +void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) ); + pPropHelper->AddAsPropListener(); //! 
after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL Hyphenator::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XHyphenator *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + pPropHelper.reset(); + } + } +} + +void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Hyphenator::getImplementationName() +{ + return "org.openoffice.lingu.LibHnjHyphenator"; +} + +sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames() +{ + return { SN_HYPHENATOR }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_Hyphenator_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new Hyphenator()); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx new file mode 100644 index 0000000000..45ebca1125 --- /dev/null +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx @@ -0,0 +1,126 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_HYPHENATOR_HYPHEN_HYPHENIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XHyphenator.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <unotools/charclass.hxx> + +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <lingutil.hxx> +#include <stdio.h> + +#include <hyphen.h> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +struct HDInfo { + HyphenDict * aPtr; + OUString aName; + Locale aLoc; + rtl_TextEncoding eEnc; + std::unique_ptr<CharClass> apCC; +}; + +class Hyphenator : + public cppu::WeakImplHelper + < + XHyphenator, + 
XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + std::vector< HDInfo > mvDicts; + + ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Hyphenation> pPropHelper; + bool bDisposing; + + Hyphenator(const Hyphenator &) = delete; + Hyphenator & operator = (const Hyphenator &) = delete; + + linguistic::PropertyHelper_Hyphenation& GetPropHelper_Impl(); + linguistic::PropertyHelper_Hyphenation& GetPropHelper() + { + return pPropHelper ? *pPropHelper : GetPropHelper_Impl(); + } + +public: + Hyphenator(); + + virtual ~Hyphenator() override; + + // XSupportedLocales (for XHyphenator) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XHyphenator + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL hyphenate( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nMaxLeading, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XHyphenatedWord > SAL_CALL queryAlternativeSpelling( const OUString& aWord, const css::lang::Locale& aLocale, sal_Int16 nIndex, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + virtual css::uno::Reference< css::linguistic2::XPossibleHyphens > SAL_CALL createPossibleHyphens( const OUString& aWord, const css::lang::Locale& aLocale, const css::uno::Sequence< css::beans::PropertyValue >& aProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL 
getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.cxx b/lingucomponent/source/languageguessing/guess.cxx new file mode 100644 index 0000000000..a7cbeccabc --- /dev/null +++ b/lingucomponent/source/languageguessing/guess.cxx @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cassert> +#include <string.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#else +#include <textcat.h> +#endif + +#include "guess.hxx" + +/* Old textcat.h versions defined bad spelled constants. */ +#ifndef TEXTCAT_RESULT_UNKNOWN_STR +#define TEXTCAT_RESULT_UNKNOWN_STR _TEXTCAT_RESULT_UNKOWN +#endif + +#ifndef TEXTCAT_RESULT_SHORT_STR +#define TEXTCAT_RESULT_SHORT_STR _TEXTCAT_RESULT_SHORT +#endif + +Guess::Guess() + : language_str(DEFAULT_LANGUAGE) + , country_str(DEFAULT_COUNTRY) +{ +} + +/* +* this use a char * string to build the guess object +* a string like those is made as : [language-country-encoding]... +* +*/ +Guess::Guess(const char * guess_str) + : language_str(DEFAULT_LANGUAGE) + , country_str(DEFAULT_COUNTRY) +{ + //if the guess is not like "UNKNOWN" or "SHORT", go into the brackets + if(strcmp(guess_str + 1, TEXTCAT_RESULT_UNKNOWN_STR) == 0 + || strcmp(guess_str + 1, TEXTCAT_RESULT_SHORT_STR) == 0) + return; + + // From how this ctor is called from SimpleGuesser::GuessLanguage and + // SimpleGuesser::GetManagedLanguages in + // lingucomponent/source/languageguessing/simpleguesser.cxx, guess_str must start with "[": + assert(guess_str[0] == GUESS_SEPARATOR_OPEN); + auto const start = guess_str + 1; + // Only look at the prefix of guess_str, delimited by the next "]" or "[" or end-of-string; + // split it into at most three segments separated by "-" (where excess occurrences of "-" + // would become part of the third segment), like "en-US-utf8"; the first segment denotes the + // language; if there are three segments, the second denotes the country and the third the + // encoding; otherwise, the second segment, if any (e.g., in 
"haw-utf8"), denotes the + // encoding: + char const * dash1 = nullptr; + char const * dash2 = nullptr; + auto p = start; + for (;; ++p) { + auto const c = *p; + if (c == '\0' || c == GUESS_SEPARATOR_OPEN || c == GUESS_SEPARATOR_CLOSE) { + break; + } + if (c == GUESS_SEPARATOR_SEP) { + if (dash1 == nullptr) { + dash1 = p; + } else { + dash2 = p; + // The encoding is ignored, so we can stop as soon as we found the second "-": + break; + } + } + } + auto const langLen = (dash1 == nullptr ? p : dash1) - start; + if (langLen != 0) { // if not we use the default value + language_str.assign(start, langLen); + } + if (dash2 != nullptr) { + country_str.assign(dash1 + 1, dash2 - (dash1 + 1)); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.hxx b/lingucomponent/source/languageguessing/guess.hxx new file mode 100644 index 0000000000..627033d3ab --- /dev/null +++ b/lingucomponent/source/languageguessing/guess.hxx @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_GUESS_HXX + +#define GUESS_SEPARATOR_OPEN '[' +#define GUESS_SEPARATOR_CLOSE ']' +#define GUESS_SEPARATOR_SEP '-' +#define DEFAULT_LANGUAGE "" +#define DEFAULT_COUNTRY "" +#define DEFAULT_ENCODING "" + +#include <string> + +class Guess final { + public: + + /** + * Default init + */ + Guess(); + + /** + * Init from a string like [en-UK-utf8] and the rank + */ + Guess(const char * guess_str); + + const std::string& GetLanguage() const { return language_str;} + const std::string& GetCountry() const { return country_str;} + + private: + std::string language_str; + std::string country_str; +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guesslang.component b/lingucomponent/source/languageguessing/guesslang.component new file mode 100644 index 0000000000..75f6e7ce2d --- /dev/null +++ b/lingucomponent/source/languageguessing/guesslang.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.lingu2.LanguageGuessing" + constructor="lingucomponent_LangGuess_get_implementation"> + <service name="com.sun.star.linguistic2.LanguageGuessing"/> + </implementation> +</component> diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx new file mode 100644 index 0000000000..e88cd997ca --- /dev/null +++ b/lingucomponent/source/languageguessing/guesslang.cxx @@ -0,0 +1,320 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <iostream> +#include <mutex> +#include <string_view> + +#include <osl/file.hxx> +#include <tools/debug.hxx> + +#include <sal/config.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include "simpleguesser.hxx" +#include "guess.hxx" + +#include <com/sun/star/lang/IllegalArgumentException.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XLanguageGuessing.hpp> +#include <unotools/pathoptions.hxx> +#include <osl/thread.h> + +#include <sal/macros.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#else +#include <textcat.h> +#endif + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +static std::mutex & GetLangGuessMutex() +{ + static std::mutex aMutex; + return aMutex; +} + +namespace { + +class LangGuess_Impl : + public ::cppu::WeakImplHelper< + XLanguageGuessing, + XServiceInfo > +{ + SimpleGuesser m_aGuesser; + bool m_bInitialized; + + virtual ~LangGuess_Impl() override {} + void EnsureInitialized(); + +public: + LangGuess_Impl(); + LangGuess_Impl(const LangGuess_Impl&) = delete; + LangGuess_Impl& operator=(const LangGuess_Impl&) = delete; + + // XServiceInfo implementation + virtual OUString SAL_CALL getImplementationName( ) override; + virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override; + + // XLanguageGuessing implementation + virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override; + virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; + virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale 
>& aLanguages ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override; + virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override; + + // implementation specific + /// @throws RuntimeException + void SetFingerPrintsDB( std::u16string_view fileName ); +}; + +} + +LangGuess_Impl::LangGuess_Impl() : + m_bInitialized( false ) +{ +} + +void LangGuess_Impl::EnsureInitialized() +{ + if (m_bInitialized) + return; + + // set this to true at the very start to prevent loops because of + // implicitly called functions below + m_bInitialized = true; + + // set default fingerprint path to where those get installed + OUString aPhysPath; + OUString aURL( SvtPathOptions().GetFingerprintPath() ); + osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath ); +#ifdef _WIN32 + aPhysPath += "\\"; +#else + aPhysPath += "/"; +#endif + + SetFingerPrintsDB( aPhysPath ); + +#if !defined(EXTTEXTCAT_VERSION_MAJOR) + + // disable currently not functional languages... 
+ struct LangCountry + { + const char *pLang; + const char *pCountry; + }; + LangCountry aDisable[] = + { + // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0 + // which is the first with EXTTEXTCAT_VERSION_MAJOR defined + {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, + {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""}, + {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""} + }; + sal_Int32 nNum = SAL_N_ELEMENTS(aDisable); + Sequence< Locale > aDisableSeq( nNum ); + Locale *pDisableSeq = aDisableSeq.getArray(); + for (sal_Int32 i = 0; i < nNum; ++i) + { + Locale aLocale; + aLocale.Language = OUString::createFromAscii( aDisable[i].pLang ); + aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry ); + pDisableSeq[i] = aLocale; + } + disableLanguages( aDisableSeq ); + DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" ); +#endif +} + +Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage( + const OUString& rText, + ::sal_Int32 nStartPos, + ::sal_Int32 nLen ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength()) + throw lang::IllegalArgumentException(); + + OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) ); + Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr()); + lang::Locale aRes; + aRes.Language = OUString::createFromAscii( g.GetLanguage() ); + aRes.Country = OUString::createFromAscii( g.GetCountry() ); + return aRes; +} + +#define DEFAULT_CONF_FILE_NAME "fpdb.conf" + +void LangGuess_Impl::SetFingerPrintsDB( + std::u16string_view filePath ) +{ + //! 
text encoding for file name / path needs to be in the same encoding the OS uses + OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() ); + OString conf_file_path = path + DEFAULT_CONF_FILE_NAME; + + m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr()); +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + std::vector<Guess> gs = m_aGuesser.GetAllManagedLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + std::vector<Guess> gs = m_aGuesser.GetAvailableLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() ); + current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + Sequence< css::lang::Locale > aRes; + std::vector<Guess> gs = m_aGuesser.GetUnavailableLanguages(); + aRes.realloc(gs.size()); + + css::lang::Locale *pRes = aRes.getArray(); + + for(size_t i = 0; i < gs.size() ; i++ ){ + css::lang::Locale current_aRes; + current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() ); + 
current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() ); + pRes[i] = current_aRes; + } + + return aRes; +} + +void SAL_CALL LangGuess_Impl::disableLanguages( + const uno::Sequence< Locale >& rLanguages ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + for (const Locale& rLanguage : rLanguages) + { + std::string language; + + OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); + OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); + + language += l.getStr(); + language += "-"; + language += c.getStr(); + m_aGuesser.DisableLanguage(language); + } +} + +void SAL_CALL LangGuess_Impl::enableLanguages( + const uno::Sequence< Locale >& rLanguages ) +{ + std::scoped_lock aGuard( GetLangGuessMutex() ); + + EnsureInitialized(); + + for (const Locale& rLanguage : rLanguages) + { + std::string language; + + OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); + OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); + + language += l.getStr(); + language += "-"; + language += c.getStr(); + m_aGuesser.EnableLanguage(language); + } +} + +OUString SAL_CALL LangGuess_Impl::getImplementationName( ) +{ + return "com.sun.star.lingu2.LanguageGuessing"; +} + +sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( ) +{ + return { "com.sun.star.linguistic2.LanguageGuessing" }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_LangGuess_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new LangGuess_Impl()); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx 
b/lingucomponent/source/languageguessing/simpleguesser.cxx new file mode 100644 index 0000000000..7210b1f451 --- /dev/null +++ b/lingucomponent/source/languageguessing/simpleguesser.cxx @@ -0,0 +1,221 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + /** + * + * + * + * + * TODO + * - Add exception throwing when h == NULL + * - Not init h when implicit constructor is launched + */ + +#include <string.h> + +#ifdef SYSTEM_LIBEXTTEXTCAT +#include <libexttextcat/textcat.h> +#include <libexttextcat/common.h> +#include <libexttextcat/constants.h> +#include <libexttextcat/fingerprint.h> +#else +#include <textcat.h> +#include <common.h> +#include <constants.h> +#include <fingerprint.h> +#endif + +#include <sal/types.h> + +#include<rtl/character.hxx> +#include "simpleguesser.hxx" + +static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){ + size_t i; + int ret = 0; + + size_t min = s1.length(); + if (min > s2.length()) + min = s2.length(); + + for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){ + ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i])) + - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i])); + if(s1[i] == '.' || s2[i] == '.') {ret = 0;} //. is a neutral character + } + return ret; + } + +namespace { + +/** + * This following structure is from textcat.c + */ +typedef struct textcat_t{ + + void **fprint; + char *fprint_disable; + uint4 size; + uint4 maxsize; + + char output[MAXOUTPUTSIZE]; + +} textcat_t; +// end of the 3 structs + +} + +SimpleGuesser::SimpleGuesser() +{ + h = nullptr; +} + +SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){ + // Check for self-assignment! + if (this == &sg) // Same object? + return *this; // Yes, so skip assignment, and just return *this. + + if(h){textcat_Done(h);} + h = sg.h; + return *this; +} + +SimpleGuesser::~SimpleGuesser() +{ + if(h){textcat_Done(h);} +} + +/*! 
+ \fn SimpleGuesser::GuessLanguage(char* text) + */ +std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text) +{ + std::vector<Guess> guesses; + + if (!h) + return guesses; + + int len = strlen(text); + + if (len > MAX_STRING_LENGTH_TO_ANALYSE) + len = MAX_STRING_LENGTH_TO_ANALYSE; + + const char *guess_list = textcat_Classify(h, text, len); + + if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0) + return guesses; + + int current_pointer = 0; + + while(guess_list[current_pointer] != '\0') + { + while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0') + current_pointer++; + if(guess_list[current_pointer] != '\0') + { + Guess g(guess_list + current_pointer); + + guesses.push_back(g); + + current_pointer++; + } + } + + return guesses; +} + +Guess SimpleGuesser::GuessPrimaryLanguage(const char* text) +{ + std::vector<Guess> ret = GuessLanguage(text); + return ret.empty() ? Guess() : ret[0]; +} +/** + * Is used to know which language is available, unavailable or both + * when mask = 0xF0, return only Available + * when mask = 0x0F, return only Unavailable + * when mask = 0xFF, return both Available and Unavailable + */ +std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask) +{ + textcat_t *tables = static_cast<textcat_t*>(h); + + std::vector<Guess> lang; + if(!h){return lang;} + + for (size_t i=0; i<tables->size; ++i) + { + if (tables->fprint_disable[i] & mask) + { + std::string langStr = "["; + langStr += fp_Name(tables->fprint[i]); + Guess g(langStr.c_str()); + lang.push_back(g); + } + } + + return lang; +} + +std::vector<Guess> SimpleGuesser::GetAvailableLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) ); +} + +std::vector<Guess> SimpleGuesser::GetUnavailableLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 0x0F )); +} + +std::vector<Guess> SimpleGuesser::GetAllManagedLanguages() +{ + return GetManagedLanguages( sal::static_int_cast< char >( 
0xFF )); +} + +void SimpleGuesser::XableLanguage(const std::string& lang, char mask) +{ + textcat_t *tables = static_cast<textcat_t*>(h); + + if(!h){return;} + + for (size_t i=0; i<tables->size; i++) + { + std::string language(fp_Name(tables->fprint[i])); + if (startsAsciiCaseInsensitive(language,lang) == 0) + tables->fprint_disable[i] = mask; + } +} + +void SimpleGuesser::EnableLanguage(const std::string& lang) +{ + XableLanguage(lang, sal::static_int_cast< char >( 0xF0 )); +} + +void SimpleGuesser::DisableLanguage(const std::string& lang) +{ + XableLanguage(lang, sal::static_int_cast< char >( 0x0F )); +} + +void SimpleGuesser::SetDBPath(const char* path, const char* prefix) +{ + if (h) + textcat_Done(h); + h = special_textcat_Init(path, prefix); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx new file mode 100644 index 0000000000..aec5442853 --- /dev/null +++ b/lingucomponent/source/languageguessing/simpleguesser.hxx @@ -0,0 +1,108 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
 */
#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX
#define INCLUDED_LINGUCOMPONENT_SOURCE_LANGUAGEGUESSING_SIMPLEGUESSER_HXX

#include <string>
#include <vector>
#include "guess.hxx"

// Upper bound, in bytes, on how much of a text is handed to the classifier.
#define MAX_STRING_LENGTH_TO_ANALYSE 200

/**
 * Thin wrapper around a textcat handle that guesses the language of a text
 * and lets individual fingerprints be enabled or disabled.
 */
class SimpleGuesser final
{
public:
    /**
     * Creates a guesser without a fingerprint database; call SetDBPath()
     * before asking for guesses. Until then the query methods return empty
     * results and Enable/DisableLanguage do nothing.
     */
    SimpleGuesser();

    /**
     * Releases the current handle and takes over the one of sg.
     * NOTE(review): the handle is shared, not copied - confirm that only
     * one of the two objects is destroyed afterwards.
     * @param SimpleGuesser& sg the other guesser
     */
    SimpleGuesser& operator=(const SimpleGuesser& sg);

    /**
     * destroy the object
     */
    ~SimpleGuesser();

    /**
     * Analyze a text and return the most probable languages of the text
     * @param char* text is the text to analyze
     * @return the list of guess
     */
    std::vector<Guess> GuessLanguage(const char* text);

    /**
     * Analyze a text and return the most probable language of the text
     * @param char* text is the text to analyze
     * @return the guess (containing language)
     */
    Guess GuessPrimaryLanguage(const char* text);

    /**
     * List all available languages (possibly to be in guesses)
     * @return the list of languages
     */
    std::vector<Guess> GetAvailableLanguages();

    /**
     * List all languages (possibly in guesses or not)
     * @return the list of languages
     */
    std::vector<Guess> GetAllManagedLanguages();

    /**
     * List all Unavailable languages (disable for any reason)
     * @return the list of languages
     */
    std::vector<Guess> GetUnavailableLanguages();

    /**
     * Mark a language enabled
     * @param string lang the language to enable (build like language-COUNTRY-encoding)
     */
    void EnableLanguage(const std::string& lang);

    /**
     * Mark a language disabled
     * @param string lang the language to disable (build like language-COUNTRY-encoding)
     */
    void DisableLanguage(const std::string& lang);

    /**
     * Load a new DB of fingerprints
     * @param const char* thePathOfConfFile self explaining
     * @param const char* prefix is the path where the directory which contains fingerprint files is stored
     */
    void SetDBPath(const char* thePathOfConfFile, const char* prefix);

private:
    //Where typical fingerprints (n-gram tables) are stored
    void* h;

    //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both
    std::vector<Guess> GetManagedLanguages(const char mask);

    //Like getManagedLanguages, this function enable or disable a language and it depends of the mask
    void XableLanguage(const std::string& lang, char mask);
};

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/lingutil/lingutil.cxx b/lingucomponent/source/lingutil/lingutil.cxx
new file mode 100644
index 0000000000..c737698417
--- /dev/null
+++ b/lingucomponent/source/lingutil/lingutil.cxx
@@ -0,0 +1,314 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */ + +#if defined(_WIN32) +#if !defined WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#endif + +#include <osl/thread.h> +#include <osl/file.hxx> +#include <osl/process.h> +#include <tools/debug.hxx> +#include <tools/urlobj.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <i18nlangtag/mslangid.hxx> +#include <unotools/bootstrap.hxx> +#include <unotools/lingucfg.hxx> +#include <unotools/pathoptions.hxx> +#include <rtl/bootstrap.hxx> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <rtl/tencinfo.h> +#include <linguistic/misc.hxx> + +#include <set> +#include <vector> +#include <string.h> + +#include "lingutil.hxx" + +#include <sal/macros.h> + +using namespace ::com::sun::star; + +#if defined(_WIN32) +OString Win_AddLongPathPrefix( const OString &rPathName ) +{ + constexpr OString WIN32_LONG_PATH_PREFIX = "\\\\?\\"_ostr; + if (!rPathName.match(WIN32_LONG_PATH_PREFIX)) return WIN32_LONG_PATH_PREFIX + rPathName; + return rPathName; +} +#endif //defined(_WIN32) + +#if defined SYSTEM_DICTS || defined IOS +// find old style dictionaries in system directories +static void GetOldStyleDicsInDir( + OUString const & aSystemDir, OUString const & aFormatName, + std::u16string_view aSystemSuffix, std::u16string_view aSystemPrefix, + std::set< OUString >& aDicLangInUse, + std::vector< SvtLinguConfigDictionaryEntry >& aRes ) +{ + osl::Directory aSystemDicts(aSystemDir); + if (aSystemDicts.open() != osl::FileBase::E_None) + return; + + osl::DirectoryItem aItem; + osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL); + while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None) + { + aItem.getFileStatus(aFileStatus); + OUString sPath = aFileStatus.getFileURL(); + if (sPath.endsWith(aSystemSuffix)) + { + sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1; + if (!sPath.match(aSystemPrefix, nStartIndex)) + continue; + OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.size(), + sPath.getLength() - 
aSystemSuffix.size() - + nStartIndex - aSystemPrefix.size()); + if (sChunk.isEmpty()) + continue; + + // We prefer (now) to use language tags. + // Avoid feeding in the older LANG_REGION scheme to the BCP47 + // ctor as that triggers use of liblangtag and initializes its + // database which we do not want during startup. Convert + // instead. + sChunk = sChunk.replace( '_', '-'); + + // There's a known exception to the rule, the dreaded + // hu_HU_u8.dic of the myspell-hu package, see + // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic + // This was ignored because unknown in the old implementation, + // truncate to the known locale and either insert because hu_HU + // wasn't encountered yet, or skip because it was. It doesn't + // really matter because the proper new-style hu_HU dictionary + // will take precedence anyway if installed with a Hungarian + // languagepack. Again, this is only to not pull in all + // liblangtag and stuff during startup, the result would be + // !isValidBcp47() and the dictionary ignored. 
+ if (sChunk == "hu-HU-u8") + sChunk = "hu-HU"; + + LanguageTag aLangTag(sChunk, true); + if (!aLangTag.isValidBcp47()) + continue; + + // Thus we first get the language of the dictionary + const OUString& aLocaleName(aLangTag.getBcp47()); + + if (aDicLangInUse.insert(aLocaleName).second) + { + // add the dictionary to the resulting vector + SvtLinguConfigDictionaryEntry aDicEntry; + aDicEntry.aLocations = { sPath }; + aDicEntry.aFormatName = aFormatName; + if (aLocaleName == u"ar") + aDicEntry.aLocaleNames = { + aLocaleName, + u"ar-AE"_ustr, u"ar-BH"_ustr, u"ar-DJ"_ustr, u"ar-DZ"_ustr, u"ar-EG"_ustr, + u"ar-ER"_ustr, u"ar-IL"_ustr, u"ar-IQ"_ustr, u"ar-JO"_ustr, u"ar-KM"_ustr, + u"ar-KW"_ustr, u"ar-LB"_ustr, u"ar-LY"_ustr, u"ar-MA"_ustr, u"ar-MR"_ustr, + u"ar-OM"_ustr, u"ar-PS"_ustr, u"ar-QA"_ustr, u"ar-SA"_ustr, u"ar-SD"_ustr, + u"ar-SO"_ustr, u"ar-SY"_ustr, u"ar-TD"_ustr, u"ar-TN"_ustr, u"ar-YE"_ustr + }; + else + aDicEntry.aLocaleNames = { aLocaleName }; + aRes.push_back( aDicEntry ); + } + } + } +} +#endif + +// build list of old style dictionaries (not as extensions) to use. +// User installed dictionaries (the ones residing in the user paths) +// will get precedence over system installed ones for the same language. 
+std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType ) +{ + std::vector< SvtLinguConfigDictionaryEntry > aRes; + + if (!pDicType) + return aRes; + + OUString aFormatName; + OUString aDicExtension; +#if defined SYSTEM_DICTS || defined IOS + OUString aSystemDir; + OUString aSystemPrefix; + OUString aSystemSuffix; +#endif + if (strcmp( pDicType, "DICT" ) == 0) + { + aFormatName = "DICT_SPELL"; + aDicExtension = ".dic"; +#ifdef SYSTEM_DICTS + aSystemDir = DICT_SYSTEM_DIR; + aSystemSuffix = aDicExtension; +#elif defined IOS + aSystemDir = "$BRAND_BASE_DIR/share/spell"; + rtl::Bootstrap::expandMacros(aSystemDir); + aSystemSuffix = ".dic"; +#endif + } + else if (strcmp( pDicType, "HYPH" ) == 0) + { + aFormatName = "DICT_HYPH"; + aDicExtension = ".dic"; +#ifdef SYSTEM_DICTS + aSystemDir = HYPH_SYSTEM_DIR; + aSystemPrefix = "hyph_"; + aSystemSuffix = aDicExtension; +#endif + } + else if (strcmp( pDicType, "THES" ) == 0) + { + aFormatName = "DICT_THES"; + aDicExtension = ".dat"; +#ifdef SYSTEM_DICTS + aSystemDir = THES_SYSTEM_DIR; + aSystemPrefix = "th_"; + aSystemSuffix = "_v2.dat"; +#elif defined IOS + aSystemDir = "$BRAND_BASE_DIR/share/thes"; + rtl::Bootstrap::expandMacros(aSystemDir); + aSystemPrefix = "th_"; + aSystemSuffix = "_v2.dat"; +#endif + } + + if (aFormatName.isEmpty() || aDicExtension.isEmpty()) + return aRes; + +#if defined SYSTEM_DICTS || defined IOS + // set of languages to remember the language where it is already + // decided to make use of the dictionary. 
+ std::set< OUString > aDicLangInUse; + +#ifndef IOS + // follow the hunspell tool's example and check DICPATH for preferred dictionaries + rtl_uString * pSearchPath = nullptr; + osl_getEnvironment(OUString("DICPATH").pData, &pSearchPath); + + if (pSearchPath) + { + OUString aSearchPath(pSearchPath); + rtl_uString_release(pSearchPath); + + sal_Int32 nIndex = 0; + do + { + OUString aSystem( aSearchPath.getToken(0, ':', nIndex) ); + OUString aCWD; + OUString aRelative; + OUString aAbsolute; + + if (!utl::Bootstrap::getProcessWorkingDir(aCWD)) + continue; + if (osl::FileBase::getFileURLFromSystemPath(aSystem, aRelative) + != osl::FileBase::E_None) + continue; + if (osl::FileBase::getAbsoluteFileURL(aCWD, aRelative, aAbsolute) + != osl::FileBase::E_None) + continue; + + // GetOldStyleDicsInDir will make sure the dictionary is the right + // type based on its prefix, that way hyphen, mythes and regular + // dictionaries can live in one directory + GetOldStyleDicsInDir(aAbsolute, aFormatName, aSystemSuffix, + aSystemPrefix, aDicLangInUse, aRes); + } + while (nIndex != -1); + } +#endif + + // load system directories last so that DICPATH prevails + GetOldStyleDicsInDir(aSystemDir, aFormatName, aSystemSuffix, aSystemPrefix, + aDicLangInUse, aRes); +#endif + + return aRes; +} + +void MergeNewStyleDicsAndOldStyleDics( + std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, + const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ) +{ + // get list of languages supported by new style dictionaries + std::set< OUString > aNewStyleLanguages; + for (auto const& newStyleDic : rNewStyleDics) + { + const uno::Sequence< OUString > aLocaleNames(newStyleDic.aLocaleNames); + sal_Int32 nLocaleNames = aLocaleNames.getLength(); + for (sal_Int32 k = 0; k < nLocaleNames; ++k) + { + aNewStyleLanguages.insert( aLocaleNames[k] ); + } + } + + // now check all old style dictionaries if they will add a not yet + // added language. 
If so add them to the resulting vector + for (auto const& oldStyleDic : rOldStyleDics) + { + sal_Int32 nOldStyleDics = oldStyleDic.aLocaleNames.getLength(); + + // old style dics should only have one language listed... + DBG_ASSERT( nOldStyleDics, "old style dictionary with more than one language found!"); + if (nOldStyleDics > 0) + { + if (linguistic::LinguIsUnspecified( oldStyleDic.aLocaleNames[0])) + { + OSL_FAIL( "old style dictionary with invalid language found!" ); + continue; + } + + // language not yet added? + if (aNewStyleLanguages.find( oldStyleDic.aLocaleNames[0] ) == aNewStyleLanguages.end()) + rNewStyleDics.push_back(oldStyleDic); + } + else + { + OSL_FAIL( "old style dictionary with no language found!" ); + } + } +} + +rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset) +{ + // default result: used to indicate that we failed to get the proper encoding + rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW; + + if (pCharset) + { + eRet = rtl_getTextEncodingFromMimeCharset(pCharset); + if (eRet == RTL_TEXTENCODING_DONTKNOW) + eRet = rtl_getTextEncodingFromUnixCharset(pCharset); + if (eRet == RTL_TEXTENCODING_DONTKNOW) + { + if (strcmp("ISCII-DEVANAGARI", pCharset) == 0) + eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI; + } + } + return eRet; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/lingutil/lingutil.hxx b/lingucomponent/source/lingutil/lingutil.hxx new file mode 100644 index 0000000000..687c414827 --- /dev/null +++ b/lingucomponent/source/lingutil/lingutil.hxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_LINGUTIL_LINGUTIL_HXX + +#include <rtl/string.hxx> + +#include <vector> + +#define OU2ENC(rtlOUString, rtlEncoding) \ + OString((rtlOUString).getStr(), (rtlOUString).getLength(), \ + rtlEncoding, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK) + +struct SvtLinguConfigDictionaryEntry; + +#if defined(_WIN32) + +// to be use to get a path name with long path prefix +// under Windows for Hunspell, Hyphen and MyThes libraries +OString Win_AddLongPathPrefix( const OString &rPathName ); +#endif + + +// temporary function, to be removed when new style dictionaries +// using configuration entries are fully implemented and provided +std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char * pDicType ); +void MergeNewStyleDicsAndOldStyleDics( std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics, const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics ); + +//Find an encoding from a charset string, using +//rtl_getTextEncodingFromMimeCharset and falling back to +//rtl_getTextEncodingFromUnixCharset with the addition of +//ISCII-DEVANAGARI. 
On failure returns +//RTL_TEXTENCODING_DONTKNOW (no fallback encoding is substituted) +rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/numbertext/numbertext.component b/lingucomponent/source/numbertext/numbertext.component new file mode 100644 index 0000000000..c3277533b5 --- /dev/null +++ b/lingucomponent/source/numbertext/numbertext.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.lingu2.NumberText" + constructor="lingucomponent_NumberText_get_implementation"> + <service name="com.sun.star.linguistic2.NumberText"/> + </implementation> +</component> diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx new file mode 100644 index 0000000000..b87b2cc5fc --- /dev/null +++ b/lingucomponent/source/numbertext/numbertext.cxx @@ -0,0 +1,168 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <mutex> + +#include <osl/file.hxx> +#include <tools/debug.hxx> +#include <o3tl/char16_t2wchar_t.hxx> + +#include <sal/config.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include <i18nlangtag/languagetag.hxx> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XNumberText.hpp> +#include <unotools/pathoptions.hxx> +#include <osl/thread.h> + +#include <Numbertext.hxx> + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +static std::mutex& GetNumberTextMutex() +{ + static std::mutex aMutex; + return aMutex; +} + +namespace +{ +class NumberText_Impl : public ::cppu::WeakImplHelper<XNumberText, XServiceInfo> +{ + Numbertext m_aNumberText; + bool m_bInitialized; + + virtual ~NumberText_Impl() override {} + void EnsureInitialized(); + +public: + NumberText_Impl(); + NumberText_Impl(const NumberText_Impl&) = delete; + NumberText_Impl& operator=(const NumberText_Impl&) = delete; + + // XServiceInfo implementation + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; + virtual Sequence<OUString> SAL_CALL getSupportedServiceNames() override; + + // XNumberText implementation + virtual OUString SAL_CALL getNumberText(const OUString& aText, + const ::css::lang::Locale& rLocale) override; + virtual css::uno::Sequence<css::lang::Locale> SAL_CALL getAvailableLanguages() override; +}; +} + +NumberText_Impl::NumberText_Impl() + : m_bInitialized(false) +{ +} + +void NumberText_Impl::EnsureInitialized() +{ + if (m_bInitialized) + return; + + // set this to true at the very start to prevent loops because of + // implicitly called functions below + m_bInitialized = true; + + // set default numbertext path to where 
those get installed + OUString aPhysPath; + OUString aURL(SvtPathOptions().GetNumbertextPath()); + osl::FileBase::getSystemPathFromFileURL(aURL, aPhysPath); +#ifdef _WIN32 + aPhysPath += "\\"; + const rtl_TextEncoding eEnc = RTL_TEXTENCODING_UTF8; +#else + aPhysPath += "/"; + const rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); +#endif + OString path = OUStringToOString(aPhysPath, eEnc); + m_aNumberText.set_prefix(std::string(path)); +} + +OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Locale& rLocale) +{ + std::scoped_lock aGuard(GetNumberTextMutex()); + EnsureInitialized(); + // libnumbertext supports Language + Country tags (separated by "_" or "-") + LanguageTag aLanguageTag(rLocale); + OUString aCode(aLanguageTag.getLanguage()); + OUString aCountry(aLanguageTag.getCountry()); + OUString aScript(aLanguageTag.getScript()); + if (!aScript.isEmpty()) + aCode += "-" + aScript; + if (!aCountry.isEmpty()) + aCode += "-" + aCountry; + OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US)); +#if defined(_WIN32) + std::wstring sResult(o3tl::toW(rText.getStr())); +#else + OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8)); + std::wstring sResult = Numbertext::string2wstring(std::string(aInput)); +#endif + bool result = m_aNumberText.numbertext(sResult, std::string(aLangCode)); + DBG_ASSERT(result, "numbertext: false"); +#if defined(_WIN32) + OUString aResult(o3tl::toU(sResult)); +#else + OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult)); +#endif + return aResult; +} + +uno::Sequence<Locale> SAL_CALL NumberText_Impl::getAvailableLanguages() +{ + std::scoped_lock aGuard(GetNumberTextMutex()); + // TODO + Sequence<css::lang::Locale> aRes; + return aRes; +} + +OUString SAL_CALL NumberText_Impl::getImplementationName() +{ + return "com.sun.star.lingu2.NumberText"; +} + +sal_Bool SAL_CALL NumberText_Impl::supportsService(const OUString& ServiceName) +{ + return 
cppu::supportsService(this, ServiceName); +} + +Sequence<OUString> SAL_CALL NumberText_Impl::getSupportedServiceNames() +{ + return { "com.sun.star.linguistic2.NumberText" }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_NumberText_get_implementation(css::uno::XComponentContext*, + css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new NumberText_Impl()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/languagetool/LanguageTool.component b/lingucomponent/source/spellcheck/languagetool/LanguageTool.component new file mode 100644 index 0000000000..9f7eb3d087 --- /dev/null +++ b/lingucomponent/source/spellcheck/languagetool/LanguageTool.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.LanguageToolGrammarChecker" + constructor="lingucomponent_LanguageToolGrammarChecker_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.Proofreader"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx new file mode 100644 index 0000000000..fe912cb6b3 --- /dev/null +++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx @@ -0,0 +1,519 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <config_version.h> + +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include "languagetoolimp.hxx" + +#include <i18nlangtag/languagetag.hxx> +#include <svtools/strings.hrc> +#include <unotools/resmgr.hxx> + +#include <vector> +#include <set> +#include <string.h> + +#include <officecfg/Office/Linguistic.hxx> + +#include <curl/curl.h> +#include <boost/property_tree/ptree.hpp> +#include <boost/property_tree/json_parser.hpp> +#include <algorithm> +#include <string_view> + +#include <systools/curlinit.hxx> + +#include <sal/log.hxx> +#include <tools/color.hxx> +#include <tools/long.hxx> +#include <com/sun/star/text/TextMarkupType.hpp> +#include <com/sun/star/uno/Any.hxx> +#include <comphelper/propertyvalue.hxx> +#include <unotools/lingucfg.hxx> +#include <osl/mutex.hxx> +#include <rtl/uri.hxx> + +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::linguistic2; + +constexpr OUStringLiteral sDuden = u"duden"; + +namespace +{ +constexpr size_t MAX_SUGGESTIONS_SIZE = 10; +using LanguageToolCfg = officecfg::Office::Linguistic::GrammarChecking::LanguageTool; + +PropertyValue lcl_GetLineColorPropertyFromErrorId(const std::string& rErrorId) +{ + Color aColor; + if (rErrorId == "TYPOS" || rErrorId == "orth") + { + aColor = COL_LIGHTRED; + } + else if (rErrorId == "STYLE") + { + aColor = COL_LIGHTBLUE; + } + else + { + // Same color is used for other errorId's such as GRAMMAR, TYPOGRAPHY.. + constexpr Color COL_ORANGE(0xD1, 0x68, 0x20); + aColor = COL_ORANGE; + } + return comphelper::makePropertyValue("LineColor", aColor); +} + +OString encodeTextForLT(const OUString& text) +{ + // Let's be a bit conservative. I don't find a good description what needs encoding (and in + // which way) at https://languagetool.org/http-api/; the "Try it out!" 
// Write callback handed to libcurl (see CURLOPT_WRITEFUNCTION in makeHttpRequest_impl).
//
// ptr    - start of the received chunk
// size   - size of one element
// nmemb  - number of elements in the chunk
// userp  - the std::string that collects the response body (CURLOPT_WRITEDATA)
//
// Appends size * nmemb bytes to the collecting string and returns that byte count.
// Returns 0 without touching anything when no collecting string was supplied.
size_t WriteCallback(void* ptr, size_t size, size_t nmemb, void* userp)
{
    auto* const pSink = static_cast<std::string*>(userp);
    if (pSink == nullptr)
        return 0;

    const size_t nChunkBytes = size * nmemb;
    pSink->append(static_cast<const char*>(ptr), nChunkBytes);
    return nChunkBytes;
}
CURLOPT_WRITEFUNCTION, WriteCallback); + (void)curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_body); + + // allow unknown or self-signed certificates + if (!LanguageToolCfg::SSLCertVerify::get()) + { + (void)curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false); + (void)curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYHOST, false); + } + + if (method == HTTP_METHOD::HTTP_POST) + { + (void)curl_easy_setopt(curl.get(), CURLOPT_POST, 1L); + (void)curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, aPostData.getStr()); + } + + CURLcode cc = curl_easy_perform(curl.get()); + if (cc != CURLE_OK) + { + SAL_WARN("languagetool", + "CURL request returned with error: " << static_cast<sal_Int32>(cc)); + } + + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &nStatusCode); + return response_body; +} + +std::string makeDudenHttpRequest(std::u16string_view aURL, const OString& aPostData, + tools::Long& nStatusCode) +{ + struct curl_slist* pList = nullptr; + OString sAccessToken + = OUStringToOString(LanguageToolCfg::ApiKey::get().value_or(""), RTL_TEXTENCODING_UTF8); + + pList = curl_slist_append(pList, "Cache-Control: no-cache"); + pList = curl_slist_append(pList, "Content-Type: application/json"); + if (!sAccessToken.isEmpty()) + { + sAccessToken = "access_token: " + sAccessToken; + pList = curl_slist_append(pList, sAccessToken.getStr()); + } + + return makeHttpRequest_impl(aURL, HTTP_METHOD::HTTP_POST, aPostData, pList, nStatusCode); +} + +std::string makeHttpRequest(std::u16string_view aURL, HTTP_METHOD method, const OString& aPostData, + tools::Long& nStatusCode) +{ + OString realPostData(aPostData); + if (method == HTTP_METHOD::HTTP_POST) + { + OString apiKey + = OUStringToOString(LanguageToolCfg::ApiKey::get().value_or(""), RTL_TEXTENCODING_UTF8); + OUString username = LanguageToolCfg::Username::get().value_or(""); + if (!apiKey.isEmpty() && !username.isEmpty()) + realPostData += "&username=" + encodeTextForLT(username) + "&apiKey=" + apiKey; + } + + return 
makeHttpRequest_impl(aURL, method, realPostData, nullptr, nStatusCode); +} + +template <typename Func> +uno::Sequence<SingleProofreadingError> parseJson(std::string&& json, std::string path, Func f) +{ + std::stringstream aStream(std::move(json)); // Optimized in C++20 + boost::property_tree::ptree aRoot; + boost::property_tree::read_json(aStream, aRoot); + + if (auto tree = aRoot.get_child_optional(path)) + { + uno::Sequence<SingleProofreadingError> aErrors(tree->size()); + auto it = tree->begin(); + for (auto& rError : asNonConstRange(aErrors)) + { + f(it->second, rError); + it++; + } + return aErrors; + } + return {}; +} + +void parseDudenResponse(ProofreadingResult& rResult, std::string&& aJSONBody) +{ + rResult.aErrors = parseJson( + std::move(aJSONBody), "check-positions", + [](const boost::property_tree::ptree& rPos, SingleProofreadingError& rError) { + rError.nErrorStart = rPos.get<int>("offset", 0); + rError.nErrorLength = rPos.get<int>("length", 0); + rError.nErrorType = text::TextMarkupType::PROOFREADING; + //rError.aShortComment = ?? + //rError.aFullComment = ?? 
+ const std::string sType = rPos.get<std::string>("type", {}); + rError.aProperties = { lcl_GetLineColorPropertyFromErrorId(sType) }; + + const auto proposals = rPos.get_child_optional("proposals"); + if (!proposals) + return; + rError.aSuggestions.realloc(std::min(proposals->size(), MAX_SUGGESTIONS_SIZE)); + auto itProp = proposals->begin(); + for (auto& rSuggestion : asNonConstRange(rError.aSuggestions)) + { + rSuggestion = OStringToOUString(itProp->second.data(), RTL_TEXTENCODING_UTF8); + itProp++; + } + }); +} + +/* + rResult is both input and output + aJSONBody is the response body from the HTTP Request to LanguageTool API +*/ +void parseProofreadingJSONResponse(ProofreadingResult& rResult, std::string&& aJSONBody) +{ + rResult.aErrors = parseJson( + std::move(aJSONBody), "matches", + [](const boost::property_tree::ptree& match, SingleProofreadingError& rError) { + rError.nErrorStart = match.get<int>("offset", 0); + rError.nErrorLength = match.get<int>("length", 0); + rError.nErrorType = text::TextMarkupType::PROOFREADING; + const std::string shortMessage = match.get<std::string>("message", {}); + const std::string message = match.get<std::string>("shortMessage", {}); + + rError.aShortComment = OStringToOUString(shortMessage, RTL_TEXTENCODING_UTF8); + rError.aFullComment = OStringToOUString(message, RTL_TEXTENCODING_UTF8); + + // Parse the error category for Line Color + std::string errorCategoryId; + if (auto rule = match.get_child_optional("rule")) + if (auto ruleCategory = rule->get_child_optional("category")) + errorCategoryId = ruleCategory->get<std::string>("id", {}); + rError.aProperties = { lcl_GetLineColorPropertyFromErrorId(errorCategoryId) }; + + const auto replacements = match.get_child_optional("replacements"); + if (!replacements) + return; + // Limit suggestions to avoid crash on context menu popup: + // (soffice:17251): Gdk-CRITICAL **: 17:00:21.277: ../../../../../gdk/wayland/gdkdisplay-wayland.c:1399: Unable to create Cairo image + // 
surface: invalid value (typically too big) for the size of the input (surface, pattern, etc.) + rError.aSuggestions.realloc(std::min(replacements->size(), MAX_SUGGESTIONS_SIZE)); + auto itRep = replacements->begin(); + for (auto& rSuggestion : asNonConstRange(rError.aSuggestions)) + { + std::string replacementStr = itRep->second.get<std::string>("value", {}); + rSuggestion = OStringToOUString(replacementStr, RTL_TEXTENCODING_UTF8); + itRep++; + } + }); +} + +OUString getCheckerURL() +{ + if (auto oURL = LanguageToolCfg::BaseURL::get()) + if (!oURL->isEmpty()) + return *oURL + "/check"; + return {}; +} +} + +LanguageToolGrammarChecker::LanguageToolGrammarChecker() + : mCachedResults(10) +{ +} + +LanguageToolGrammarChecker::~LanguageToolGrammarChecker() {} + +sal_Bool SAL_CALL LanguageToolGrammarChecker::isSpellChecker() { return false; } + +sal_Bool SAL_CALL LanguageToolGrammarChecker::hasLocale(const Locale& rLocale) +{ + if (!m_aSuppLocales.hasElements()) + getLocales(); + + for (auto const& suppLocale : std::as_const(m_aSuppLocales)) + if (rLocale == suppLocale) + return true; + + SAL_INFO("languagetool", "No locale \"" << LanguageTag::convertToBcp47(rLocale, false) << "\""); + return false; +} + +uno::Sequence<Locale> SAL_CALL LanguageToolGrammarChecker::getLocales() +{ + osl::MutexGuard aGuard(linguistic::GetLinguMutex()); + + if (m_aSuppLocales.hasElements()) + return m_aSuppLocales; + + if (!LanguageToolCfg::IsEnabled::get()) + return m_aSuppLocales; + + SvtLinguConfig aLinguCfg; + uno::Sequence<OUString> aLocaleList; + + if (LanguageToolCfg::RestProtocol::get().value_or("") == sDuden) + { + aLocaleList.realloc(3); + aLocaleList.getArray()[0] = "de-DE"; + aLocaleList.getArray()[1] = "en-US"; + aLocaleList.getArray()[2] = "en-GB"; + } + else + aLinguCfg.GetLocaleListFor("GrammarCheckers", + "org.openoffice.lingu.LanguageToolGrammarChecker", aLocaleList); + + auto nLength = aLocaleList.getLength(); + m_aSuppLocales.realloc(nLength); + auto pArray = 
m_aSuppLocales.getArray(); + auto pLocaleList = aLocaleList.getArray(); + + for (auto i = 0; i < nLength; i++) + { + pArray[i] = LanguageTag::convertToLocale(pLocaleList[i]); + } + + return m_aSuppLocales; +} + +ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading( + const OUString& aDocumentIdentifier, const OUString& aText, const Locale& aLocale, + sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition, + const uno::Sequence<PropertyValue>& aProperties) +{ + // ProofreadingResult declared here instead of parseHttpJSONResponse because of the early exists. + ProofreadingResult xRes; + xRes.aDocumentIdentifier = aDocumentIdentifier; + xRes.aText = aText; + xRes.aLocale = aLocale; + xRes.nStartOfSentencePosition = nStartOfSentencePosition; + xRes.nBehindEndOfSentencePosition = nSuggestedBehindEndOfSentencePosition; + xRes.aProperties = {}; + xRes.xProofreader = this; + xRes.aErrors = {}; + + if (aText.isEmpty()) + { + return xRes; + } + + if (nStartOfSentencePosition != 0) + { + return xRes; + } + + xRes.nStartOfNextSentencePosition = aText.getLength(); + + if (!LanguageToolCfg::IsEnabled::get()) + { + return xRes; + } + + OUString checkerURL = getCheckerURL(); + if (checkerURL.isEmpty()) + { + return xRes; + } + + if (aProperties.getLength() > 0 && aProperties[0].Name == "Update") + { + // locale changed + xRes.aText = ""; + return xRes; + } + + sal_Int32 spaceIndex = std::min(xRes.nStartOfNextSentencePosition, aText.getLength() - 1); + while (spaceIndex < aText.getLength() && aText[spaceIndex] == ' ') + { + xRes.nStartOfNextSentencePosition += 1; + spaceIndex = xRes.nStartOfNextSentencePosition; + } + if (xRes.nStartOfNextSentencePosition == nSuggestedBehindEndOfSentencePosition + && spaceIndex < aText.getLength()) + { + xRes.nStartOfNextSentencePosition + = std::min(nSuggestedBehindEndOfSentencePosition + 1, aText.getLength()); + } + xRes.nBehindEndOfSentencePosition + = std::min(xRes.nStartOfNextSentencePosition, 
aText.getLength()); + + OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8()); + OString postData; + const bool bDudenProtocol = LanguageToolCfg::RestProtocol::get().value_or("") == "duden"; + if (bDudenProtocol) + { + std::stringstream aStream; + boost::property_tree::ptree aTree; + aTree.put("text-language", langTag.getStr()); + aTree.put("text", aText.toUtf8()); // We don't encode the text in Duden Corrector tool case. + aTree.put("hyphenation", false); + aTree.put("spellchecking-level", 3); + aTree.put("correction-proposals", true); + boost::property_tree::write_json(aStream, aTree); + postData = OString(aStream.str()); + } + else + { + postData = "text=" + encodeTextForLT(aText) + "&language=" + langTag; + } + + if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end()) + { + xRes.aErrors = cachedResult->second; + return xRes; + } + + tools::Long http_code = 0; + std::string response_body; + if (bDudenProtocol) + response_body = makeDudenHttpRequest(checkerURL, postData, http_code); + else + response_body = makeHttpRequest(checkerURL, HTTP_METHOD::HTTP_POST, postData, http_code); + + if (http_code != 200) + { + return xRes; + } + + if (response_body.length() <= 0) + { + return xRes; + } + + if (bDudenProtocol) + { + parseDudenResponse(xRes, std::move(response_body)); + } + else + { + parseProofreadingJSONResponse(xRes, std::move(response_body)); + } + // cache the result + mCachedResults.insert(std::make_pair(postData, xRes.aErrors)); + return xRes; +} + +void SAL_CALL LanguageToolGrammarChecker::ignoreRule(const OUString& /*aRuleIdentifier*/, + const Locale& /*aLocale*/ +) +{ +} +void SAL_CALL LanguageToolGrammarChecker::resetIgnoreRules() {} + +OUString SAL_CALL LanguageToolGrammarChecker::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_LANGUAGETOOL, loc); +} + +OUString SAL_CALL 
// Exported C entry point that creates the grammar checker component. Its name is the one
// given in the constructor attribute of the LanguageTool .component file.
//
// Both parameters (component context and constructor arguments) are unused. A new
// LanguageToolGrammarChecker is allocated and passed through cppu::acquire(), whose result
// is returned to the caller.
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
lingucomponent_LanguageToolGrammarChecker_get_implementation(
    css::uno::XComponentContext*, css::uno::Sequence<css::uno::Any> const&)
{
    return cppu::acquire(new LanguageToolGrammarChecker());
}
// Proofreader (grammar checker) component that delegates the actual checking to a remote
// HTTP service; see languagetoolimp.cxx for the request/response handling.
class LanguageToolGrammarChecker
    : public cppu::WeakImplHelper<css::linguistic2::XProofreader, css::lang::XInitialization,
                                  css::lang::XServiceInfo, css::lang::XServiceDisplayName>
{
    // Locales offered by this checker; filled on first use by getLocales().
    css::uno::Sequence<css::lang::Locale> m_aSuppLocales;
    // Results of earlier requests, keyed by the POST data that produced them. Constructed
    // with an argument of 10 (presumably the maximum number of entries - confirm against
    // o3tl::lru_map).
    o3tl::lru_map<OString, css::uno::Sequence<css::linguistic2::SingleProofreadingError>>
        mCachedResults;
    // Not copyable.
    LanguageToolGrammarChecker(const LanguageToolGrammarChecker&) = delete;
    LanguageToolGrammarChecker& operator=(const LanguageToolGrammarChecker&) = delete;

public:
    LanguageToolGrammarChecker();
    virtual ~LanguageToolGrammarChecker() override;

    // XSupportedLocales
    virtual css::uno::Sequence<css::lang::Locale> SAL_CALL getLocales() override;
    virtual sal_Bool SAL_CALL hasLocale(const css::lang::Locale& rLocale) override;

    // XProofreader
    // Always returns false.
    virtual sal_Bool SAL_CALL isSpellChecker() override;
    // Sends aText to the configured service and returns the errors it reports.
    virtual css::linguistic2::ProofreadingResult SAL_CALL
    doProofreading(const OUString& aDocumentIdentifier, const OUString& aText,
                   const css::lang::Locale& aLocale, sal_Int32 nStartOfSentencePosition,
                   sal_Int32 nSuggestedBehindEndOfSentencePosition,
                   const css::uno::Sequence<css::beans::PropertyValue>& aProperties) override;

    // Both rule-ignoring methods are implemented as no-ops.
    virtual void SAL_CALL ignoreRule(const OUString& aRuleIdentifier,
                                     const css::lang::Locale& aLocale) override;
    virtual void SAL_CALL resetIgnoreRules() override;

    // XServiceDisplayName
    virtual OUString SAL_CALL getServiceDisplayName(const css::lang::Locale& rLocale) override;

    // XInitialization
    // Implemented as a no-op; the arguments are ignored.
    virtual void SAL_CALL initialize(const css::uno::Sequence<css::uno::Any>& rArguments) override;

    // XServiceInfo
    virtual OUString SAL_CALL getImplementationName() override;
    virtual sal_Bool SAL_CALL supportsService(const OUString& rServiceName) override;
    virtual css::uno::Sequence<OUString> SAL_CALL getSupportedServiceNames() override;
};
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.MacOSXSpellChecker" + constructor="lingucomponent_MacSpellChecker_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.SpellChecker"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx new file mode 100644 index 0000000000..776c474d21 --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.hxx @@ -0,0 +1,123 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
// Spell checker component backed by the platform's own checker: NSSpellChecker when MACOSX
// is defined, UITextChecker otherwise.
class MacSpellChecker :
    public cppu::WeakImplHelper
    <
        XSpellChecker,
        XLinguServiceEventBroadcaster,
        XInitialization,
        XComponent,
        XServiceInfo,
        XServiceDisplayName
    >
{
    // Locales reported by getLocales(); filled on its first call.
    Sequence< Locale >      aSuppLocales;
    // Per-dictionary arrays with numdict entries each. They are allocated with new[] in
    // getLocales() and released with delete[] in the destructor.
    rtl_TextEncoding *      aDEncs;
    Locale *                aDLocs;
    OUString *              aDNames;
    sal_Int32               numdict;
#ifdef MACOSX
    int                     macTag;   // unique tag for this doc
#else
    // NOTE(review): the constructor selects this member with "#ifndef IOS ... #else",
    // while this header uses "#ifdef MACOSX ... #else" - presumably equivalent for the
    // supported targets; confirm.
    UITextChecker *         pChecker;
#endif
    ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners;
    // Created lazily by GetPropHelper_Impl().
    rtl::Reference< linguistic::PropertyHelper_Spell > xPropHelper;
    bool bDisposing;

    // Not copyable.
    MacSpellChecker(const MacSpellChecker &) = delete;
    MacSpellChecker & operator = (const MacSpellChecker &) = delete;

    // Creates xPropHelper if necessary, registers it as property listener, and returns it.
    linguistic::PropertyHelper_Spell & GetPropHelper_Impl();
    // Returns the property helper, creating it on first use.
    linguistic::PropertyHelper_Spell & GetPropHelper()
    {
        return xPropHelper.is() ? *xPropHelper : GetPropHelper_Impl();
    }

    sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale );
    Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale );

public:
    MacSpellChecker();
    virtual ~MacSpellChecker() override;

    // XSupportedLocales (for XSpellChecker)
    virtual Sequence< Locale > SAL_CALL getLocales() override;
    virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override;

    // XSpellChecker
    virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override;
    virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence<PropertyValue>& rProperties ) override;

    // XLinguServiceEventBroadcaster
    virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;
    virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override;

    // XServiceDisplayName
    virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override;

    // XInitialization
    virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override;

    // XComponent
    virtual void SAL_CALL dispose() override;
    virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override;
    virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override;

    // XServiceInfo
    virtual OUString SAL_CALL getImplementationName() override;
    virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override;
    virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
};
b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm new file mode 100644 index 0000000000..448870e912 --- /dev/null +++ b/lingucomponent/source/spellcheck/macosxspell/macspellimp.mm @@ -0,0 +1,678 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
// Releases the per-dictionary arrays and detaches the property helper, if one was created.
MacSpellChecker::~MacSpellChecker()
{
    numdict = 0;

    // delete[] on a null pointer does nothing, so the arrays need no guard.
    delete[] aDEncs;
    aDEncs = nullptr;

    delete[] aDLocs;
    aDLocs = nullptr;

    delete[] aDNames;
    aDNames = nullptr;

    if (!xPropHelper.is())
        return;
    xPropHelper->RemoveAsPropListener();
}
MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. So here we need to parse both the user edited + // dictionary list and the shared dictionary list + // to see what dictionaries the admin/user has installed + + int numshr; // number of shared dictionary entries + rtl_TextEncoding aEnc = RTL_TEXTENCODING_UTF8; + + std::vector<NSString *> postspdict; + + if (!numdict) { + + // invoke a dictionary manager to get the user dictionary list + // TODO How on macOS? + + // invoke a second dictionary manager to get the shared dictionary list +#ifdef MACOSX + NSArray *aSpellCheckLanguages = [[NSSpellChecker sharedSpellChecker] availableLanguages]; +#else + NSArray *aSpellCheckLanguages = [UITextChecker availableLanguages]; +#endif + + for (NSUInteger i = 0; i < [aSpellCheckLanguages count]; i++) + { + NSString* pLangStr = static_cast<NSString*>([aSpellCheckLanguages objectAtIndex:i]); + + // Fix up generic languages (without territory code) and odd combinations that LO + // doesn't handle. + if ([pLangStr isEqualToString:@"ar"]) + { + const std::vector<NSString*> aAR + { @"AE", @"BH", @"DJ", @"DZ", @"EG", @"ER", @"IL", @"IQ", @"JO", + @"KM", @"KW", @"LB", @"LY", @"MA", @"MR", @"OM", @"PS", @"QA", + @"SA", @"SD", @"SO", @"SY", @"TD", @"TN", @"YE" }; + for (auto c: aAR) + { + pLangStr = [@"ar_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"da"]) + { + postspdict.push_back( @"da_DK" ); + } + else if ([pLangStr isEqualToString:@"de"]) + { + // Not de_CH and de_LI, though. They need separate dictionaries. + const std::vector<NSString*> aDE + { @"AT", @"BE", @"DE", @"LU" }; + for (auto c: aDE) + { + pLangStr = [@"de_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#ifdef IOS + // iOS says it has specifically de_DE. 
Let's assume it is good enough for German as + // written in Austria, Belgium, and Luxembourg, too. (Not for German in Switzerland and + // Liechtenstein. For those you need to bundle the myspell dictionary.) + else if ([pLangStr isEqualToString:@"de_DE"]) + { + const std::vector<NSString*> aDE + { @"AT", @"BE", @"DE", @"LU" }; + for (auto c: aDE) + { + pLangStr = [@"de_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"en"]) + { + // System has en_AU, en_CA, en_GB, and en_IN. Add the rest. + const std::vector<NSString*> aEN + { @"BW", @"BZ", @"GH", @"GM", @"IE", @"JM", @"MU", @"MW", @"MY", @"NA", + @"NZ", @"PH", @"TT", @"US", @"ZA", @"ZW" }; + for (auto c: aEN) + { + pLangStr = [@"en_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"en_JP"] + || [pLangStr isEqualToString:@"en_SG"]) + { + // Just skip, LO doesn't have those yet in this context. + } + else if ([pLangStr isEqualToString:@"es"]) + { + const std::vector<NSString*> aES + { @"AR", @"BO", @"CL", @"CO", @"CR", @"CU", @"DO", @"EC", @"ES", @"GT", + @"HN", @"MX", @"NI", @"PA", @"PE", @"PR", @"PY", @"SV", @"UY", @"VE" }; + for (auto c: aES) + { + pLangStr = [@"es_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } + else if ([pLangStr isEqualToString:@"fi"]) + { + postspdict.push_back( @"fi_FI" ); + } + else if ([pLangStr isEqualToString:@"fr"]) + { + const std::vector<NSString*> aFR + { @"BE", @"BF", @"BJ", @"CA", @"CH", @"CI", @"FR", @"LU", @"MC", @"ML", + @"MU", @"NE", @"SN", @"TG" }; + for (auto c: aFR) + { + pLangStr = [@"fr_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"fr_FR"]) + { + const std::vector<NSString*> aFR + { @"BE", @"BF", @"BJ", @"CA", @"CH", @"CI", @"FR", @"LU", @"MC", @"ML", + @"MU", @"NE", @"SN", @"TG" }; + for (auto c: aFR) + { + pLangStr = [@"fr_" 
stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"it"]) + { + postspdict.push_back( @"it_CH" ); + postspdict.push_back( @"it_IT" ); + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"it_IT"]) + { + const std::vector<NSString*> aIT + { @"CH", @"IT" }; + for (auto c: aIT) + { + pLangStr = [@"it_" stringByAppendingString: c]; + postspdict.push_back( pLangStr ); + } + } +#endif + else if ([pLangStr isEqualToString:@"ko"]) + { + postspdict.push_back( @"ko_KR" ); + } + else if ([pLangStr isEqualToString:@"nl"]) + { + postspdict.push_back( @"nl_BE" ); + postspdict.push_back( @"nl_NL" ); + } + else if ([pLangStr isEqualToString:@"nb"]) + { + postspdict.push_back( @"nb_NO" ); + } + else if ([pLangStr isEqualToString:@"pl"]) + { + postspdict.push_back( @"pl_PL" ); + } + else if ([pLangStr isEqualToString:@"ru"]) + { + postspdict.push_back( @"ru_RU" ); + } + else if ([pLangStr isEqualToString:@"sv"]) + { + postspdict.push_back( @"sv_FI" ); + postspdict.push_back( @"sv_SE" ); + } +#ifdef IOS + else if ([pLangStr isEqualToString:@"sv_SE"]) + { + postspdict.push_back( @"sv_FI" ); + postspdict.push_back( @"sv_SE" ); + } +#endif + else if ([pLangStr isEqualToString:@"tr"]) + { + postspdict.push_back( @"tr_TR" ); + } + else + postspdict.push_back( pLangStr ); + } + // System has pt_BR and pt_PT, add pt_AO. 
+ postspdict.push_back( @"pt_AO" ); + + numshr = postspdict.size(); + + // we really should merge these and remove duplicates but since + // users can name their dictionaries anything they want it would + // be impossible to know if a real duplication exists unless we + // add some unique key to each myspell dictionary + numdict = numshr; + + if (numdict) { + aDLocs = new Locale [numdict]; + aDEncs = new rtl_TextEncoding [numdict]; + aDNames = new OUString [numdict]; + aSuppLocales.realloc(numdict); + Locale * pLocale = aSuppLocales.getArray(); + int numlocs = 0; + int newloc; + int i,j; + int k = 0; + + //first add the user dictionaries + //TODO for MAC? + + // now add the shared dictionaries + for (i = 0; i < numshr; i++) { + NSDictionary *aLocDict = [ NSLocale componentsFromLocaleIdentifier:postspdict[i] ]; + NSString* aLang = [ aLocDict objectForKey:NSLocaleLanguageCode ]; + NSString* aCountry = [ aLocDict objectForKey:NSLocaleCountryCode ]; + OUString lang([aLang cStringUsingEncoding: NSUTF8StringEncoding], [aLang length], aEnc); + OUString country([ aCountry cStringUsingEncoding: NSUTF8StringEncoding], [aCountry length], aEnc); + Locale nLoc( lang, country, OUString() ); + newloc = 1; + //eliminate duplicates (is this needed for MacOS?) 
+ for (j = 0; j < numlocs; j++) { + if (nLoc == pLocale[j]) newloc = 0; + } + if (newloc) { + pLocale[numlocs] = nLoc; + numlocs++; + } + aDLocs[k] = nLoc; + aDEncs[k] = 0; + k++; + } + + aSuppLocales.realloc(numlocs); + + } else { + /* no dictionary.lst found so register no dictionaries */ + numdict = 0; + aDEncs = nullptr; + aDLocs = nullptr; + aDNames = nullptr; + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + + + +sal_Bool SAL_CALL MacSpellChecker::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!aSuppLocales.getLength()) + getLocales(); + + sal_Int32 nLen = aSuppLocales.getLength(); + for (sal_Int32 i = 0; i < nLen; ++i) + { + const Locale *pLocale = aSuppLocales.getConstArray(); + if (rLocale == pLocale[i]) + { + bRes = true; + break; + } + } + return bRes; +} + + +sal_Int16 MacSpellChecker::GetSpellFailure( const OUString &rWord, const Locale &rLocale ) +{ + // initialize a myspell object for each dictionary once + // (note: mutex is held higher up in isValid) + + + sal_Int16 nRes = -1; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(nWord.getStr()) length: nWord.getLength()]autorelease]; + NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease]; + if(rLocale.Country.getLength()>0) + { + NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) 
length: rLocale.Country.getLength()]autorelease]; + NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry]; + aLang = [aLang stringByAppendingString:aTaggedCountry]; + } + +#ifdef MACOSX + NSInteger aCount; + NSRange range = [[NSSpellChecker sharedSpellChecker] checkSpellingOfString:aNSStr startingAt:0 language:aLang wrap:false inSpellDocumentWithTag:macTag wordCount:&aCount]; +#else + NSRange range = [pChecker rangeOfMisspelledWordInString:aNSStr range:NSMakeRange(0, [aNSStr length]) startingAt:0 wrap:NO language:aLang]; +#endif + int rVal = 0; + if(range.length>0) + { + rVal = -1; + } + else + { + rVal = 1; + } + [pool release]; + if (rVal != 1) + { + nRes = SpellFailure::SPELLING_ERROR; + } else { + return -1; + } + } + return nRes; +} + + + +sal_Bool SAL_CALL + MacSpellChecker::isValid( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence<PropertyValue>& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || !rWord.getLength()) + return true; + + if (!hasLocale( rLocale )) + return true; + + // Get property values to be used. + // These are be the default values set in the SN_LINGU_PROPERTIES + // PropertySet which are overridden by the supplied ones from the + // last argument. + // You'll probably like to use a simpler solution than the provided + // one using the PropertyHelper_Spell. 
+ + PropertyHelper_Spell &rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + sal_Int16 nFailure = GetSpellFailure( rWord, rLocale ); + if (nFailure != -1) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + // postprocess result for errors that should be ignored + if ( (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) + || (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) + || (!rHelper.IsSpellCapitalization() + && nFailure == SpellFailure::CAPTION_ERROR) + ) + nFailure = -1; + } + + return (nFailure == -1); +} + +Reference< XSpellAlternatives > + MacSpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) +{ + // Retrieves the return values for the 'spell' function call in case + // of a misspelled word. + // Especially it may give a list of suggested (correct) words: + + Reference< XSpellAlternatives > xRes; + // note: mutex is held by higher up by spell which covers both + + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + int count; + Sequence< OUString > aStr( 0 ); + + // first handle smart quotes (single and double) + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; + NSString* aNSStr = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(nWord.getStr()) length: nWord.getLength()]autorelease]; + NSString* aLang = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Language.getStr()) length: rLocale.Language.getLength()]autorelease]; + if(rLocale.Country.getLength()>0) + { + NSString* aCountry = [[[NSString alloc] initWithCharacters: reinterpret_cast<unichar const *>(rLocale.Country.getStr()) length: 
rLocale.Country.getLength()]autorelease]; + NSString* aTaggedCountry = [@"_" stringByAppendingString:aCountry]; + aLang = [aLang stringByAppendingString:aTaggedCountry]; + } +#ifdef MACOSX + [[NSSpellChecker sharedSpellChecker] setLanguage:aLang]; + NSArray *guesses = [[NSSpellChecker sharedSpellChecker] guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang inSpellDocumentWithTag:0]; + (void) this; // avoid loplugin:staticmethods, the !MACOSX case uses 'this' +#else + NSArray *guesses = [pChecker guessesForWordRange:NSMakeRange(0, [aNSStr length]) inString:aNSStr language:aLang]; +#endif + count = [guesses count]; + if (count) + { + aStr.realloc( count ); + OUString *pStr = aStr.getArray(); + for (int ii=0; ii < count; ii++) + { + // if needed add: if (suglst[ii] == NULL) continue; + NSString* guess = [guesses objectAtIndex:ii]; + OUString cvtwrd(reinterpret_cast<const sal_Unicode*>([guess cStringUsingEncoding:NSUnicodeStringEncoding]), static_cast<sal_Int32>([guess length])); + pStr[ii] = cvtwrd; + } + } + [pool release]; + } + + // now return an empty alternative for no suggestions or the list of alternatives if some found + rtl::Reference<SpellAlternatives> pAlt = new SpellAlternatives; + pAlt->SetWordLanguage( rWord, nLang ); + pAlt->SetFailureType( SpellFailure::SPELLING_ERROR ); + pAlt->SetAlternatives( aStr ); + xRes = pAlt; + return xRes; + +} + +Reference< XSpellAlternatives > SAL_CALL + MacSpellChecker::spell( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence<PropertyValue>& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || !rWord.getLength()) + return nullptr; + + if (!hasLocale( rLocale )) + return nullptr; + + Reference< XSpellAlternatives > xAlt; + if (!isValid( rWord, rLocale, rProperties )) + { + xAlt = GetProposals( rWord, rLocale ); + } + return xAlt; +} + +sal_Bool SAL_CALL + MacSpellChecker::addLinguServiceEventListener( + const Reference< 
XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + + +sal_Bool SAL_CALL + MacSpellChecker::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!bDisposing && rxLstnr.is()) + { + DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" ); + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + + +OUString SAL_CALL + MacSpellChecker::getServiceDisplayName( const Locale& /*rLocale*/ ) +{ + MutexGuard aGuard( GetLinguMutex() ); + return "macOS Spell Checker"; +} + + +void SAL_CALL + MacSpellChecker::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!xPropHelper.is()) + { + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + //rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. 
+ xPropHelper = new PropertyHelper_Spell( static_cast<XSpellChecker *>(this), xPropSet ); + xPropHelper->AddAsPropListener(); + } + else + OSL_FAIL( "wrong number of arguments in sequence" ); + + } +} + + +void SAL_CALL + MacSpellChecker::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XSpellChecker *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + } +} + + +void SAL_CALL + MacSpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + + +void SAL_CALL + MacSpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL MacSpellChecker::getImplementationName() +{ + return "org.openoffice.lingu.MacOSXSpellChecker"; +} + +sal_Bool SAL_CALL MacSpellChecker::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL MacSpellChecker::getSupportedServiceNames() +{ + return { SN_SPELLCHECKER }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_MacSpellChecker_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new MacSpellChecker()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/spell/spell.component b/lingucomponent/source/spellcheck/spell/spell.component new file mode 100644 index 0000000000..c284e13fc3 --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/spell.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.MySpellSpellChecker" + constructor="lingucomponent_SpellChecker_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.SpellChecker"/> + </implementation> +</component> diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx b/lingucomponent/source/spellcheck/spell/sspellimp.cxx new file mode 100644 index 0000000000..193ddb2c32 --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx @@ -0,0 +1,648 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> + +#include <com/sun/star/linguistic2/SpellFailure.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <comphelper/lok.hxx> +#include <comphelper/processfactory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/lang/XMultiServiceFactory.hpp> +#include <tools/debug.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> +#include <com/sun/star/ucb/XSimpleFileAccess.hpp> + +#include <lingutil.hxx> +#include <hunspell.hxx> +#include "sspellimp.hxx" + +#include <linguistic/misc.hxx> +#include <linguistic/spelldta.hxx> +#include <i18nlangtag/languagetag.hxx> +#include <svtools/strings.hrc> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> +#include <osl/file.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/log.hxx> + +#include <numeric> +#include <utility> +#include <vector> +#include <set> +#include <string.h> + +using namespace utl; +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +// XML-header of SPELLML queries +#if !defined SPELL_XML +constexpr OUStringLiteral SPELL_XML = u"<?xml?>"; +#endif + +// only available in hunspell >= 1.5 +#if !defined MAXWORDLEN +#define MAXWORDLEN 176 +#endif + +SpellChecker::SpellChecker() : + m_aEvtListeners(GetLinguMutex()), + m_bDisposing(false) +{ +} + +SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, 
rtl_TextEncoding i_DEnc) + : m_aDName(std::move(i_DName)) + , m_aDLoc(std::move(i_DLoc)) + , m_aDEnc(i_DEnc) +{ +} + +SpellChecker::~SpellChecker() +{ + if (m_pPropHelper) + { + m_pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl() +{ + if (!m_pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) ); + m_pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *m_pPropHelper; +} + +Sequence< Locale > SAL_CALL SpellChecker::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (m_DictItems.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of extension dictionaries-to-use + // (or better speaking: the list of dictionaries using the + // new configuration entries). + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers", + "org.openoffice.lingu.MySpellSpellChecker", aFormatList ); + for (auto const& format : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat(format) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... 
+ std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "DICT" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory()); + uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY); + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); + uno::Sequence< OUString > aLocations( dict.aLocations ); + SAL_WARN_IF( + aLocaleNames.hasElements() && !aLocations.hasElements(), + "lingucomponent", "no locations"); + if (aLocations.hasElements()) + { + if (xAccess.is() && xAccess->exists(aLocations[0])) + { + for (auto const& locale : aLocaleNames) + { + if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(locale)) + continue; + + aLocaleNamesSet.insert(locale); + } + } + else + { + SAL_WARN( + "lingucomponent", + "missing <" << aLocations[0] << ">"); + } + } + } + // ... and add them to the resulting sequence + m_aSuppLocales.realloc( aLocaleNamesSet.size() ); + std::transform( + aLocaleNamesSet.begin(), aLocaleNamesSet.end(), m_aSuppLocales.getArray(), + [](auto const& localeName) { return LanguageTag::convertToLocale(localeName); }); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. 
+ sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0), + [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + m_DictItems.reserve(nDictSize); + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames ); + + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. + // Once for each of its supported locales. + for (auto const& localeName : aLocaleNames) + { + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + + m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW); + } + } + } + DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" 
); + } + else + { + // no dictionary found so register no dictionaries + m_aSuppLocales.realloc(0); + } + } + + return m_aSuppLocales; +} + +sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_aSuppLocales.hasElements()) + getLocales(); + + for (auto const& suppLocale : std::as_const(m_aSuppLocales)) + { + if (rLocale == suppLocale) + { + bRes = true; + break; + } + } + return bRes; +} + +sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale, int& rInfo) +{ + if (rWord.getLength() > MAXWORDLEN) + return -1; + + Hunspell * pMS = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + // initialize a myspell object for each dictionary once + // (note: mutex is held higher up in isValid) + + sal_Int16 nRes = -1; + + // first handle smart quotes both single and double + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + sal_Int32 extrachar = 0; + + for (sal_Int32 ix=0; ix < n; ix++) + { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) + rBuf[ix] = u'"'; + else if ((c == 0x2018) || (c == 0x2019)) + rBuf[ix] = u'\''; + + // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only + // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries + // set ICONV and IGNORE aff file options, if needed.) 
+ else if ((c == 0x200C) || (c == 0x200D) || + ((c >= 0xFB00) && (c <= 0xFB04))) + extrachar = 1; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + for (auto& currDict : m_DictItems) + { + pMS = nullptr; + eEnc = RTL_TEXTENCODING_DONTKNOW; + + if (rLocale == currDict.m_aDLoc) + { + if (!currDict.m_pDict) + { + OUString dicpath = currDict.m_aDName + ".dic"; + OUString affpath = currDict.m_aDName + ".aff"; + OUString dict; + OUString aff; + osl::FileBase::getSystemPathFromFileURL(dicpath,dict); + osl::FileBase::getSystemPathFromFileURL(affpath,aff); +#if defined(_WIN32) + // workaround for Windows specific problem that the + // path length in calls to 'fopen' is limited to somewhat + // about 120+ characters which will usually be exceed when + // using dictionaries as extensions. (Hunspell waits UTF-8 encoded + // path with \\?\ long path prefix.) + OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8)); + OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8)); +#else + OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding())); + OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding())); +#endif + + currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr()); +#if defined(H_DEPRECATED) + currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str()); +#else + currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding()); +#endif + } + pMS = currDict.m_pDict.get(); + eEnc = currDict.m_aDEnc; + } + + if (pMS) + { + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! 
(maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return -1; + + OString aWrd(OU2ENC(nWord,eEnc)); +#if defined(H_DEPRECATED) + bool bVal = pMS->spell(std::string(aWrd), &rInfo); +#else + bool bVal = pMS->spell(aWrd.getStr(), &rInfo) != 0; +#endif + if (!bVal) { + if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) { + OUStringBuffer aBuf(nWord); + n = aBuf.getLength(); + for (sal_Int32 ix=n-1; ix >= 0; ix--) + { + switch (aBuf[ix]) { + case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break; + case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break; + case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break; + case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break; + case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break; + case 0x200C: + case 0x200D: aBuf.remove(ix, 1); break; + } + } + OUString aWord(aBuf.makeStringAndClear()); + OString bWrd(OU2ENC(aWord, eEnc)); +#if defined(H_DEPRECATED) + bVal = pMS->spell(std::string(bWrd), &rInfo); +#else + bVal = pMS->spell(bWrd.getStr(), &rInfo) != 0; +#endif + if (bVal) return -1; + } + nRes = SpellFailure::SPELLING_ERROR; + } else { + return -1; + } + pMS = nullptr; + } + } + } + + return nRes; +} + +sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || rWord.isEmpty()) + return true; + + if (!hasLocale( rLocale )) + return true; + + // return sal_False to process SPELLML requests (they are longer than the header) + if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false; + + // Get property values to be used. + // These are be the default values set in the SN_LINGU_PROPERTIES + // PropertySet which are overridden by the supplied ones from the + // last argument. 
+ // You'll probably like to use a simpler solution than the provided + // one using the PropertyHelper_Spell. + PropertyHelper_Spelling& rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + int nInfo = 0; + sal_Int16 nFailure = GetSpellFailure( rWord, rLocale, nInfo ); + if (nFailure != -1 && !rWord.match(SPELL_XML, 0)) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + // postprocess result for errors that should be ignored + const bool bIgnoreError = + (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) || + (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) || + (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR); + if (bIgnoreError) + nFailure = -1; + } +//#define SPELL_COMPOUND 1 << 0 + + // valid word, but it's a rule-based compound word + if ( nFailure == -1 && (nInfo & SPELL_COMPOUND) ) + { + bool bHasHyphen = rWord.indexOf('-') > -1; + if ( (bHasHyphen && !rHelper.IsSpellHyphenatedCompound()) || + (!bHasHyphen && !rHelper.IsSpellClosedCompound()) ) + { + return false; + } + } + + return (nFailure == -1); +} + +Reference< XSpellAlternatives > + SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale ) +{ + // Retrieves the return values for the 'spell' function call in case + // of a misspelled word. 
+ // Especially it may give a list of suggested (correct) words: + Reference< XSpellAlternatives > xRes; + // note: mutex is held by higher up by spell which covers both + + Hunspell* pMS = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + // first handle smart quotes (single and double) + OUStringBuffer rBuf(rWord); + sal_Int32 n = rBuf.getLength(); + sal_Unicode c; + for (sal_Int32 ix=0; ix < n; ix++) + { + c = rBuf[ix]; + if ((c == 0x201C) || (c == 0x201D)) + rBuf[ix] = u'"'; + if ((c == 0x2018) || (c == 0x2019)) + rBuf[ix] = u'\''; + } + OUString nWord(rBuf.makeStringAndClear()); + + if (n) + { + LanguageType nLang = LinguLocaleToLanguage( rLocale ); + int numsug = 0; + + Sequence< OUString > aStr( 0 ); + for (const auto& currDict : m_DictItems) + { + pMS = nullptr; + eEnc = RTL_TEXTENCODING_DONTKNOW; + + if (rLocale == currDict.m_aDLoc) + { + pMS = currDict.m_pDict.get(); + eEnc = currDict.m_aDEnc; + } + + if (pMS) + { + OString aWrd(OU2ENC(nWord,eEnc)); +#if defined(H_DEPRECATED) + std::vector<std::string> suglst = pMS->suggest(std::string(aWrd)); + if (!suglst.empty()) + { + aStr.realloc(numsug + suglst.size()); + OUString *pStr = aStr.getArray(); + for (size_t ii = 0; ii < suglst.size(); ++ii) + { + OUString cvtwrd(suglst[ii].c_str(), suglst[ii].size(), eEnc); + pStr[numsug + ii] = cvtwrd; + } + numsug += suglst.size(); + } +#else + char ** suglst = nullptr; + int count = pMS->suggest(&suglst, aWrd.getStr()); + if (count) + { + aStr.realloc( numsug + count ); + OUString *pStr = aStr.getArray(); + for (int ii=0; ii < count; ++ii) + { + OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc); + pStr[numsug + ii] = cvtwrd; + } + numsug += count; + } + pMS->free_list(&suglst, count); +#endif + } + } + + // now return an empty alternative for no suggestions or the list of alternatives if some found + xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr ); + return xRes; + } + return xRes; +} + 
+Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell( + const OUString& rWord, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (rLocale == Locale() || rWord.isEmpty()) + return nullptr; + + if (!hasLocale( rLocale )) + return nullptr; + + Reference< XSpellAlternatives > xAlt; + if (!isValid( rWord, rLocale, rProperties )) + { + xAlt = GetProposals( rWord, rLocale ); + } + return xAlt; +} + +sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + bool bRes = false; + if (!m_bDisposing && rxLstnr.is()) + { + bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); + } + return bRes; +} + +OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_HUNSPELL, loc); +} + +void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (m_pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + if (2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + // rArguments.getConstArray()[1] >>= xDicList; + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. 
+ m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) ); + m_pPropHelper->AddAsPropListener(); //! after a reference is established + } + else { + OSL_FAIL( "wrong number of arguments in sequence" ); + } +} + +void SAL_CALL SpellChecker::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing) + { + m_bDisposing = true; + EventObject aEvtObj( static_cast<XSpellChecker *>(this) ); + m_aEvtListeners.disposeAndClear( aEvtObj ); + if (m_pPropHelper) + { + m_pPropHelper->RemoveAsPropListener(); + m_pPropHelper.reset(); + } + } +} + +void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing && rxListener.is()) + m_aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!m_bDisposing && rxListener.is()) + m_aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL SpellChecker::getImplementationName() +{ + return "org.openoffice.lingu.MySpellSpellChecker"; +} + +sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames() +{ + return { SN_SPELLCHECKER }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_SpellChecker_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new SpellChecker()); +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.hxx b/lingucomponent/source/spellcheck/spell/sspellimp.hxx new file mode 100644 index 0000000000..68ddc69b3c --- /dev/null +++ b/lingucomponent/source/spellcheck/spell/sspellimp.hxx @@ -0,0 
+1,120 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_SPELLCHECK_SPELL_SSPELLIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/PropertyValue.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XSpellChecker.hpp> +#include <com/sun/star/linguistic2/XLinguServiceEventBroadcaster.hpp> + +#include <linguistic/lngprophelp.hxx> + +#include <memory> + +#include <hunspell.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +class SpellChecker : + public cppu::WeakImplHelper + < + XSpellChecker, + XLinguServiceEventBroadcaster, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + struct DictItem + { + OUString 
m_aDName; + Locale m_aDLoc; + std::unique_ptr<Hunspell> m_pDict; + rtl_TextEncoding m_aDEnc; + + DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc); + }; + + std::vector<DictItem> m_DictItems; + + Sequence< Locale > m_aSuppLocales; + + ::comphelper::OInterfaceContainerHelper3<XEventListener> m_aEvtListeners; + std::unique_ptr<linguistic::PropertyHelper_Spelling> m_pPropHelper; + bool m_bDisposing; + + SpellChecker(const SpellChecker &) = delete; + SpellChecker & operator = (const SpellChecker &) = delete; + + linguistic::PropertyHelper_Spelling& GetPropHelper_Impl(); + linguistic::PropertyHelper_Spelling& GetPropHelper() + { + return m_pPropHelper ? *m_pPropHelper : GetPropHelper_Impl(); + } + + sal_Int16 GetSpellFailure( const OUString &rWord, const Locale &rLocale, int& rInfo ); + Reference< XSpellAlternatives > GetProposals( const OUString &rWord, const Locale &rLocale ); + +public: + SpellChecker(); + virtual ~SpellChecker() override; + + // XSupportedLocales (for XSpellChecker) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XSpellChecker + virtual sal_Bool SAL_CALL isValid( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + virtual Reference< XSpellAlternatives > SAL_CALL spell( const OUString& rWord, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + + // XLinguServiceEventBroadcaster + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& 
rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/lnth.component b/lingucomponent/source/thesaurus/libnth/lnth.component new file mode 100644 index 0000000000..66e90e2cf2 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/lnth.component @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="org.openoffice.lingu.new.Thesaurus" + constructor="lingucomponent_Thesaurus_get_implementation" single-instance="true"> + <service name="com.sun.star.linguistic2.Thesaurus"/> + </implementation> +</component> diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.cxx b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx new file mode 100644 index 0000000000..6d076d8484 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesdta.cxx @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <osl/mutex.hxx> + +#include "nthesdta.hxx" +#include <linguistic/misc.hxx> +#include <utility> + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; + +namespace linguistic +{ + +Meaning::Meaning(OUString _aTerm) : + aSyn ( Sequence< OUString >(1) ), + aTerm (std::move(_aTerm)) +{ +#if 0 + // this is for future use by a german thesaurus when one exists + bIsGermanPreReform = rHelper.IsGermanPreReform; +#endif +} + +Meaning::~Meaning() +{ +} + +OUString SAL_CALL Meaning::getMeaning() +{ + MutexGuard aGuard( GetLinguMutex() ); + return aTerm; +} + +Sequence< OUString > SAL_CALL Meaning::querySynonyms() +{ + MutexGuard aGuard( GetLinguMutex() ); + return aSyn; +} + +void Meaning::SetSynonyms( const Sequence< OUString > &rSyn ) +{ + MutexGuard aGuard( GetLinguMutex() ); + aSyn = rSyn; +} + +void Meaning::SetMeaning( const OUString &rTerm ) +{ + MutexGuard aGuard( GetLinguMutex() ); + aTerm = rTerm; +} + +} // namespace linguistic + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesdta.hxx b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx new file mode 100644 index 0000000000..fb2c6438d7 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesdta.hxx @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESDTA_HXX + +#include <com/sun/star/linguistic2/XMeaning.hpp> +#include <cppuhelper/implbase.hxx> + +namespace linguistic +{ + +class Meaning : + public cppu::WeakImplHelper< css::linguistic2::XMeaning > +{ + css::uno::Sequence< OUString > aSyn; // list of synonyms, may be empty. + OUString aTerm; + +#if 0 + // this is for future use by a German thesaurus + sal_Bool bIsGermanPreReform; +#endif + + Meaning(const Meaning &) = delete; + Meaning & operator = (const Meaning &) = delete; + +public: + explicit Meaning(OUString aTerm); + virtual ~Meaning() override; + + // XMeaning + virtual OUString SAL_CALL getMeaning() override; + virtual css::uno::Sequence< OUString > SAL_CALL querySynonyms() override; + + // non-interface specific functions + void SetSynonyms( const css::uno::Sequence< OUString > &rSyn ); + void SetMeaning( const OUString &rTerm ); +}; + +} // namespace linguistic + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx new file mode 100644 index 0000000000..ea3e3af8dd --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx @@ -0,0 +1,571 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/uno/Reference.h> +#include <cppuhelper/factory.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/weak.hxx> +#include <com/sun/star/linguistic2/LinguServiceManager.hpp> +#include <com/sun/star/linguistic2/XLinguProperties.hpp> +#include <com/sun/star/linguistic2/XSpellChecker1.hpp> +#include <i18nlangtag/languagetag.hxx> +#include <tools/debug.hxx> +#include <comphelper/lok.hxx> +#include <comphelper/processfactory.hxx> +#include <comphelper/sequence.hxx> +#include <osl/mutex.hxx> +#include <osl/thread.h> +#include <unotools/lingucfg.hxx> +#include <unotools/resmgr.hxx> + +#include <rtl/string.hxx> +#include <rtl/textenc.h> + +#include <svtools/strings.hrc> + +#include "nthesimp.hxx" +#include <linguistic/misc.hxx> +#include "nthesdta.hxx" + +#include <vector> +#include <numeric> +#include <set> +#include <string.h> + +// XML-header to query SPELLML support +constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>"; + +using namespace osl; +using namespace com::sun::star; +using namespace com::sun::star::beans; +using namespace com::sun::star::lang; +using namespace com::sun::star::uno; +using namespace com::sun::star::linguistic2; +using namespace linguistic; + +static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl() +{ + uno::Reference< XComponentContext 
> xContext( comphelper::getProcessComponentContext() ); + uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ; + return xRes; +} + +Thesaurus::Thesaurus() : + aEvtListeners ( GetLinguMutex() ), pPropHelper(nullptr), bDisposing(false), + prevLocale(LANGUAGE_DONTKNOW) +{ +} + +Thesaurus::~Thesaurus() +{ + mvThesInfo.clear(); + if (pPropHelper) + { + pPropHelper->RemoveAsPropListener(); + } +} + +PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl() +{ + if (!pPropHelper) + { + Reference< XLinguProperties > xPropSet = GetLinguProperties(); + + pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + return *pPropHelper; +} + +Sequence< Locale > SAL_CALL Thesaurus::getLocales() +{ + MutexGuard aGuard( GetLinguMutex() ); + + // this routine should return the locales supported by the installed + // dictionaries. + if (mvThesInfo.empty()) + { + SvtLinguConfig aLinguCfg; + + // get list of dictionaries-to-use + std::vector< SvtLinguConfigDictionaryEntry > aDics; + uno::Sequence< OUString > aFormatList; + aLinguCfg.GetSupportedDictionaryFormatsFor( "Thesauri", + "org.openoffice.lingu.new.Thesaurus", aFormatList ); + for (const auto& rFormat : std::as_const(aFormatList)) + { + std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( + aLinguCfg.GetActiveDictionariesByFormat( rFormat ) ); + aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); + } + + //!! for compatibility with old dictionaries (the ones not using extensions + //!! or new configuration entries, but still using the dictionary.lst file) + //!! Get the list of old style spell checking dictionaries to use... 
+ std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( + GetOldStyleDics( "THES" ) ); + + // to prefer dictionaries with configuration entries we will only + // use those old style dictionaries that add a language that + // is not yet supported by the list of new style dictionaries + MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); + + if (!aDics.empty()) + { + // get supported locales from the dictionaries-to-use... + std::set<OUString> aLocaleNamesSet; + for (auto const& dict : aDics) + { + for (const auto& rLocaleName : dict.aLocaleNames) + { + if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName)) + continue; + + aLocaleNamesSet.insert( rLocaleName ); + } + } + // ... and add them to the resulting sequence + std::vector<Locale> aLocalesVec; + aLocalesVec.reserve(aLocaleNamesSet.size()); + + std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec), + [](const OUString& localeName) -> Locale { return LanguageTag::convertToLocale(localeName); }); + + aSuppLocales = comphelper::containerToSequence(aLocalesVec); + + //! For each dictionary and each locale we need a separate entry. + //! If this results in more than one dictionary per locale than (for now) + //! it is undefined which dictionary gets used. + //! In the future the implementation should support using several dictionaries + //! for one locale. + sal_Int32 numthes = std::accumulate(aDics.begin(), aDics.end(), 0, + [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) { + return nSum + dict.aLocaleNames.getLength(); }); + + // add dictionary information + mvThesInfo.resize(numthes); + + sal_Int32 k = 0; + for (auto const& dict : aDics) + { + if (dict.aLocaleNames.hasElements() && + dict.aLocations.hasElements()) + { + // currently only one language per dictionary is supported in the actual implementation... + // Thus here we work-around this by adding the same dictionary several times. 
+ // Once for each of its supported locales. + for (const auto& rLocaleName : dict.aLocaleNames) + { + LanguageTag aLanguageTag(rLocaleName); + mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW; + mvThesInfo[k].aLocale = aLanguageTag.getLocale(); + mvThesInfo[k].aCharSetInfo.reset( new CharClass( std::move(aLanguageTag) ) ); + // also both files have to be in the same directory and the + // file names must only differ in the extension (.aff/.dic). + // Thus we use the first location only and strip the extension part. + OUString aLocation = dict.aLocations[0]; + sal_Int32 nPos = aLocation.lastIndexOf( '.' ); + aLocation = aLocation.copy( 0, nPos ); + mvThesInfo[k].aName = aLocation; + + ++k; + } + } + } + DBG_ASSERT( k == numthes, "index mismatch?" ); + } + else + { + /* no dictionary found so register no dictionaries */ + mvThesInfo.clear(); + aSuppLocales.realloc(0); + } + } + + return aSuppLocales; +} + +sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!aSuppLocales.hasElements()) + getLocales(); + + return comphelper::findValue(aSuppLocales, rLocale) != -1; +} + +Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings( + const OUString& qTerm, const Locale& rLocale, + const css::uno::Sequence< css::beans::PropertyValue >& rProperties) +{ + MutexGuard aGuard( GetLinguMutex() ); + + uno::Sequence< Reference< XMeaning > > aMeanings( 1 ); + uno::Sequence< Reference< XMeaning > > noMeanings( 0 ); + uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() ); + uno::Reference< XSpellChecker1 > xSpell; + + OUString aRTerm(qTerm); + OUString aPTerm(qTerm); + CapType ct = CapType::UNKNOWN; + sal_Int32 stem = 0; + sal_Int32 stem2 = 0; + + LanguageType nLanguage = LinguLocaleToLanguage( rLocale ); + + if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty()) + return noMeanings; + + if (!hasLocale( rLocale )) +#ifdef LINGU_EXCEPTIONS + throw( 
IllegalArgumentException() ); +#else + return noMeanings; +#endif + + if (prevTerm == qTerm && prevLocale == nLanguage) + return prevMeanings; + + mentry * pmean = nullptr; + sal_Int32 nmean = 0; + + PropertyHelper_Thesaurus &rHelper = GetPropHelper(); + rHelper.SetTmpPropVals( rProperties ); + + MyThes * pTH = nullptr; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass * pCC = nullptr; + + // find the first thesaurus that matches the locale + for (size_t i =0; i < mvThesInfo.size(); i++) + { + if (rLocale == mvThesInfo[i].aLocale) + { + // open up and initialize this thesaurus if need be + if (!mvThesInfo[i].aThes) + { + OUString datpath = mvThesInfo[i].aName + ".dat"; + OUString idxpath = mvThesInfo[i].aName + ".idx"; + OUString ndat; + OUString nidx; + osl::FileBase::getSystemPathFromFileURL(datpath,ndat); + osl::FileBase::getSystemPathFromFileURL(idxpath,nidx); + +#if defined(_WIN32) + // MyThes waits UTF-8 encoded paths with \\?\ long path prefix. + OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8)); + OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8)); +#else + OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding())); + OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding())); +#endif + + mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) ); + mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding()); + } + pTH = mvThesInfo[i].aThes.get(); + eEnc = mvThesInfo[i].aEncoding; + pCC = mvThesInfo[i].aCharSetInfo.get(); + + if (pTH) + break; + } + } + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. 
;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return noMeanings; + + while (pTH) + { + // convert word to all lower case for searching + if (!stem) + ct = capitalType(aRTerm, pCC); + OUString nTerm(makeLowerCase(aRTerm, pCC)); + OString aTmp( OU2ENC(nTerm, eEnc) ); + nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean); + + if (nmean) + aMeanings.realloc( nmean ); + + mentry * pe = pmean; + OUString codeTerm = qTerm; + Reference< XSpellAlternatives > xTmpRes2; + + if (stem) + { + xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" + + aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes2.is()) + { + Sequence<OUString>seq = xTmpRes2->getAlternatives(); + if (seq.hasElements()) + { + codeTerm = seq[0]; + stem2 = 1; + } + } + } + + for (int j = 0; j < nmean; j++) + { + int count = pe->count; + if (count) + { + Sequence< OUString > aStr( count ); + OUString *pStr = aStr.getArray(); + + for (int i=0; i < count; i++) + { + OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc ); + sal_Int32 catpos = sTerm.indexOf('('); + OUString catst; + if (catpos > 2) + { + // remove category name for affixation and casing + catst = OUString::Concat(" ") + sTerm.subView(catpos); + sTerm = sTerm.copy(0, catpos); + sTerm = sTerm.trim(); + } + // generate synonyms with affixes + if (stem && stem2) + { + Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" + + sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + sTerm = seq[0]; + } + } + + CapType ct1 = capitalType(sTerm, pCC); + if (CapType::MIXED == ct1) + ct = ct1; + OUString cTerm; + switch (ct) + { + case CapType::ALLCAP: + cTerm = makeUpperCase(sTerm, pCC); + break; 
+ case CapType::INITCAP: + cTerm = makeInitCap(sTerm, pCC); + break; + default: + cTerm = sTerm; + break; + } + OUString aAlt( cTerm + catst); + pStr[i] = aAlt; + } + rtl::Reference<Meaning> pMn = new Meaning(aRTerm); + OUString dTerm(pe->defn,strlen(pe->defn),eEnc ); + pMn->SetMeaning(dTerm); + pMn->SetSynonyms(aStr); + Reference<XMeaning>* pMeaning = aMeanings.getArray(); + pMeaning[j] = pMn; + } + pe++; + } + pTH->CleanUpAfterLookup(&pmean,nmean); + + if (nmean) + { + prevTerm = qTerm; + prevMeanings = aMeanings; + prevLocale = nLanguage; + return aMeanings; + } + + if (stem || !xLngSvcMgr.is()) + return noMeanings; + stem = 1; + + xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY ); + if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties )) + return noMeanings; + Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" + + aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + { + aRTerm = seq[0]; // XXX Use only the first stem + continue; + } + } + + // stem the last word of the synonym (for categories after affixation) + aRTerm = aRTerm.trim(); + sal_Int32 pos = aRTerm.lastIndexOf(' '); + if (!pos) + return noMeanings; + xTmpRes = xSpell->spell( OUString::Concat("<?xml?><query type='stem'><word>") + + aRTerm.subView(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties ); + if (xTmpRes.is()) + { + Sequence<OUString>seq = xTmpRes->getAlternatives(); + if (seq.hasElements()) + { + aPTerm = aRTerm.copy(pos + 1); + aRTerm = aRTerm.subView(0, pos + 1) + seq[0]; +#if 0 + for (int i = 0; i < seq.getLength(); i++) + { + OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8); + fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer); + } +#endif + continue; + } + } + break; + } + return noMeanings; +} + +OUString SAL_CALL 
Thesaurus::getServiceDisplayName(const Locale& rLocale) +{ + std::locale loc(Translate::Create("svt", LanguageTag(rLocale))); + return Translate::get(STR_DESCRIPTION_MYTHES, loc); +} + +void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (pPropHelper) + return; + + sal_Int32 nLen = rArguments.getLength(); + // Accept one of two args so we can be compatible with the call site in GetAvailLocales() + // linguistic module + if (1 == nLen || 2 == nLen) + { + Reference< XLinguProperties > xPropSet; + rArguments.getConstArray()[0] >>= xPropSet; + assert(xPropSet); + + //! Pointer allows for access of the non-UNO functions. + //! And the reference to the UNO-functions while increasing + //! the ref-count and will implicitly free the memory + //! when the object is no longer used. + pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet ); + pPropHelper->AddAsPropListener(); //! after a reference is established + } + else + OSL_FAIL( "wrong number of arguments in sequence" ); +} + +OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + { + return ( pCC->uppercase(bTemp, 0, 1) + + pCC->lowercase(aTerm,1,(tlen-1)) ); + } + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +void SAL_CALL Thesaurus::dispose() +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing) + { + bDisposing = true; + EventObject aEvtObj( static_cast<XThesaurus *>(this) ); + aEvtListeners.disposeAndClear( aEvtObj ); + if (pPropHelper) + { + 
pPropHelper->RemoveAsPropListener(); + delete pPropHelper; + pPropHelper = nullptr; + } + } +} + +void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.addInterface( rxListener ); +} + +void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener ) +{ + MutexGuard aGuard( GetLinguMutex() ); + + if (!bDisposing && rxListener.is()) + aEvtListeners.removeInterface( rxListener ); +} + +// Service specific part +OUString SAL_CALL Thesaurus::getImplementationName() +{ + return "org.openoffice.lingu.new.Thesaurus"; +} + +sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames() +{ + return { SN_THESAURUS }; +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +lingucomponent_Thesaurus_get_implementation( + css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new Thesaurus()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx new file mode 100644 index 0000000000..04eab06882 --- /dev/null +++ b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx @@ -0,0 +1,129 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX +#define INCLUDED_LINGUCOMPONENT_SOURCE_THESAURUS_LIBNTH_NTHESIMP_HXX + +#include <comphelper/interfacecontainer3.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/uno/Reference.h> +#include <com/sun/star/uno/Sequence.h> +#include <com/sun/star/lang/XComponent.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/lang/XServiceDisplayName.hpp> +#include <com/sun/star/beans/XPropertySet.hpp> +#include <com/sun/star/beans/PropertyValues.hpp> + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/linguistic2/XMeaning.hpp> +#include <com/sun/star/linguistic2/XThesaurus.hpp> + +#include <unotools/charclass.hxx> + +#include <lingutil.hxx> +#include <linguistic/misc.hxx> +#include <linguistic/lngprophelp.hxx> + +#include <osl/file.hxx> +#include <mythes.hxx> +#include <memory> +#include <vector> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::beans; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::linguistic2; + +namespace com::sun::star::beans { class XPropertySet; } + +class Thesaurus : + public cppu::WeakImplHelper + < + XThesaurus, + XInitialization, + XComponent, + XServiceInfo, + XServiceDisplayName + > +{ + Sequence< Locale > aSuppLocales; + + ::comphelper::OInterfaceContainerHelper3<XEventListener> aEvtListeners; + linguistic::PropertyHelper_Thesaurus* pPropHelper; + bool bDisposing; + struct ThesInfo + { + std::unique_ptr<CharClass> aCharSetInfo; + std::unique_ptr<MyThes> aThes; + rtl_TextEncoding aEncoding; + Locale 
aLocale; + OUString aName; + }; + std::vector<ThesInfo> mvThesInfo; + + // cache for the Thesaurus dialog + Sequence < Reference < css::linguistic2::XMeaning > > prevMeanings; + OUString prevTerm; + LanguageType prevLocale; + + Thesaurus(const Thesaurus &) = delete; + Thesaurus & operator = (const Thesaurus &) = delete; + + linguistic::PropertyHelper_Thesaurus& GetPropHelper_Impl(); + linguistic::PropertyHelper_Thesaurus& GetPropHelper() + { + return pPropHelper ? *pPropHelper : GetPropHelper_Impl(); + } + +public: + Thesaurus(); + virtual ~Thesaurus() override; + + // XSupportedLocales (for XThesaurus) + virtual Sequence< Locale > SAL_CALL getLocales() override; + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) override; + + // XThesaurus + virtual Sequence< Reference < css::linguistic2::XMeaning > > SAL_CALL queryMeanings( const OUString& rTerm, const Locale& rLocale, const css::uno::Sequence< css::beans::PropertyValue >& rProperties ) override; + + // XServiceDisplayName + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) override; + + // XInitialization + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) override; + + // XComponent + virtual void SAL_CALL dispose() override; + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) override; + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + +private: + static OUString makeLowerCase(const OUString&, CharClass const *); + static OUString makeUpperCase(const OUString&, CharClass const *); + static OUString makeInitCap(const OUString&, CharClass const *); +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |