diff options
Diffstat (limited to 'external/hunspell')
-rw-r--r-- | external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch | 149 | ||||
-rw-r--r-- | external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch | 77 | ||||
-rw-r--r-- | external/hunspell/0001-fix-LibreOffice-build-problem-with-basic_string-appe.patch | 84 | ||||
-rw-r--r-- | external/hunspell/ExternalProject_hunspell.mk | 38 | ||||
-rw-r--r-- | external/hunspell/Makefile | 7 | ||||
-rw-r--r-- | external/hunspell/Module_hunspell.mk | 25 | ||||
-rw-r--r-- | external/hunspell/README | 4 | ||||
-rw-r--r-- | external/hunspell/StaticLibrary_hunspell.mk | 41 | ||||
-rw-r--r-- | external/hunspell/UnpackedTarball_hunspell.mk | 32 | ||||
-rw-r--r-- | external/hunspell/bit_cast.patch.0 | 22 | ||||
-rw-r--r-- | external/hunspell/clock-monotonic.patch.1 | 105 |
11 files changed, 584 insertions, 0 deletions
diff --git a/external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch b/external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch new file mode 100644 index 0000000000..2e903a34e4 --- /dev/null +++ b/external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch @@ -0,0 +1,149 @@ +From b88f9ea57bdb9b219f3c1d2c67f4f882f1f23194 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= <nemeth@numbertext.org> +Date: Sun, 14 May 2023 22:15:15 +0200 +Subject: [PATCH] Keep only REP, ph: or 2-word dictionary phrase suggestions + +These are the best suggestions, no need to search other +ones to avoid annoying redundant and long list. + +For example to suggest only "a lot" to the bad form "alot", +add the 2-word phrase "a lot" to the dic file. + +Or for a very typical spelling mistake, enough to specify the +bad form with a ph: in the dictionary file to remove the other +suggestions. + +Note: partial revert of commit de9fe28008eb0761c33bd83847f282602c599fda +"fix up some warnings seen with -Wall -Wextra". +--- + src/hunspell/atypes.hxx | 1 + + src/hunspell/suggestmgr.cxx | 31 ++++++++++++++++++++++++++----- + src/hunspell/suggestmgr.hxx | 2 +- + tests/ph.sug | 4 ++-- + tests/rep.sug | 2 +- + 5 files changed, 31 insertions(+), 9 deletions(-) + +diff --git a/src/hunspell/atypes.hxx b/src/hunspell/atypes.hxx +index 7e5a5c0..6e3ed1b 100644 +--- a/src/hunspell/atypes.hxx ++++ b/src/hunspell/atypes.hxx +@@ -82,6 +82,7 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {} + #define SPELL_ORIGCAP (1 << 5) + #define SPELL_WARN (1 << 6) + #define SPELL_COMPOUND_2 (1 << 7) // permit only 2 dictionary words in the compound ++#define SPELL_BEST_SUG (1 << 8) // limit suggestions for the best ones, i.e. 
ph: + + #define MINCPDLEN 3 + #define MAXCOMPOUND 10 +diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx +index 19a24f8..ba688aa 100644 +--- a/src/hunspell/suggestmgr.cxx ++++ b/src/hunspell/suggestmgr.cxx +@@ -242,8 +242,11 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + size_t i = slst.size(); + replchars(slst, word, cpdsuggest, info); +- if (slst.size() > i) ++ if (slst.size() > i) { + good_suggestion = true; ++ if (info & SPELL_BEST_SUG) ++ return true; ++ } + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; +@@ -365,7 +368,10 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + // we always suggest them, in despite of nosplitsugs, and + // drop compound word and other suggestions) + if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) { +- good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion, info); ++ good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion, info); ++ ++ if (info & SPELL_BEST_SUG) ++ return true; + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; +@@ -506,15 +512,23 @@ int SuggestMgr::replchars(std::vector<std::string>& wlst, + candidate.assign(word, 0, r); + candidate.append(entry.outstrings[type]); + candidate.append(word, r + entry.pattern.size(), std::string::npos); ++ size_t sp = candidate.find(' '); ++ size_t oldns = wlst.size(); + testsug(wlst, candidate, cpdsuggest, NULL, NULL, info); ++ if (oldns < wlst.size()) { ++ int patlen = entry.pattern.size(); ++ int replen = entry.outstrings[type].size(); ++ // REP suggestions are the best, don't search other type of suggestions ++ info |= SPELL_BEST_SUG; ++ } ++ + // check REP suggestions with space +- size_t sp = candidate.find(' '); + if (sp != std::string::npos) { + size_t prev = 0; + while (sp != std::string::npos) { + std::string prev_chunk = 
candidate.substr(prev, sp - prev); + if (checkword(prev_chunk, 0, NULL, NULL)) { +- size_t oldns = wlst.size(); ++ oldns = wlst.size(); + std::string post_chunk = candidate.substr(sp + 1); + testsug(wlst, post_chunk, cpdsuggest, NULL, NULL, info); + if (oldns < wlst.size()) { +@@ -854,11 +868,15 @@ bool SuggestMgr::twowords(std::vector<std::string>& wlst, + // alot -> a lot, alto, slot... + *p = ' '; + if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) { ++ // best solution ++ info |= SPELL_BEST_SUG; ++ + // remove not word pair suggestions + if (!good) { + good = true; + wlst.clear(); + } ++ + wlst.insert(wlst.begin(), candidate); + } + +@@ -867,6 +885,9 @@ bool SuggestMgr::twowords(std::vector<std::string>& wlst, + *p = '-'; + + if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) { ++ // best solution ++ info |= SPELL_BEST_SUG; ++ + // remove not word pair suggestions + if (!good) { + good = true; +diff --git a/tests/ph.sug b/tests/ph.sug +index 8daee56..ccd936e 100644 +--- a/tests/ph.sug ++++ b/tests/ph.sug +@@ -1,11 +1,11 @@ + a lot +-in spite, inspire ++in spite + what + what + Wednesday + Wednesday + Wednesday + Wednesday +-which, witch, winch, wish ++which, witch + Oh, my gosh! + OH, MY GOSH! 
+diff --git a/tests/rep.sug b/tests/rep.sug +index b48a5b8..424731c 100644 +--- a/tests/rep.sug ++++ b/tests/rep.sug +@@ -5,4 +5,4 @@ a lot, lot + un alunno + bar + vinte e un +-auto's, auto ++auto's +-- +2.25.1 + diff --git a/external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch b/external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch new file mode 100644 index 0000000000..c0225fbd70 --- /dev/null +++ b/external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch @@ -0,0 +1,77 @@ +From e2fe9f86e1769b440972971240e9b8fb1cd53b97 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com> +Date: Fri, 6 Jan 2023 16:20:45 +0000 +Subject: [PATCH] Resolves: rhbz#2158548 allow longer words for hunspell-ko +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +https://github.com/hunspell/hunspell/issues/903 + +A problem since the sanity check added in: + +commit 05e44e069e4cfaa9ce1264bf13f23fc9abd7ed05 +Author: Caolán McNamara <caolanm@redhat.com> +Date: Thu Sep 1 13:46:40 2022 +0100 + + Check word limit (#813) + + * check against hentry blen max +--- + src/hunspell/hashmgr.cxx | 6 +++--- + src/hunspell/htypes.hxx | 4 ++-- + tests/korean.dic | 3 ++- + 3 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx +index 100916d..14201e9 100644 +--- a/src/hunspell/hashmgr.cxx ++++ b/src/hunspell/hashmgr.cxx +@@ -209,7 +209,7 @@ int HashMgr::add_word(const std::string& in_word, + } + + // limit of hp->blen +- if (word->size() > std::numeric_limits<unsigned char>::max()) { ++ if (word->size() > std::numeric_limits<unsigned short>::max()) { + HUNSPELL_WARNING(stderr, "error: word len %ld is over max limit\n", word->size()); + delete desc_copy; + delete word_copy; +@@ -235,8 +235,8 @@ int HashMgr::add_word(const std::string& in_word, + + int i = hash(hpw, word->size()); + +- hp->blen 
= (unsigned char)word->size(); +- hp->clen = (unsigned char)wcl; ++ hp->blen = (unsigned short)word->size(); ++ hp->clen = (unsigned short)wcl; + hp->alen = (short)al; + hp->astr = aff; + hp->next = NULL; +diff --git a/src/hunspell/htypes.hxx b/src/hunspell/htypes.hxx +index 44366b1..2b896fb 100644 +--- a/src/hunspell/htypes.hxx ++++ b/src/hunspell/htypes.hxx +@@ -62,8 +62,8 @@ + #endif + + struct hentry { +- unsigned char blen; // word length in bytes +- unsigned char clen; // word length in characters (different for UTF-8 enc.) ++ unsigned short blen; // word length in bytes ++ unsigned short clen; // word length in characters (different for UTF-8 enc.) + short alen; // length of affix flag vector + unsigned short* astr; // affix flag vector + struct hentry* next; // next word with same hash code +diff --git a/tests/korean.dic b/tests/korean.dic +index 95cb450..d76ea05 100644 +--- a/tests/korean.dic ++++ b/tests/korean.dic +@@ -1,3 +1,4 @@ +-2 ++3 + 들어오세요 + 안녕하세요 ++김수한무거북이와두루미삼천갑자동방삭치치카포사리사리세ᅡ워리워리세브리캉무드셀ᅡ구름위허ᅵ케ᅵᆫᅦ담벼락서생원에ᄀ양 +-- +2.38.1 + diff --git a/external/hunspell/0001-fix-LibreOffice-build-problem-with-basic_string-appe.patch b/external/hunspell/0001-fix-LibreOffice-build-problem-with-basic_string-appe.patch new file mode 100644 index 0000000000..3c6f9831b2 --- /dev/null +++ b/external/hunspell/0001-fix-LibreOffice-build-problem-with-basic_string-appe.patch @@ -0,0 +1,84 @@ +From 1587ea4ab5e8d94c9c0d552f7ab61c217ebdcbeb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= <nemeth@numbertext.org> +Date: Fri, 30 Dec 2022 12:20:02 +0100 +Subject: [PATCH] fix LibreOffice build problem with basic_string::append() + +--- + src/hunspell/affentry.cxx | 6 +++--- + src/hunspell/affixmgr.cxx | 8 ++++---- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx +index 46e8b58..6ee23be 100644 +--- a/src/hunspell/affentry.cxx ++++ b/src/hunspell/affentry.cxx +@@ -290,7 +290,7 @@ struct 
hentry* PfxEntry::check_twosfx(const std::string& word, + // back any characters that would have been stripped + + std::string tmpword(strip); +- tmpword.append(word, start + appnd.size()); ++ tmpword.append(word, start + appnd.size(), tmpl); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of +@@ -338,7 +338,7 @@ std::string PfxEntry::check_twosfx_morph(const std::string& word, + // back any characters that would have been stripped + + std::string tmpword(strip); +- tmpword.append(word, start + appnd.size()); ++ tmpword.append(word, start + appnd.size(), tmpl); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of +@@ -386,7 +386,7 @@ std::string PfxEntry::check_morph(const std::string& word, + // back any characters that would have been stripped + + std::string tmpword(strip); +- tmpword.append(word, start + appnd.size()); ++ tmpword.append(word, start + appnd.size(), tmpl); + + // now make sure all of the conditions on characters + // are met. 
Please see the appendix at the end of +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 4d1ad2f..a8931c1 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -2465,7 +2465,7 @@ int AffixMgr::compound_check_morph(const std::string& word, + result.append(presult); + result.push_back(MSEP_FLD); + result.append(MORPH_PART); +- result.append(word, i); ++ result.append(word, i, word.size()); + if (complexprefixes && HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { +@@ -2522,7 +2522,7 @@ int AffixMgr::compound_check_morph(const std::string& word, + result.append(presult); + result.push_back(MSEP_FLD); + result.append(MORPH_PART); +- result.append(word, i); ++ result.append(word, i, word.size()); + + if (HENTRY_DATA(rv)) { + if (complexprefixes) +@@ -2573,7 +2573,7 @@ int AffixMgr::compound_check_morph(const std::string& word, + if (!m.empty()) { + result.push_back(MSEP_FLD); + result.append(MORPH_PART); +- result.append(word, i); ++ result.append(word, i, word.size()); + line_uniq_app(m, MSEP_REC); + result.append(m); + } +@@ -2665,7 +2665,7 @@ int AffixMgr::compound_check_morph(const std::string& word, + if (!m.empty()) { + result.push_back(MSEP_FLD); + result.append(MORPH_PART); +- result.append(word, i); ++ result.append(word, i, word.size()); + line_uniq_app(m, MSEP_REC); + result.push_back(MSEP_FLD); + result.append(m); +-- +2.17.1 + diff --git a/external/hunspell/ExternalProject_hunspell.mk b/external/hunspell/ExternalProject_hunspell.mk new file mode 100644 index 0000000000..47c4c736bd --- /dev/null +++ b/external/hunspell/ExternalProject_hunspell.mk @@ -0,0 +1,38 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_ExternalProject_ExternalProject,hunspell)) + +$(eval $(call gb_ExternalProject_register_targets,hunspell,\ + build \ +)) + +hunspell_CPPFLAGS=$(CPPFLAGS) + +hunspell_CPPFLAGS+=$(gb_COMPILERDEFS_STDLIB_DEBUG) + +hunspell_CXXFLAGS:=$(CXXFLAGS) $(gb_LTOFLAGS) \ + $(gb_EMSCRIPTEN_CPPFLAGS) \ + $(call gb_ExternalProject_get_build_flags,hunspell) + +hunspell_LDFLAGS:=$(gb_LTOFLAGS) $(call gb_ExternalProject_get_link_flags,hunspell) + +$(call gb_ExternalProject_get_state_target,hunspell,build): + $(call gb_Trace_StartRange,hunspell,EXTERNAL) + $(call gb_ExternalProject_run,build,\ + $(gb_RUN_CONFIGURE) ./configure --disable-shared --disable-nls --with-pic \ + $(gb_CONFIGURE_PLATFORMS) \ + $(if $(hunspell_CPPFLAGS),CPPFLAGS='$(hunspell_CPPFLAGS)') \ + $(if $(hunspell_CXXFLAGS),CXXFLAGS='$(hunspell_CXXFLAGS)') \ + $(if $(hunspell_LDFLAGS),LDFLAGS='$(hunspell_LDFLAGS)') \ + && cd src/hunspell && $(MAKE) \ + ) + $(call gb_Trace_EndRange,hunspell,EXTERNAL) + +# vim: set noet sw=4 ts=4: diff --git a/external/hunspell/Makefile b/external/hunspell/Makefile new file mode 100644 index 0000000000..e4968cf85f --- /dev/null +++ b/external/hunspell/Makefile @@ -0,0 +1,7 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/external/hunspell/Module_hunspell.mk b/external/hunspell/Module_hunspell.mk new file mode 100644 index 0000000000..505a9fb0cb --- /dev/null +++ b/external/hunspell/Module_hunspell.mk @@ -0,0 +1,25 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Module_Module,hunspell)) + +$(eval $(call gb_Module_add_targets,hunspell,\ + UnpackedTarball_hunspell \ +)) +ifeq ($(COM),MSC) +$(eval $(call gb_Module_add_targets,hunspell,\ + StaticLibrary_hunspell \ +)) +else +$(eval $(call gb_Module_add_targets,hunspell,\ + ExternalProject_hunspell \ +)) +endif + +# vim: set noet sw=4 ts=4: diff --git a/external/hunspell/README b/external/hunspell/README new file mode 100644 index 0000000000..1221735cad --- /dev/null +++ b/external/hunspell/README @@ -0,0 +1,4 @@ +Library for spell checking. + +From: +[http://hunspell.sourceforge.net/]. diff --git a/external/hunspell/StaticLibrary_hunspell.mk b/external/hunspell/StaticLibrary_hunspell.mk new file mode 100644 index 0000000000..3a2eccafb0 --- /dev/null +++ b/external/hunspell/StaticLibrary_hunspell.mk @@ -0,0 +1,41 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_StaticLibrary_StaticLibrary,hunspell)) + +$(eval $(call gb_StaticLibrary_use_unpacked,hunspell,hunspell)) + +$(eval $(call gb_StaticLibrary_use_external,hunspell,icu_headers)) + +$(eval $(call gb_StaticLibrary_set_warnings_disabled,hunspell)) + +$(eval $(call gb_StaticLibrary_add_defs,hunspell,\ + -DHUNSPELL_STATIC \ + -DOPENOFFICEORG \ +)) + +ifneq ($(ENABLE_WASM_STRIP_HUNSPELL),TRUE) +$(eval $(call gb_StaticLibrary_add_generated_exception_objects,hunspell,\ + UnpackedTarball/hunspell/src/hunspell/hunspell \ +)) +endif + +$(eval $(call gb_StaticLibrary_add_generated_exception_objects,hunspell,\ + UnpackedTarball/hunspell/src/hunspell/affentry \ + UnpackedTarball/hunspell/src/hunspell/affixmgr \ + UnpackedTarball/hunspell/src/hunspell/csutil \ + UnpackedTarball/hunspell/src/hunspell/hashmgr \ + UnpackedTarball/hunspell/src/hunspell/suggestmgr \ + UnpackedTarball/hunspell/src/hunspell/phonet \ + UnpackedTarball/hunspell/src/hunspell/hunzip \ + UnpackedTarball/hunspell/src/hunspell/filemgr \ + UnpackedTarball/hunspell/src/hunspell/replist \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk new file mode 100644 index 0000000000..0d59867982 --- /dev/null +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -0,0 +1,32 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_UnpackedTarball_UnpackedTarball,hunspell)) + +$(eval $(call gb_UnpackedTarball_set_tarball,hunspell,$(HUNSPELL_TARBALL))) + +$(eval $(call gb_UnpackedTarball_update_autoconf_configs,hunspell)) + +ifeq ($(COM),MSC) +$(eval $(call gb_UnpackedTarball_set_post_action,hunspell,\ + touch src/hunspell/config.h \ +)) +endif + +$(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1)) + +$(eval $(call gb_UnpackedTarball_add_patches,hunspell, \ + external/hunspell/0001-fix-LibreOffice-build-problem-with-basic_string-appe.patch \ + external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch \ + external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch \ + external/hunspell/bit_cast.patch.0 \ + external/hunspell/clock-monotonic.patch.1 \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/external/hunspell/bit_cast.patch.0 b/external/hunspell/bit_cast.patch.0 new file mode 100644 index 0000000000..777b9bbfdd --- /dev/null +++ b/external/hunspell/bit_cast.patch.0 @@ -0,0 +1,22 @@ +--- src/hunspell/w_char.hxx ++++ src/hunspell/w_char.hxx +@@ -42,9 +42,8 @@ + + #if __cplusplus >= 202002L + #include <bit> +-#else ++#endif + #include <cstring> +-#endif + + #ifndef GCC + struct w_char { +@@ -58,7 +57,7 @@ + { + #if defined(__i386__) || defined(_M_IX86) || defined(_M_X64) + //use little-endian optimized version +-#if __cplusplus >= 202002L ++#if __cplusplus >= 202002L && defined __cpp_lib_bit_cast && __cpp_lib_bit_cast >= 201806L + return std::bit_cast<unsigned short>(*this); + #else + unsigned short u; diff --git a/external/hunspell/clock-monotonic.patch.1 b/external/hunspell/clock-monotonic.patch.1 new file mode 100644 index 0000000000..dd873f6c39 --- /dev/null +++ b/external/hunspell/clock-monotonic.patch.1 @@ -0,0 +1,105 @@ +From 5737bdb3d7e5819528e33c360a73372e0e93a6be Mon Sep 17 00:00:00 2001 +From: Noel Grandin <noel.grandin@collabora.co.uk> +Date: Fri, 3 Nov 2023 12:04:30 +0000 +Subject: [PATCH] speed up 
hunspell inner loop + +which calls into the kernel to get elapsed time, instead of using the +VDSO-based CLOCK_MONOTONIC (or at least, the C++ equivalent of that), +which is much faster + +https://gerrit.libreoffice.org/c/core/+/158809 +--- + src/hunspell/affixmgr.cxx | 43 +++++++++++++++++++++++---------------- + 1 file changed, 25 insertions(+), 18 deletions(-) + +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 2cad09f..a3c93cd 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -75,6 +75,7 @@ + #include <ctime> + + #include <algorithm> ++#include <chrono> + #include <memory> + #include <limits> + #include <string> +@@ -1590,17 +1591,20 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + +- HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ HUNSPELL_THREAD_LOCAL std::chrono::steady_clock::time_point clock_time_start; ++ HUNSPELL_THREAD_LOCAL bool timelimit_exceeded; ++ ++ // get the current time ++ std::chrono::steady_clock::time_point clock_now = std::chrono::steady_clock::now(); + + if (wordnum == 0) { +- // get the start time, seeing as we're reusing this set to 0 +- // to flag timeout, use clock() + 1 to avoid start clock() +- // of 0 as being a timeout +- timelimit = clock() + 1; +- } +- else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { +- timelimit = 0; ++ // set the start time ++ clock_time_start = clock_now; ++ timelimit_exceeded = false; + } ++ else if (std::chrono::duration_cast<std::chrono::milliseconds>(clock_now - clock_time_start).count() ++ > TIMELIMIT * CLOCKS_PER_SEC * 1000) ++ timelimit_exceeded = true; + + setcminmax(&cmin, &cmax, word.c_str(), len); + +@@ -1626,7 +1630,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + + do { // simplified checkcompoundpattern loop + +- if (timelimit == 0) ++ if (timelimit_exceeded) + return 0; + + if (scpd > 0) { +@@ 
-2216,17 +2220,20 @@ int AffixMgr::compound_check_morph(const std::string& word, + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + +- HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ HUNSPELL_THREAD_LOCAL std::chrono::steady_clock::time_point clock_time_start; ++ HUNSPELL_THREAD_LOCAL bool timelimit_exceeded; ++ ++ // get the current time ++ std::chrono::steady_clock::time_point clock_now = std::chrono::steady_clock::now(); + + if (wordnum == 0) { +- // get the start time, seeing as we're reusing this set to 0 +- // to flag timeout, use clock() + 1 to avoid start clock() +- // of 0 as being a timeout +- timelimit = clock() + 1; +- } +- else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { +- timelimit = 0; ++ // set the start time ++ clock_time_start = clock_now; ++ timelimit_exceeded = false; + } ++ else if (std::chrono::duration_cast<std::chrono::milliseconds>(clock_now - clock_time_start).count() ++ > TIMELIMIT * CLOCKS_PER_SEC * 1000) ++ timelimit_exceeded = true; + + setcminmax(&cmin, &cmax, word.c_str(), len); + +@@ -2246,7 +2253,7 @@ int AffixMgr::compound_check_morph(const std::string& word, + + do { // onlycpdrule loop + +- if (timelimit == 0) ++ if (timelimit_exceeded) + return 0; + + oldnumsyllable = numsyllable; +-- +2.41.0 + |