summaryrefslogtreecommitdiffstats
path: root/extensions/spellcheck/hunspell/src/affixmgr.hxx
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--extensions/spellcheck/hunspell/src/affixmgr.hxx368
1 files changed, 368 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/src/affixmgr.hxx b/extensions/spellcheck/hunspell/src/affixmgr.hxx
new file mode 100644
index 0000000000..450f50a65c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx
@@ -0,0 +1,368 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef AFFIXMGR_HXX_
+#define AFFIXMGR_HXX_
+
+#include <stdio.h>
+
+#include <string>
+#include <vector>
+
+#include "atypes.hxx"
+#include "baseaffix.hxx"
+#include "hashmgr.hxx"
+#include "phonet.hxx"
+#include "replist.hxx"
+
+// check flag duplication
+#define dupSFX (1 << 0)
+#define dupPFX (1 << 1)
+
+class PfxEntry;
+class SfxEntry;
+
+class AffixMgr {
+ PfxEntry* pStart[SETSIZE];
+ SfxEntry* sStart[SETSIZE];
+ PfxEntry* pFlag[SETSIZE];
+ SfxEntry* sFlag[SETSIZE];
+ const std::vector<HashMgr*>& alldic;
+ const HashMgr* pHMgr;
+ std::string keystring;
+ std::string trystring;
+ std::string encoding;
+ struct cs_info* csconv;
+ int utf8;
+ int complexprefixes;
+ FLAG compoundflag;
+ FLAG compoundbegin;
+ FLAG compoundmiddle;
+ FLAG compoundend;
+ FLAG compoundroot;
+ FLAG compoundforbidflag;
+ FLAG compoundpermitflag;
+ int compoundmoresuffixes;
+ int checkcompounddup;
+ int checkcompoundrep;
+ int checkcompoundcase;
+ int checkcompoundtriple;
+ int simplifiedtriple;
+ FLAG forbiddenword;
+ FLAG nosuggest;
+ FLAG nongramsuggest;
+ FLAG needaffix;
+ int cpdmin;
+ RepList* iconvtable;
+ RepList* oconvtable;
+ bool parsedmaptable;
+ std::vector<mapentry> maptable;
+ bool parsedbreaktable;
+ std::vector<std::string> breaktable;
+ bool parsedcheckcpd;
+ std::vector<patentry> checkcpdtable;
+ int simplifiedcpd;
+ bool parseddefcpd;
+ std::vector<flagentry> defcpdtable;
+ phonetable* phone;
+ int maxngramsugs;
+ int maxcpdsugs;
+ int maxdiff;
+ int onlymaxdiff;
+ int nosplitsugs;
+ int sugswithdots;
+ int cpdwordmax;
+ int cpdmaxsyllable;
+ std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
+ std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
+ std::string cpdsyllablenum; // syllable count incrementing flag
+ const char* pfxappnd; // BUG: not stateless
+ const char* sfxappnd; // BUG: not stateless
+ int sfxextra; // BUG: not stateless
+ FLAG sfxflag; // BUG: not stateless
+ char* derived; // BUG: not stateless
+ SfxEntry* sfx; // BUG: not stateless
+ PfxEntry* pfx; // BUG: not stateless
+ int checknum;
+ std::string wordchars; // letters + spec. word characters
+ std::vector<w_char> wordchars_utf16;
+ std::string ignorechars; // letters + spec. word characters
+ std::vector<w_char> ignorechars_utf16;
+ std::string version; // affix and dictionary file version string
+ std::string lang; // language
+ int langnum;
+ FLAG lemma_present;
+ FLAG circumfix;
+ FLAG onlyincompound;
+ FLAG keepcase;
+ FLAG forceucase;
+ FLAG warn;
+ int forbidwarn;
+ FLAG substandard;
+ int checksharps;
+ int fullstrip;
+
+ int havecontclass; // boolean variable
+ char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold
+ // affix)
+
+ public:
+ AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);
+ ~AffixMgr();
+ struct hentry* affix_check(const char* word,
+ int len,
+ const unsigned short needflag = (unsigned short)0,
+ char in_compound = IN_CPD_NOT);
+ struct hentry* prefix_check(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ inline int isSubset(const char* s1, const char* s2);
+ struct hentry* prefix_check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ inline int isRevSubset(const char* s1, const char* end_of_s2, int len);
+ struct hentry* suffix_check(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+ struct hentry* suffix_check_twosfx(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string affix_check_morph(const char* word,
+ int len,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+ std::string prefix_check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ std::string suffix_check_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+
+ std::string prefix_check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ std::string suffix_check_twosfx_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string morphgen(const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* morph,
+ const char* targetmorph,
+ int level);
+
+ int expand_rootword(struct guessword* wlst,
+ int maxn,
+ const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* bad,
+ int,
+ const char*);
+
+ short get_syllable(const std::string& word);
+ int cpdrep_check(const char* word, int len);
+ int cpdwordpair_check(const char * word, int len);
+ int cpdpat_check(const char* word,
+ int len,
+ hentry* r1,
+ hentry* r2,
+ const char affixed);
+ int defcpd_check(hentry*** words,
+ short wnum,
+ hentry* rv,
+ hentry** rwords,
+ char all);
+ int cpdcase_check(const char* word, int len);
+ inline int candidate_check(const char* word, int len);
+ void setcminmax(int* cmin, int* cmax, const char* word, int len);
+ struct hentry* compound_check(const std::string& word,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ char is_sug,
+ int* info);
+
+ int compound_check_morph(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ std::string& result,
+ const std::string* partresult);
+
+ std::vector<std::string> get_suffix_words(short unsigned* suff,
+ int len,
+ const char* root_word);
+
+ struct hentry* lookup(const char* word);
+ const std::vector<replentry>& get_reptable() const;
+ RepList* get_iconvtable() const;
+ RepList* get_oconvtable() const;
+ struct phonetable* get_phonetable() const;
+ const std::vector<mapentry>& get_maptable() const;
+ const std::vector<std::string>& get_breaktable() const;
+ const std::string& get_encoding();
+ int get_langnum() const;
+ char* get_key_string();
+ char* get_try_string() const;
+ const std::string& get_wordchars() const;
+ const std::vector<w_char>& get_wordchars_utf16() const;
+ const char* get_ignore() const;
+ const std::vector<w_char>& get_ignore_utf16() const;
+ int get_compound() const;
+ FLAG get_compoundflag() const;
+ FLAG get_forbiddenword() const;
+ FLAG get_nosuggest() const;
+ FLAG get_nongramsuggest() const;
+ FLAG get_substandard() const;
+ FLAG get_needaffix() const;
+ FLAG get_onlyincompound() const;
+ const char* get_derived() const;
+ const std::string& get_version() const;
+ int have_contclass() const;
+ int get_utf8() const;
+ int get_complexprefixes() const;
+ char* get_suffixed(char) const;
+ int get_maxngramsugs() const;
+ int get_maxcpdsugs() const;
+ int get_maxdiff() const;
+ int get_onlymaxdiff() const;
+ int get_nosplitsugs() const;
+ int get_sugswithdots(void) const;
+ FLAG get_keepcase(void) const;
+ FLAG get_forceucase(void) const;
+ FLAG get_warn(void) const;
+ int get_forbidwarn(void) const;
+ int get_checksharps(void) const;
+ char* encode_flag(unsigned short aflag) const;
+ int get_fullstrip() const;
+
+ private:
+ int parse_file(const char* affpath, const char* key);
+ bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);
+ bool parse_num(const std::string& line, int* out, FileMgr* af);
+ bool parse_cpdsyllable(const std::string& line, FileMgr* af);
+ bool parse_convtable(const std::string& line,
+ FileMgr* af,
+ RepList** rl,
+ const std::string& keyword);
+ bool parse_phonetable(const std::string& line, FileMgr* af);
+ bool parse_maptable(const std::string& line, FileMgr* af);
+ bool parse_breaktable(const std::string& line, FileMgr* af);
+ bool parse_checkcpdtable(const std::string& line, FileMgr* af);
+ bool parse_defcpdtable(const std::string& line, FileMgr* af);
+ bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);
+
+ void reverse_condition(std::string&);
+ std::string& debugflag(std::string& result, unsigned short flag);
+ int condlen(const char*);
+ int encodeit(AffEntry& entry, const char* cs);
+ int build_pfxtree(PfxEntry* pfxptr);
+ int build_sfxtree(SfxEntry* sfxptr);
+ int process_pfx_order();
+ int process_sfx_order();
+ PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
+ SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
+ int process_pfx_tree_to_list();
+ int process_sfx_tree_to_list();
+ int redundant_condition(char, const char* strip, int stripl, const char* cond, int);
+ void finishFileMgr(FileMgr* afflst);
+};
+
+#endif